modify doc structure and update existing doc-links as preparation for new doc generation script

2025-12-14 05:45:06 +03:00 · 2024-05-15 19:32:51 +02:00
parent 0b464b5239
commit 4dbfd42f1d
82 changed files with 70 additions and 70 deletions
--- a/docs/developers/ci-setup.md
+++ b/docs/developers/ci-setup.md
@@ -0,0 +1,211 @@
+# CI Setup
+
+## Pipeline
+
+1. build: build a docker image to be used in the pipeline
+2. unit-tests: fast jobs for fast feedback (linting, etc...)
+3. deploy-part1: small number of jobs to test if the PR works with default settings
+4. deploy-part2: slow jobs testing different platforms, OS, settings, CNI, etc...
+5. deploy-part3: very slow jobs (upgrades, etc...)
+
+## Runners
+
+Kubespray has 3 types of GitLab runners:
+
+- packet runners: used for E2E jobs (usually long), running on Equinix Metal (ex-packet), on kubevirt managed VMs
+- light runners: used for short lived jobs, running on Equinix Metal (ex-packet), as managed pods
+- auto scaling runners (managed via docker-machine on Equinix Metal): used for on-demand resources, see [GitLab docs](https://docs.gitlab.com/runner/configuration/autoscale.html) for more info
+
+## Vagrant
+
+Vagrant jobs are using the [quay.io/kubespray/vagrant](/test-infra/vagrant-docker/Dockerfile) docker image with `/var/run/libvirt/libvirt-sock` exposed from the host, allowing the container to boot VMs on the host.
+
+## CI Variables
+
+In CI we have a set of overrides we use to ensure greater success of our CI jobs and avoid throttling by various APIs we depend on. See:
+
+- [Docker mirrors](/tests/common/_docker_hub_registry_mirror.yml)
+- [Test settings](/tests/common/_kubespray_test_settings.yml)
+
+## CI Environment
+
+The CI packet and light runners are deployed on a kubernetes cluster on Equinix Metal. The cluster is deployed with kubespray itself and maintained by the kubespray maintainers.
+
+The following files are used for that inventory:
+
+### cluster.tfvars
+
+```ini
+# your Kubernetes cluster name here
+cluster_name = "ci"
+
+# Your Equinix Metal project ID. See https://metal.equinix.com/developers/docs/accounts/
+equinix_metal_project_id = "_redacted_"
+
+# The public SSH key to be uploaded into authorized_keys in bare metal Equinix Metal nodes provisioned
+# leave this value blank if the public key is already setup in the Equinix Metal project
+# Terraform will complain if the public key is setup in Equinix Metal
+public_key_path = "~/.ssh/id_rsa.pub"
+
+# cluster location
+metro = "da"
+
+# standalone etcds
+number_of_etcd = 0
+
+plan_etcd = "t1.small.x86"
+
+# masters
+number_of_k8s_masters = 1
+
+number_of_k8s_masters_no_etcd = 0
+
+plan_k8s_masters = "c3.small.x86"
+
+plan_k8s_masters_no_etcd = "t1.small.x86"
+
+# nodes
+number_of_k8s_nodes = 1
+
+plan_k8s_nodes = "c3.medium.x86"
+```
+
+### group_vars/all/mirrors.yml
+
+```yaml
+---
+docker_registry_mirrors:
+  - "https://mirror.gcr.io"
+
+containerd_grpc_max_recv_message_size: 16777216
+containerd_grpc_max_send_message_size: 16777216
+
+containerd_registries_mirrors:
+  - prefix: docker.io
+    mirrors:
+      - host: https://mirror.gcr.io
+        capabilities: ["pull", "resolve"]
+        skip_verify: false
+      - host: https://registry-1.docker.io
+        capabilities: ["pull", "resolve"]
+        skip_verify: false
+
+containerd_max_container_log_line_size: -1
+
+crio_registries_mirrors:
+  - prefix: docker.io
+    insecure: false
+    blocked: false
+    location: registry-1.docker.io
+    mirrors:
+      - location: mirror.gcr.io
+        insecure: false
+
+netcheck_agent_image_repo: "{{ quay_image_repo }}/kubespray/k8s-netchecker-agent"
+netcheck_server_image_repo: "{{ quay_image_repo }}/kubespray/k8s-netchecker-server"
+
+nginx_image_repo: "{{ quay_image_repo }}/kubespray/nginx"
+```
+
+### group_vars/all/settings.yml
+
+```yaml
+---
+# Networking setting
+kube_service_addresses: 172.30.0.0/18
+kube_pods_subnet: 172.30.64.0/18
+kube_network_plugin: calico
+# avoid overlap with CI jobs deploying nodelocaldns
+nodelocaldns_ip: 169.254.255.100
+
+# ipip: False
+calico_ipip_mode: "Never"
+calico_vxlan_mode: "Never"
+calico_network_backend: "bird"
+calico_wireguard_enabled: True
+
+# Cluster settings
+upgrade_cluster_setup: True
+force_certificate_regeneration: True
+
+# Etcd settings
+etcd_deployment_type: "host"
+
+# Kubernetes settings
+kube_controller_terminated_pod_gc_threshold: 100
+kubelet_enforce_node_allocatable: pods
+kubelet_preferred_address_types: 'InternalIP,ExternalIP,Hostname'
+kubelet_custom_flags:
+  - "--serialize-image-pulls=true"
+  - "--eviction-hard=memory.available<1Gi"
+  - "--eviction-soft-grace-period=memory.available=30s"
+  - "--eviction-soft=memory.available<2Gi"
+  - "--system-reserved cpu=100m,memory=4Gi"
+  - "--eviction-minimum-reclaim=memory.available=2Gi"
+
+# DNS settings
+resolvconf_mode: none
+dns_min_replicas: 1
+upstream_dns_servers:
+  - 1.1.1.1
+  - 1.0.0.1
+
+# Extensions
+ingress_nginx_enabled: True
+helm_enabled: True
+cert_manager_enabled: True
+metrics_server_enabled: True
+
+# Enable ZSWAP
+kubelet_fail_swap_on: False
+kube_feature_gates:
+  - "NodeSwap=True"
+```
+
+## Additional files
+
+This section documents additional files used to complete a deployment of the kubespray CI, these files sit on the control-plane node and assume a working kubernetes cluster.
+
+### /root/nscleanup.sh
+
+```bash
+#!/bin/bash
+
+kubectl=/usr/local/bin/kubectl
+
+$kubectl get ns | grep -P "(\d.+-\d.+)" | awk 'match($3,/[0-9]+d/) {print $1}' | xargs -r $kubectl delete ns
+$kubectl get ns | grep -P "(\d.+-\d.+)" | awk 'match($3,/[3-9]+h/) {print $1}' | xargs -r $kubectl delete ns
+$kubectl get ns | grep Terminating | awk '{print $1}' | xargs -i $kubectl delete vmi/instance-1 vmi/instance-0 vmi/instance-2 -n {} --force --grace-period=0 &
+```
+
+### /root/path-calico.sh
+
+```bash
+#!/bin/bash
+
+calicoctl patch felixconfig default -p '{"spec":{"allowIPIPPacketsFromWorkloads":true, "allowVXLANPacketsFromWorkloads": true}}'
+```
+
+### /root/kubevirt/kubevirt.sh
+
+```bash
+#!/bin/bash
+
+export VERSION=$(curl -s https://api.github.com/repos/kubevirt/kubevirt/releases | grep tag_name | grep -v -- '-rc' | sort -r | head -1 | awk -F': ' '{print $2}' | sed 's/,//' | xargs)
+echo $VERSION
+kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/kubevirt-operator.yaml
+kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/kubevirt-cr.yaml
+```
+
+### /root/kubevirt/virtctl.sh
+
+```bash
+#!/bin/bash
+
+VERSION=$(kubectl get kubevirt.kubevirt.io/kubevirt -n kubevirt -o=jsonpath="{.status.observedKubeVirtVersion}")
+ARCH=$(uname -s | tr A-Z a-z)-$(uname -m | sed 's/x86_64/amd64/') || windows-amd64.exe
+echo ${ARCH}
+curl -L -o virtctl https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/virtctl-${VERSION}-${ARCH}
+chmod +x virtctl
+sudo install virtctl /usr/local/bin
+```
--- a/docs/developers/ci.md
+++ b/docs/developers/ci.md
@@ -0,0 +1,57 @@
+# CI test coverage
+
+To generate this Matrix run `./tests/scripts/md-table/main.py`
+
+## containerd
+
+| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
+|---| --- | --- | --- | --- | --- | --- | --- | --- |
+almalinux8 |  :white_check_mark: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: | :x: |
+amazon |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+centos7 |  :white_check_mark: | :x: | :x: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: |
+debian10 |  :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: |
+debian11 |  :white_check_mark: | :x: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: |
+debian12 |  :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: |
+fedora37 |  :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: |
+fedora38 |  :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: | :x: |
+opensuse |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux8 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux9 |  :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: |
+ubuntu20 |  :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: |
+ubuntu22 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+
+## crio
+
+| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
+|---| --- | --- | --- | --- | --- | --- | --- | --- |
+almalinux8 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+amazon |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+centos7 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian10 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian11 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian12 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+fedora37 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+fedora38 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+opensuse |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux8 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux9 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+ubuntu20 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+ubuntu22 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+
+## docker
+
+| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
+|---| --- | --- | --- | --- | --- | --- | --- | --- |
+almalinux8 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+amazon |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+centos7 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian10 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian11 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+debian12 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+fedora37 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+fedora38 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark: |
+opensuse |  :x: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux8 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+rockylinux9 |  :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
+ubuntu20 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark: |
+ubuntu22 |  :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
--- a/docs/developers/test_cases.md
+++ b/docs/developers/test_cases.md
@@ -0,0 +1,38 @@
+# Node Layouts
+
+There are six node layout types: `default`, `separate`, `ha`, `scale`, `all-in-one`, and `node-etcd-client`.
+
+`default` is a non-HA two-node setup with one separate `kube_node`
+and the `etcd` group merged with the `kube_control_plane`.
+
+`separate` layout is when there is only one node of each type, which includes
+ a kube_control_plane, kube_node, and etcd cluster member.
+
+`ha` layout consists of two etcd nodes, two control planes and a single worker node,
+with role intersection.
+
+`scale` layout can be combined with above layouts (`ha-scale`, `separate-scale`). It includes 200 fake hosts
+in the Ansible inventory. This helps test TLS certificate generation at scale
+to prevent regressions and profile certain long-running tasks. These nodes are
+never actually deployed, but certificates are generated for them.
+
+`all-in-one` layout uses a single node with `kube_control_plane`, `etcd` and `kube_node` merged.
+
+`node-etcd-client` layout consists of a 4-node cluster, all of them in `kube_node`, the first 3 in `etcd` and only one in `kube_control_plane`.
+This is necessary to test setups requiring that nodes are etcd clients (use of cilium as `network_plugin` for instance).
+
+Note, the canal network plugin deploys flannel as well as the calico policy controller.
+
+## Test cases
+
+The [CI Matrix](/docs/developers/ci.md) displays OS, Network Plugin and Container Manager tested.
+
+All tests are broken down into 5 "stages" ("Stage" means a build step of the build pipeline) as follows:
+
+- _unit_tests_: Linting, markdown, vagrant & terraform validation etc...
+- _part1_: Molecule and AIO tests
+- _part2_: Standard tests with different layouts and OS/Runtime/Network
+- _part3_: Upgrade jobs, terraform jobs and recover control plane tests
+- _special_: Other jobs (manuals)
+
+The steps are ordered as `unit_tests->part1->part2->part3->special`.
--- a/docs/developers/vagrant.md
+++ b/docs/developers/vagrant.md
@@ -0,0 +1,169 @@
+# Vagrant
+
+Assuming you have Vagrant 2.0+ installed with virtualbox or libvirt/qemu
+(vmware may work, but is untested) you should be able to launch a 3 node
+Kubernetes cluster by simply running `vagrant up`.
+
+This will spin up 3 VMs and install kubernetes on them.
+Once they are completed you can connect to any of them by running `vagrant ssh k8s-[1..3]`.
+
+To give an estimate of the expected duration of a provisioning run:
+On a dual core i5-6300u laptop with an SSD, provisioning takes around 13
+to 15 minutes, once the container images and other files are cached.
+Note that libvirt/qemu is recommended over virtualbox as it is quite a bit
+faster, especially during boot-up time.
+
+For proper performance a minimum of 12GB RAM is recommended.
+It is possible to run a 3 node cluster on a laptop with 8GB of RAM using
+the default Vagrantfile, provided you have 8GB zram swap configured and
+not much more than a browser and a mail client running.
+If you decide to run on such a machine, then also make sure that any tmpfs
+devices, that are mounted, are mostly empty and disable any swapfiles
+mounted on HDD/SSD or you will be in for some serious swap-madness.
+Things can get a bit sluggish during provisioning, but when that's done,
+the system will actually be able to perform quite well.
+
+## Customize Vagrant
+
+You can override the default settings in the `Vagrantfile` either by
+directly modifying the `Vagrantfile` or through an override file.
+In the same directory as the `Vagrantfile`, create a folder called
+`vagrant` and create `config.rb` file in it.
+An example of how to configure this file is given below.
+
+## Use alternative OS for Vagrant
+
+By default, Vagrant uses an Ubuntu 18.04 box to provision a local cluster.
+You may use an alternative supported operating system for your local cluster.
+
+Customize the `$os` variable in the `Vagrantfile` or in the override file, e.g.:
+
+```ShellSession
+echo '$os = "flatcar-stable"' >> vagrant/config.rb
+```
+
+The supported operating systems for vagrant are defined in the `SUPPORTED_OS`
+constant in the `Vagrantfile`.
+
+## File and image caching
+
+Kubespray can take quite a while to start on a laptop. To improve provisioning
+speed, the variable `download_run_once` is set. This will make kubespray
+download all files and containers just once and then redistribute them to
+the other nodes and as a bonus, also cache all downloads locally and re-use
+them on the next provisioning run. For more information on download settings
+see [download documentation](/docs/advanced/downloads.md).
+
+## Example use of Vagrant
+
+The following is an example of setting up and running kubespray using `vagrant`.
+For repeated runs, you could save the script to a file in the root of the
+kubespray repository and run it by executing `source <name_of_the_file>`.
+
+```ShellSession
+# use virtualenv to install all python requirements
+VENVDIR=venv
+virtualenv --python=/usr/bin/python3.7 $VENVDIR
+source $VENVDIR/bin/activate
+pip install -r requirements.txt
+
+# prepare an inventory to test with
+INV=inventory/my_lab
+rm -rf ${INV}.bak &> /dev/null
+mv ${INV} ${INV}.bak &> /dev/null
+cp -a inventory/sample ${INV}
+rm -f ${INV}/hosts.ini
+
+# customize the vagrant environment
+mkdir vagrant
+cat << EOF > vagrant/config.rb
+\$instance_name_prefix = "kub"
+\$vm_cpus = 1
+\$num_instances = 3
+\$os = "centos-bento"
+\$subnet = "10.0.20"
+\$network_plugin = "flannel"
+\$inventory = "$INV"
+\$shared_folders = { 'temp/docker_rpms' => "/var/cache/yum/x86_64/7/docker-ce/packages" }
+\$extra_vars = {
+    dns_domain: my.custom.domain
+}
+# or
+\$extra_vars = "path/to/extra/vars/file.yml"
+EOF
+
+# make the rpm cache
+mkdir -p temp/docker_rpms
+
+vagrant up
+
+# make a copy of the downloaded docker rpm, to speed up the next provisioning run
+scp kub-1:/var/cache/yum/x86_64/7/docker-ce/packages/* temp/docker_rpms/
+
+# copy kubectl access configuration in place
+mkdir $HOME/.kube/ &> /dev/null
+ln -s $PWD/$INV/artifacts/admin.conf $HOME/.kube/config
+# make the kubectl binary available
+sudo ln -s $PWD/$INV/artifacts/kubectl /usr/local/bin/kubectl
+#or
+export PATH=$PATH:$PWD/$INV/artifacts
+```
+
+If a vagrant run failed and you've made some changes to fix the issue that
+caused the failure, here is how you would re-run ansible:
+
+```ShellSession
+ansible-playbook -vvv -i .vagrant/provisioners/ansible/inventory/vagrant_ansible_inventory cluster.yml
+```
+
+If all went well, you can check if it's all working as expected:
+
+```ShellSession
+kubectl get nodes
+```
+
+The output should look like this:
+
+```ShellSession
+$ kubectl get nodes
+NAME    STATUS   ROLES                  AGE     VERSION
+kub-1   Ready    control-plane,master   4m37s   v1.22.5
+kub-2   Ready    control-plane,master   4m7s    v1.22.5
+kub-3   Ready    <none>                 3m7s    v1.22.5
+```
+
+Another nice test is the following:
+
+```ShellSession
+kubectl get pods --all-namespaces -o wide
+```
+
+Which should yield something like the following:
+
+```ShellSession
+$ kubectl get pods --all-namespaces -o wide
+NAMESPACE            NAME                                      READY   STATUS    RESTARTS   AGE     IP            NODE    NOMINATED NODE   READINESS GATES
+kube-system          coredns-8474476ff8-m2469                  1/1     Running   0          2m45s   10.233.65.2   kub-2   <none>           <none>
+kube-system          coredns-8474476ff8-v5wzj                  1/1     Running   0          2m41s   10.233.64.3   kub-1   <none>           <none>
+kube-system          dns-autoscaler-5ffdc7f89d-76tnv           1/1     Running   0          2m43s   10.233.64.2   kub-1   <none>           <none>
+kube-system          kube-apiserver-kub-1                      1/1     Running   1          4m54s   10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-apiserver-kub-2                      1/1     Running   1          4m33s   10.0.20.102   kub-2   <none>           <none>
+kube-system          kube-controller-manager-kub-1             1/1     Running   1          5m1s    10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-controller-manager-kub-2             1/1     Running   1          4m33s   10.0.20.102   kub-2   <none>           <none>
+kube-system          kube-flannel-9xgf5                        1/1     Running   0          3m10s   10.0.20.102   kub-2   <none>           <none>
+kube-system          kube-flannel-l8jbl                        1/1     Running   0          3m10s   10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-flannel-zss4t                        1/1     Running   0          3m10s   10.0.20.103   kub-3   <none>           <none>
+kube-system          kube-multus-ds-amd64-bhpc9                1/1     Running   0          3m2s    10.0.20.103   kub-3   <none>           <none>
+kube-system          kube-multus-ds-amd64-n6vl8                1/1     Running   0          3m2s    10.0.20.102   kub-2   <none>           <none>
+kube-system          kube-multus-ds-amd64-qttgs                1/1     Running   0          3m2s    10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-proxy-2x4jl                          1/1     Running   0          3m33s   10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-proxy-d48r7                          1/1     Running   0          3m33s   10.0.20.103   kub-3   <none>           <none>
+kube-system          kube-proxy-f45lp                          1/1     Running   0          3m33s   10.0.20.102   kub-2   <none>           <none>
+kube-system          kube-scheduler-kub-1                      1/1     Running   1          4m54s   10.0.20.101   kub-1   <none>           <none>
+kube-system          kube-scheduler-kub-2                      1/1     Running   1          4m33s   10.0.20.102   kub-2   <none>           <none>
+kube-system          nginx-proxy-kub-3                         1/1     Running   0          3m33s   10.0.20.103   kub-3   <none>           <none>
+kube-system          nodelocaldns-cg9tz                        1/1     Running   0          2m41s   10.0.20.102   kub-2   <none>           <none>
+kube-system          nodelocaldns-htswt                        1/1     Running   0          2m41s   10.0.20.103   kub-3   <none>           <none>
+kube-system          nodelocaldns-nsp7s                        1/1     Running   0          2m41s   10.0.20.101   kub-1   <none>           <none>
+local-path-storage   local-path-provisioner-66df45bfdd-km4zg   1/1     Running   0          2m54s   10.233.66.2   kub-3   <none>           <none>
+```