modify doc structure and update existing doc-links as preparation for new doc generation script

This commit is contained in:
Payback159
2024-05-15 19:32:51 +02:00
parent 0b464b5239
commit 4dbfd42f1d
82 changed files with 70 additions and 70 deletions

211
docs/developers/ci-setup.md Normal file
View File

@@ -0,0 +1,211 @@
# CI Setup
## Pipeline
1. build: build a docker image to be used in the pipeline
2. unit-tests: fast jobs for fast feedback (linting, etc...)
3. deploy-part1: small number of jobs to test if the PR works with default settings
4. deploy-part2: slow jobs testing different platforms, OS, settings, CNI, etc...
5. deploy-part3: very slow jobs (upgrades, etc...)
## Runners
Kubespray has 3 types of GitLab runners:
- packet runners: used for E2E jobs (usually long), running on Equinix Metal (ex-packet), on kubevirt managed VMs
- light runners: used for short lived jobs, running on Equinix Metal (ex-packet), as managed pods
- auto scaling runners (managed via docker-machine on Equinix Metal): used for on-demand resources, see [GitLab docs](https://docs.gitlab.com/runner/configuration/autoscale.html) for more info
## Vagrant
Vagrant jobs are using the [quay.io/kubespray/vagrant](/test-infra/vagrant-docker/Dockerfile) docker image with `/var/run/libvirt/libvirt-sock` exposed from the host, allowing the container to boot VMs on the host.
## CI Variables
In CI we have a set of overrides we use to ensure greater success of our CI jobs and avoid throttling by various APIs we depend on. See:
- [Docker mirrors](/tests/common/_docker_hub_registry_mirror.yml)
- [Test settings](/tests/common/_kubespray_test_settings.yml)
## CI Environment
The CI packet and light runners are deployed on a kubernetes cluster on Equinix Metal. The cluster is deployed with kubespray itself and maintained by the kubespray maintainers.
The following files are used for that inventory:
### cluster.tfvars
```ini
# your Kubernetes cluster name here
cluster_name = "ci"
# Your Equinix Metal project ID. See https://metal.equinix.com/developers/docs/accounts/
equinix_metal_project_id = "_redacted_"
# The public SSH key to be uploaded into authorized_keys in bare metal Equinix Metal nodes provisioned
# leave this value blank if the public key is already setup in the Equinix Metal project
# Terraform will complain if the public key is setup in Equinix Metal
public_key_path = "~/.ssh/id_rsa.pub"
# cluster location
metro = "da"
# standalone etcds
number_of_etcd = 0
plan_etcd = "t1.small.x86"
# masters
number_of_k8s_masters = 1
number_of_k8s_masters_no_etcd = 0
plan_k8s_masters = "c3.small.x86"
plan_k8s_masters_no_etcd = "t1.small.x86"
# nodes
number_of_k8s_nodes = 1
plan_k8s_nodes = "c3.medium.x86"
```
### group_vars/all/mirrors.yml
```yaml
---
docker_registry_mirrors:
- "https://mirror.gcr.io"
containerd_grpc_max_recv_message_size: 16777216
containerd_grpc_max_send_message_size: 16777216
containerd_registries_mirrors:
- prefix: docker.io
mirrors:
- host: https://mirror.gcr.io
capabilities: ["pull", "resolve"]
skip_verify: false
- host: https://registry-1.docker.io
capabilities: ["pull", "resolve"]
skip_verify: false
containerd_max_container_log_line_size: -1
crio_registries_mirrors:
- prefix: docker.io
insecure: false
blocked: false
location: registry-1.docker.io
mirrors:
- location: mirror.gcr.io
insecure: false
netcheck_agent_image_repo: "{{ quay_image_repo }}/kubespray/k8s-netchecker-agent"
netcheck_server_image_repo: "{{ quay_image_repo }}/kubespray/k8s-netchecker-server"
nginx_image_repo: "{{ quay_image_repo }}/kubespray/nginx"
```
### group_vars/all/settings.yml
```yaml
---
# Networking setting
kube_service_addresses: 172.30.0.0/18
kube_pods_subnet: 172.30.64.0/18
kube_network_plugin: calico
# avoid overlap with CI jobs deploying nodelocaldns
nodelocaldns_ip: 169.254.255.100
# ipip: False
calico_ipip_mode: "Never"
calico_vxlan_mode: "Never"
calico_network_backend: "bird"
calico_wireguard_enabled: True
# Cluster settings
upgrade_cluster_setup: True
force_certificate_regeneration: True
# Etcd settings
etcd_deployment_type: "host"
# Kubernetes settings
kube_controller_terminated_pod_gc_threshold: 100
kubelet_enforce_node_allocatable: pods
kubelet_preferred_address_types: 'InternalIP,ExternalIP,Hostname'
kubelet_custom_flags:
- "--serialize-image-pulls=true"
- "--eviction-hard=memory.available<1Gi"
- "--eviction-soft-grace-period=memory.available=30s"
- "--eviction-soft=memory.available<2Gi"
- "--system-reserved cpu=100m,memory=4Gi"
- "--eviction-minimum-reclaim=memory.available=2Gi"
# DNS settings
resolvconf_mode: none
dns_min_replicas: 1
upstream_dns_servers:
- 1.1.1.1
- 1.0.0.1
# Extensions
ingress_nginx_enabled: True
helm_enabled: True
cert_manager_enabled: True
metrics_server_enabled: True
# Enable ZSWAP
kubelet_fail_swap_on: False
kube_feature_gates:
- "NodeSwap=True"
```
## Additional files
This section documents additional files used to complete a deployment of the kubespray CI. These files sit on the control-plane node and assume a working kubernetes cluster.
### /root/nscleanup.sh
```bash
#!/bin/bash
kubectl=/usr/local/bin/kubectl
$kubectl get ns | grep -P "(\d.+-\d.+)" | awk 'match($3,/[0-9]+d/) {print $1}' | xargs -r $kubectl delete ns
$kubectl get ns | grep -P "(\d.+-\d.+)" | awk 'match($3,/[3-9]+h/) {print $1}' | xargs -r $kubectl delete ns
$kubectl get ns | grep Terminating | awk '{print $1}' | xargs -i $kubectl delete vmi/instance-1 vmi/instance-0 vmi/instance-2 -n {} --force --grace-period=0 &
```
### /root/path-calico.sh
```bash
#!/bin/bash
calicoctl patch felixconfig default -p '{"spec":{"allowIPIPPacketsFromWorkloads":true, "allowVXLANPacketsFromWorkloads": true}}'
```
### /root/kubevirt/kubevirt.sh
```bash
#!/bin/bash
export VERSION=$(curl -s https://api.github.com/repos/kubevirt/kubevirt/releases | grep tag_name | grep -v -- '-rc' | sort -r | head -1 | awk -F': ' '{print $2}' | sed 's/,//' | xargs)
echo $VERSION
kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/kubevirt-operator.yaml
kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/kubevirt-cr.yaml
```
### /root/kubevirt/virtctl.sh
```bash
#!/bin/bash
VERSION=$(kubectl get kubevirt.kubevirt.io/kubevirt -n kubevirt -o=jsonpath="{.status.observedKubeVirtVersion}")
ARCH=$(uname -s | tr A-Z a-z)-$(uname -m | sed 's/x86_64/amd64/') || windows-amd64.exe
echo ${ARCH}
curl -L -o virtctl https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/virtctl-${VERSION}-${ARCH}
chmod +x virtctl
sudo install virtctl /usr/local/bin
```

57
docs/developers/ci.md Normal file
View File

@@ -0,0 +1,57 @@
# CI test coverage
To generate this Matrix run `./tests/scripts/md-table/main.py`
## containerd
| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
|---| --- | --- | --- | --- | --- | --- | --- | --- |
almalinux8 | :white_check_mark: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: | :x: |
amazon | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
centos7 | :white_check_mark: | :x: | :x: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: |
debian10 | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: |
debian11 | :white_check_mark: | :x: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: |
debian12 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: |
fedora37 | :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: |
fedora38 | :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: | :x: |
opensuse | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux8 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux9 | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: |
ubuntu20 | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: |
ubuntu22 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
## crio
| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
|---| --- | --- | --- | --- | --- | --- | --- | --- |
almalinux8 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
amazon | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
centos7 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian10 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian11 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian12 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
fedora37 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
fedora38 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
opensuse | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux8 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux9 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
ubuntu20 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
ubuntu22 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
## docker
| OS / CNI | calico | cilium | custom_cni | flannel | kube-ovn | kube-router | macvlan | weave |
|---| --- | --- | --- | --- | --- | --- | --- | --- |
almalinux8 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
amazon | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
centos7 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian10 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian11 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian12 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
fedora37 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
fedora38 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark: |
opensuse | :x: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux8 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
rockylinux9 | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
ubuntu20 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark: |
ubuntu22 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |

View File

@@ -0,0 +1,38 @@
# Node Layouts
There are six node layout types: `default`, `separate`, `ha`, `scale`, `all-in-one`, and `node-etcd-client`.
`default` is a non-HA two-node setup with one separate `kube_node`
and the `etcd` group merged with the `kube_control_plane`.
`separate` layout is when there is only one node of each type, which includes
a kube_control_plane, kube_node, and etcd cluster member.
`ha` layout consists of two etcd nodes, two control planes and a single worker node,
with role intersection.
`scale` layout can be combined with above layouts (`ha-scale`, `separate-scale`). It includes 200 fake hosts
in the Ansible inventory. This helps test TLS certificate generation at scale
to prevent regressions and profile certain long-running tasks. These nodes are
never actually deployed, but certificates are generated for them.
`all-in-one` layout uses a single node with `kube_control_plane`, `etcd` and `kube_node` merged.
`node-etcd-client` layout consists of a 4-node cluster, all of them in `kube_node`, first 3 in `etcd` and only one `kube_control_plane`.
This is necessary to test setups requiring that nodes are etcd clients (use of cilium as `network_plugin` for instance).
Note: the canal network plugin deploys flannel as well as the calico policy controller.
## Test cases
The [CI Matrix](/docs/developers/ci.md) displays OS, Network Plugin and Container Manager tested.
All tests are broken down into 5 "stages" ("Stage" means a build step of the build pipeline) as follows:
- _unit_tests_: Linting, markdown, vagrant & terraform validation etc...
- _part1_: Molecule and AIO tests
- _part2_: Standard tests with different layouts and OS/Runtime/Network
- _part3_: Upgrade jobs, terraform jobs and recover control plane tests
- _special_: Other jobs (manuals)
The steps are ordered as `unit_tests->part1->part2->part3->special`.

169
docs/developers/vagrant.md Normal file
View File

@@ -0,0 +1,169 @@
# Vagrant
Assuming you have Vagrant 2.0+ installed with virtualbox or libvirt/qemu
(vmware may work, but is untested) you should be able to launch a 3 node
Kubernetes cluster by simply running `vagrant up`.
This will spin up 3 VMs and install kubernetes on them.
Once they are completed you can connect to any of them by running `vagrant ssh k8s-[1..3]`.
To give an estimate of the expected duration of a provisioning run:
On a dual core i5-6300u laptop with an SSD, provisioning takes around 13
to 15 minutes, once the container images and other files are cached.
Note that libvirt/qemu is recommended over virtualbox as it is quite a bit
faster, especially during boot-up time.
For proper performance a minimum of 12GB RAM is recommended.
It is possible to run a 3 node cluster on a laptop with 8GB of RAM using
the default Vagrantfile, provided you have 8GB zram swap configured and
not much more than a browser and a mail client running.
If you decide to run on such a machine, then also make sure that any tmpfs
devices, that are mounted, are mostly empty and disable any swapfiles
mounted on HDD/SSD or you will be in for some serious swap-madness.
Things can get a bit sluggish during provisioning, but when that's done,
the system will actually be able to perform quite well.
## Customize Vagrant
You can override the default settings in the `Vagrantfile` either by
directly modifying the `Vagrantfile` or through an override file.
In the same directory as the `Vagrantfile`, create a folder called
`vagrant` and create a `config.rb` file in it.
An example of how to configure this file is given below.
## Use alternative OS for Vagrant
By default, Vagrant uses an Ubuntu 18.04 box to provision a local cluster.
You may use an alternative supported operating system for your local cluster.
Customize the `$os` variable in the `Vagrantfile` or as an override, e.g.:
```ShellSession
echo '$os = "flatcar-stable"' >> vagrant/config.rb
```
The supported operating systems for vagrant are defined in the `SUPPORTED_OS`
constant in the `Vagrantfile`.
## File and image caching
Kubespray can take quite a while to start on a laptop. To improve provisioning
speed, the variable 'download_run_once' is set. This will make kubespray
download all files and containers just once and then redistribute them to
the other nodes and, as a bonus, also cache all downloads locally and re-use
them on the next provisioning run. For more information on download settings
see [download documentation](/docs/advanced/downloads.md).
## Example use of Vagrant
The following is an example of setting up and running kubespray using `vagrant`.
For repeated runs, you could save the script to a file in the root of the
kubespray repository and run it by executing `source <name_of_the_file>`.
```ShellSession
# use virtualenv to install all python requirements
VENVDIR=venv
virtualenv --python=/usr/bin/python3.7 $VENVDIR
source $VENVDIR/bin/activate
pip install -r requirements.txt
# prepare an inventory to test with
INV=inventory/my_lab
rm -rf ${INV}.bak &> /dev/null
mv ${INV} ${INV}.bak &> /dev/null
cp -a inventory/sample ${INV}
rm -f ${INV}/hosts.ini
# customize the vagrant environment
mkdir vagrant
cat << EOF > vagrant/config.rb
\$instance_name_prefix = "kub"
\$vm_cpus = 1
\$num_instances = 3
\$os = "centos-bento"
\$subnet = "10.0.20"
\$network_plugin = "flannel"
\$inventory = "$INV"
\$shared_folders = { 'temp/docker_rpms' => "/var/cache/yum/x86_64/7/docker-ce/packages" }
\$extra_vars = {
dns_domain: my.custom.domain
}
# or
\$extra_vars = "path/to/extra/vars/file.yml"
EOF
# make the rpm cache
mkdir -p temp/docker_rpms
vagrant up
# make a copy of the downloaded docker rpm, to speed up the next provisioning run
scp kub-1:/var/cache/yum/x86_64/7/docker-ce/packages/* temp/docker_rpms/
# copy kubectl access configuration in place
mkdir $HOME/.kube/ &> /dev/null
ln -s $PWD/$INV/artifacts/admin.conf $HOME/.kube/config
# make the kubectl binary available
sudo ln -s $PWD/$INV/artifacts/kubectl /usr/local/bin/kubectl
#or
export PATH=$PATH:$PWD/$INV/artifacts
```
If a vagrant run failed and you've made some changes to fix the issue causing
the failure, here is how you would re-run ansible:
```ShellSession
ansible-playbook -vvv -i .vagrant/provisioners/ansible/inventory/vagrant_ansible_inventory cluster.yml
```
If all went well, you can check that it's all working as expected:
```ShellSession
kubectl get nodes
```
The output should look like this:
```ShellSession
$ kubectl get nodes
NAME STATUS ROLES AGE VERSION
kub-1 Ready control-plane,master 4m37s v1.22.5
kub-2 Ready control-plane,master 4m7s v1.22.5
kub-3 Ready <none> 3m7s v1.22.5
```
Another nice test is the following:
```ShellSession
kubectl get pods --all-namespaces -o wide
```
Which should yield something like the following:
```ShellSession
$ kubectl get pods --all-namespaces -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system coredns-8474476ff8-m2469 1/1 Running 0 2m45s 10.233.65.2 kub-2 <none> <none>
kube-system coredns-8474476ff8-v5wzj 1/1 Running 0 2m41s 10.233.64.3 kub-1 <none> <none>
kube-system dns-autoscaler-5ffdc7f89d-76tnv 1/1 Running 0 2m43s 10.233.64.2 kub-1 <none> <none>
kube-system kube-apiserver-kub-1 1/1 Running 1 4m54s 10.0.20.101 kub-1 <none> <none>
kube-system kube-apiserver-kub-2 1/1 Running 1 4m33s 10.0.20.102 kub-2 <none> <none>
kube-system kube-controller-manager-kub-1 1/1 Running 1 5m1s 10.0.20.101 kub-1 <none> <none>
kube-system kube-controller-manager-kub-2 1/1 Running 1 4m33s 10.0.20.102 kub-2 <none> <none>
kube-system kube-flannel-9xgf5 1/1 Running 0 3m10s 10.0.20.102 kub-2 <none> <none>
kube-system kube-flannel-l8jbl 1/1 Running 0 3m10s 10.0.20.101 kub-1 <none> <none>
kube-system kube-flannel-zss4t 1/1 Running 0 3m10s 10.0.20.103 kub-3 <none> <none>
kube-system kube-multus-ds-amd64-bhpc9 1/1 Running 0 3m2s 10.0.20.103 kub-3 <none> <none>
kube-system kube-multus-ds-amd64-n6vl8 1/1 Running 0 3m2s 10.0.20.102 kub-2 <none> <none>
kube-system kube-multus-ds-amd64-qttgs 1/1 Running 0 3m2s 10.0.20.101 kub-1 <none> <none>
kube-system kube-proxy-2x4jl 1/1 Running 0 3m33s 10.0.20.101 kub-1 <none> <none>
kube-system kube-proxy-d48r7 1/1 Running 0 3m33s 10.0.20.103 kub-3 <none> <none>
kube-system kube-proxy-f45lp 1/1 Running 0 3m33s 10.0.20.102 kub-2 <none> <none>
kube-system kube-scheduler-kub-1 1/1 Running 1 4m54s 10.0.20.101 kub-1 <none> <none>
kube-system kube-scheduler-kub-2 1/1 Running 1 4m33s 10.0.20.102 kub-2 <none> <none>
kube-system nginx-proxy-kub-3 1/1 Running 0 3m33s 10.0.20.103 kub-3 <none> <none>
kube-system nodelocaldns-cg9tz 1/1 Running 0 2m41s 10.0.20.102 kub-2 <none> <none>
kube-system nodelocaldns-htswt 1/1 Running 0 2m41s 10.0.20.103 kub-3 <none> <none>
kube-system nodelocaldns-nsp7s 1/1 Running 0 2m41s 10.0.20.101 kub-1 <none> <none>
local-path-storage local-path-provisioner-66df45bfdd-km4zg 1/1 Running 0 2m54s 10.233.66.2 kub-3 <none> <none>
```