Fix recover-control-plane to work with etcd 3.3.x and add CI (#5500)

* Fix recover-control-plane to work with etcd 3.3.x and add CI

* Set default values for testcase

* Add actual test jobs

* Attempt to satisfy gitlab ci linter

* Fix ansible targets

* Set etcd_member_name as stated in the docs...

* Recovering from 0 masters is not supported yet

* Add other master to broken_kube-master group as well

* Increase number of retries to see if etcd needs more time to heal

* Make number of retries for ETCD loops configurable, increase it for recovery CI and document it
This commit is contained in:
qvicksilver
2020-02-11 10:38:01 +01:00
committed by GitHub
parent 68c8c05775
commit ac2135e450
23 changed files with 204 additions and 134 deletions

View File

@@ -8,21 +8,22 @@
retries: 6
delay: 10
changed_when: false
when: groups['broken_kube-master']
- name: Delete old kube-master nodes from cluster
- name: Delete broken kube-master nodes from cluster
shell: "{{ bin_dir }}/kubectl delete node {{ item }}"
environment:
- KUBECONFIG: "{{ ansible_env.HOME | default('/root') }}/.kube/config"
with_items: "{{ old_kube_masters.split(',') }}"
register: delete_old_kube_masters
with_items: "{{ groups['broken_kube-master'] }}"
register: delete_broken_kube_masters
failed_when: false
when: old_kube_masters is defined
when: groups['broken_kube-master']
- name: Fail if unable to delete old kube-master nodes from cluster
- name: Fail if unable to delete broken kube-master nodes from cluster
fail:
msg: "Unable to delete old kube-master node: {{ item.item }}"
loop: "{{ delete_old_kube_masters.results }}"
msg: "Unable to delete broken kube-master node: {{ item.item }}"
loop: "{{ delete_broken_kube_masters.results }}"
changed_when: false
when:
- old_kube_masters is defined
- groups['broken_kube-master']
- "item.rc != 0 and not 'NotFound' in item.stderr"