First, check the status of the master node.
It is NotReady, so we inspect the kubelet logs.
root@ubuntu-128:/home/itcast/working# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu-128 NotReady master 79m v1.13.1
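Besides the kubelet journal shown next, the node conditions can be inspected directly (a general diagnostic, not part of the original transcript); the Ready condition normally reports the same NetworkPluginNotReady message:
kubectl describe node ubuntu-128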
The logs show that it is a network problem (the CNI configuration is not initialized):
root@ubuntu-128:/home/itcast/working# journalctl -f -u kubelet
-- Logs begin at 日 2019-08-11 12:26:08 CST. --
8月 11 12:35:25 ubuntu-128 kubelet[9252]: E0811 12:35:25.364881 9252 kubelet.go:2192] Container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:docker: network plugin is not ready: cni config uninitialized
8月 11 12:35:30 ubuntu-128 kubelet[9252]: W0811 12:35:30.367241 9252 cni.go:203] Unable to update cni config: No networks found in /etc/cni/net.d
8月 11 12:35:30 ubuntu-128 kubelet[9252]: E0811 12:35:30.367648 9252 kubelet.go:2192] Container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:docker: network plugin is not ready: cni config uninitialized
8月 11 12:35:35 ubuntu-128 kubelet[9252]: W0811 12:35:35.369164 9252 cni.go:203] Unable to update cni config: No networks found in /etc/cni/net.d
8月 11 12:35:35 ubuntu-128 kubelet[9252]: E0811 12:35:35.369379 9252 kubelet.go:2192] Container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:docker: network plugin is not ready: cni config uninitialized
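The message "No networks found in /etc/cni/net.d" can be verified on the node itself; until a CNI add-on such as flannel writes its config file there, the directory is empty (an illustrative check, not from the original session):
ls -l /etc/cni/net.d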
Checking the pod status shows that kube-flannel-ds-amd64-ftt86 is already Running, but coredns-59b69b999c-l5pfz keeps crashing with CrashLoopBackOff, so we look at its logs.
root@ubuntu-128:/home/itcast/working# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-59b69b999c-l5pfz 0/1 CrashLoopBackOff 6 13m 10.244.0.2 ubuntu-128 <none> <none>
coredns-59b69b999c-vnw5l 0/1 CrashLoopBackOff 6 13m 10.244.0.3 ubuntu-128 <none> <none>
etcd-ubuntu-128 1/1 Running 0 12m 192.168.13.128 ubuntu-128 <none> <none>
kube-apiserver-ubuntu-128 1/1 Running 0 12m 192.168.13.128 ubuntu-128 <none> <none>
kube-controller-manager-ubuntu-128 1/1 Running 0 12m 192.168.13.128 ubuntu-128 <none> <none>
kube-flannel-ds-amd64-ftt86 1/1 Running 0 8m54s 192.168.13.128 ubuntu-128 <none> <none>
kube-proxy-nd84h 1/1 Running 0 13m 192.168.13.128 ubuntu-128 <none> <none>
kube-scheduler-ubuntu-128 1/1 Running 0 12m 192.168.13.128 ubuntu-128 <none> <none>
The logs of coredns-59b69b999c-l5pfz show that the cause is a DNS forwarding loop; the official documentation explains it at https://coredns.io/plugins/loop/#troubleshooting.
root@ubuntu-128:/home/itcast/working# kubectl logs -f coredns-59b69b999c-l5pfz -n kube-system
.:53
2019-08-11T06:35:49.508Z [INFO] CoreDNS-1.2.6
2019-08-11T06:35:49.508Z [INFO] linux/amd64, go1.11.2, 756749c
CoreDNS-1.2.6
linux/amd64, go1.11.2, 756749c
[INFO] plugin/reload: Running configuration MD5 = f65c4821c8a9b7b5eb30fa4fbc167769
[FATAL] plugin/loop: Forwarding loop detected in "." zone. Exiting. See https://coredns.io/plugins/loop#troubleshooting. Probe query: "HINFO 4841045043467666762.1128331503887863411.".
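The loop plugin aborts when the probe query CoreDNS sends to its upstream comes straight back to itself, which happens here because CoreDNS forwards to the resolvers listed in the node's /etc/resolv.conf and that file points at a local resolver. A quick check on the node (illustrative):
cat /etc/resolv.conf
# nameserver 127.0.0.1   <- a loopback resolver here produces the forwarding loop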
The fix is to change the nameserver entry in /etc/resolv.conf from 127.0.0.1 to 192.168.13.128.
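A minimal way to apply the change, assuming /etc/resolv.conf is a plain file on this host (if it is managed by systemd-resolved or resolvconf, edit the underlying configuration instead):
sed -i 's/^nameserver 127.0.0.1$/nameserver 192.168.13.128/' /etc/resolv.conf
grep nameserver /etc/resolv.conf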
Solution 1: reset the kubeadm state and re-initialize the cluster.
kubeadm reset
# After the reset completes, re-initialize
kubeadm init --config kubeadm.conf
# Then run the following three commands again
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
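The contents of kubeadm.conf are not shown in this log. For v1.13, a minimal ClusterConfiguration that matches the 10.244.0.x pod addresses seen above might look like the following sketch (an assumption, not the author's actual file):
apiVersion: kubeadm.k8s.io/v1beta1
kind: ClusterConfiguration
kubernetesVersion: v1.13.1
networking:
  podSubnet: 10.244.0.0/16   # flannel's default pod CIDR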
Deploy the flannel network:
root@ubuntu-128:/home/itcast/working# kubectl apply -f kube-flannel.yml
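kube-flannel.yml was prepared beforehand. If it is missing, the manifest can be fetched from the flannel repository (URL as published for flannel at that time; verify against the project documentation):
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml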
Finally, all of the kube-system pods are Running and the master node is Ready:
root@ubuntu-128:/home/itcast/working# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-59b69b999c-ffsdq 1/1 Running 0 101s
coredns-59b69b999c-qkjd9 1/1 Running 0 101s
etcd-ubuntu-128 1/1 Running 0 62s
kube-apiserver-ubuntu-128 1/1 Running 0 54s
kube-controller-manager-ubuntu-128 1/1 Running 0 57s
kube-flannel-ds-amd64-99fdb 1/1 Running 0 24s
kube-proxy-mfvbx 1/1 Running 0 101s
kube-scheduler-ubuntu-128 1/1 Running 0 58s
root@ubuntu-128:/home/itcast/working# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu-128 Ready master 12m v1.13.1
Join the worker nodes to the cluster:
root@ubuntu-130:/home/itcast/working# kubeadm join 192.168.13.128:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:e3f32e2e1eed5ccd9799472dc68c3caed5581479165857e5cfc792d3e5fb1b27
[preflight] Running pre-flight checks
[preflight] Some fatal errors occurred:
[ERROR FileAvailable--etc-kubernetes-kubelet.conf]: /etc/kubernetes/kubelet.conf already exists
[ERROR FileAvailable--etc-kubernetes-bootstrap-kubelet.conf]: /etc/kubernetes/bootstrap-kubelet.conf already exists
[ERROR Port-10250]: Port 10250 is in use
[ERROR FileAvailable--etc-kubernetes-pki-ca.crt]: /etc/kubernetes/pki/ca.crt already exists
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
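These errors mean that an earlier kubeadm init/join already left configuration files, certificates, and a running kubelet on this node, so the state must be cleaned with kubeadm reset before joining again. If instead the bootstrap token had expired, a fresh join command could be printed on the master (shown as a general tip, not from the original transcript):
kubeadm token create --print-join-command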
root@ubuntu-130:/home/itcast/working# kubeadm reset
[reset] WARNING: changes made to this host by 'kubeadm init' or 'kubeadm join' will be reverted.
[reset] are you sure you want to proceed? [y/N]: y
[preflight] running pre-flight checks
[reset] Reading configuration from the cluster...
[reset] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
W0811 15:00:14.635476 29097 reset.go:213] [reset] Unable to fetch the kubeadm-config ConfigMap, using etcd pod spec as fallback: failed to get config map: Get https://192.168.13.128:6443/api/v1/namespaces/kube-system/configmaps/kubeadm-config: x509: certificate signed by unknown authority (possibly because of "crypto/rsa: verification error" while trying to verify candidate authority certificate "kubernetes")
[reset] no etcd config found. Assuming external etcd
[reset] please manually reset etcd to prevent further issues
[reset] stopping the kubelet service
[reset] unmounting mounted directories in "/var/lib/kubelet"
[reset] deleting contents of stateful directories: [/var/lib/kubelet /etc/cni/net.d /var/lib/dockershim /var/run/kubernetes]
[reset] deleting contents of config directories: [/etc/kubernetes/manifests /etc/kubernetes/pki]
[reset] deleting files: [/etc/kubernetes/admin.conf /etc/kubernetes/kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf /etc/kubernetes/controller-manager.conf /etc/kubernetes/scheduler.conf]
The reset process does not reset or clean up iptables rules or IPVS tables.
If you wish to reset iptables, you must do so manually.
For example:
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
If your cluster was setup to utilize IPVS, run ipvsadm --clear (or similar)
to reset your system's IPVS tables.
If kubeadm join reports errors like these (leftover configuration files, port 10250 in use), resetting the node with kubeadm reset clears the stale state, and the join command can then be run again.
Repeat the same join procedure on the other worker node (ubuntu-129).
Check the pod and node status on the master; everything is now running correctly:
root@ubuntu-128:/home/itcast/working# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-59b69b999c-ffsdq 1/1 Running 0 18m 10.244.0.4 ubuntu-128 <none> <none>
coredns-59b69b999c-qkjd9 1/1 Running 0 18m 10.244.0.5 ubuntu-128 <none> <none>
etcd-ubuntu-128 1/1 Running 0 17m 192.168.13.128 ubuntu-128 <none> <none>
kube-apiserver-ubuntu-128 1/1 Running 0 17m 192.168.13.128 ubuntu-128 <none> <none>
kube-controller-manager-ubuntu-128 1/1 Running 0 17m 192.168.13.128 ubuntu-128 <none> <none>
kube-flannel-ds-amd64-99fdb 1/1 Running 0 17m 192.168.13.128 ubuntu-128 <none> <none>
kube-flannel-ds-amd64-b9fzb 1/1 Running 0 3m55s 192.168.13.129 ubuntu-129 <none> <none>
kube-flannel-ds-amd64-g2nfm 1/1 Running 0 2m30s 192.168.13.130 ubuntu-130 <none> <none>
kube-proxy-hbcwr 1/1 Running 0 2m30s 192.168.13.130 ubuntu-130 <none> <none>
kube-proxy-l5nml 1/1 Running 0 3m55s 192.168.13.129 ubuntu-129 <none> <none>
kube-proxy-mfvbx 1/1 Running 0 18m 192.168.13.128 ubuntu-128 <none> <none>
kube-scheduler-ubuntu-128 1/1 Running 0 17m 192.168.13.128 ubuntu-128 <none> <none>
root@ubuntu-128:/home/itcast/working# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu-128 Ready master 19m v1.13.1
ubuntu-129 Ready <none> 4m41s v1.13.1
ubuntu-130 Ready <none> 3m15s v1.13.1
Solution 2:
Re-initializing the cluster wipes out everything that was done before, which is not acceptable.
So when a system pod misbehaves, it is enough to fix the root cause and then recreate the pod.
root@ubuntu-128:~# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-59b69b999c-j8v5n 0/1 CrashLoopBackOff 6 8h
coredns-59b69b999c-vvzqq 0/1 CrashLoopBackOff 6 8h
etcd-ubuntu-128 1/1 Running 1 8h
kube-apiserver-ubuntu-128 1/1 Running 1 8h
kube-controller-manager-ubuntu-128 1/1 Running 2 8h
kube-flannel-ds-amd64-7b54h 1/1 Running 2 8h
kube-flannel-ds-amd64-7zpjw 1/1 Running 1 8h
kube-flannel-ds-amd64-cxwsg 1/1 Running 0 8h
kube-proxy-6hb9w 1/1 Running 1 8h
kube-proxy-wgk4w 1/1 Running 1 8h
kube-proxy-xqgrw 1/1 Running 0 8h
kube-scheduler-ubuntu-128 1/1 Running 2 8h
The root cause is the same as before: the nameserver in /etc/resolv.conf needs to be changed from 127.0.0.1 to 192.168.13.128. After making that change, proceed as follows.
Delete the CoreDNS pods:
root@ubuntu-128:~# kubectl delete pod coredns-59b69b999c-j8v5n -n kube-system
pod "coredns-59b69b999c-j8v5n" deleted
root@ubuntu-128:~# kubectl delete pod coredns-59b69b999c-vvzqq -n kube-system
pod "coredns-59b69b999c-vvzqq" deleted
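Equivalently, both replicas can be deleted in one command via the label that kubeadm applies to the CoreDNS pods (assuming the default k8s-app=kube-dns label):
kubectl delete pod -n kube-system -l k8s-app=kube-dns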
The Deployment controller recreates the CoreDNS pods automatically:
root@ubuntu-128:~# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-59b69b999c-7w2mk 1/1 Running 0 79s
etcd-ubuntu-128 1/1 Running 1 8h
kube-apiserver-ubuntu-128 1/1 Running 1 8h
kube-controller-manager-ubuntu-128 1/1 Running 3 8h
kube-flannel-ds-amd64-7b54h 1/1 Running 2 8h
kube-flannel-ds-amd64-7zpjw 1/1 Running 1 8h
kube-flannel-ds-amd64-cxwsg 1/1 Running 0 8h
kube-proxy-6hb9w 1/1 Running 1 8h
kube-proxy-wgk4w 1/1 Running 1 8h
kube-proxy-xqgrw 1/1 Running 0 8h
kube-scheduler-ubuntu-128 1/1 Running 3 8h
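To confirm that cluster DNS really works after the restart, a throwaway pod can run a lookup against the kubernetes service (an optional check; busybox 1.28 is used because nslookup is broken in some later busybox images):
kubectl run -it --rm dnstest --image=busybox:1.28 --restart=Never -- nslookup kubernetes.default
# Expected: the cluster DNS service answers and resolves kubernetes.default.svc.cluster.local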