Method 1: Match a Node with nodeSelector
[root@k8s-master kuboard]# kubectl get node --show-labels
NAME STATUS ROLES AGE VERSION LABELS
k8s-master Ready control-plane 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,k8s.kuboard.cn/role=etcd,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-master,kubernetes.io/os=linux,node-role.kubernetes.io/control-plane=,node.kubernetes.io/exclude-from-external-load-balancers=
k8s-node01 Ready <none> 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-node01,kubernetes.io/os=linux
k8s-node02 Ready <none> 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-node02,kubernetes.io/os=linux
[root@k8s-master kuboard]# kubectl label nodes k8s-node01 disk=ssd
node/k8s-node01 labeled
[root@k8s-master kuboard]# kubectl get node --show-labels
NAME STATUS ROLES AGE VERSION LABELS
k8s-master Ready control-plane 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,k8s.kuboard.cn/role=etcd,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-master,kubernetes.io/os=linux,node-role.kubernetes.io/control-plane=,node.kubernetes.io/exclude-from-external-load-balancers=
k8s-node01 Ready <none> 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,disk=ssd,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-node01,kubernetes.io/os=linux
k8s-node02 Ready <none> 4d22h v1.29.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-node02,kubernetes.io/os=linux
Next, add a nodeSelector to the Pod definition:
[root@k8s-master test]# cat nginx.yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  labels:
    env: test
spec:
  containers:
  - name: nginx
    image: nginx:latest
    imagePullPolicy: IfNotPresent
  nodeSelector:          ### specify the Node label to match here
    disk: ssd
[root@k8s-master test]# kubectl apply -f nginx.yaml
pod/nginx created
[root@k8s-master test]# kubectl get po -n default
NAME READY STATUS RESTARTS AGE
nginx 0/1 ContainerCreating 0 11s
[root@k8s-master test]# kubectl get po -n default -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx 0/1 ContainerCreating 0 16s <none> k8s-node01 <none> <none>
Method 2: Specifying nodeName
You can set the nodeName field in the Pod spec to bind the Pod directly to a specific Node. An example of specifying nodeName follows.
Check the node names:
[root@k8s-master test]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane 4d23h v1.29.7
k8s-node01 Ready <none> 4d23h v1.29.7
k8s-node02 Ready <none> 4d23h v1.29.7
[root@k8s-master test]# kubectl apply -f nginx.yaml
pod/nginx-byname created
[root@k8s-master test]# cat nginx.yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx-byname
  labels:
    env: test
spec:
  nodeName: k8s-node02
  containers:
  - name: nginx
    image: nginx:latest
    imagePullPolicy: IfNotPresent
[root@k8s-master test]# kubectl get po -n default -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-byname 0/1 ContainerCreating 0 17s <none> k8s-node02 <none> <none>
Using nodeName to select a node has some limitations:
If the named node does not exist, the Pod will not run, and in some cases it may be automatically deleted (see the sketch after this list).
If the named node does not have the resources required to run the Pod, the Pod fails, and the failure reason indicates whether it was due to insufficient memory or CPU.
Node names in cloud environments are not always predictable or stable.
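A minimal sketch of the first limitation (the node name no-such-node is hypothetical): because nodeName bypasses the scheduler entirely, a Pod bound to a nonexistent node is never picked up by any kubelet and simply never starts.
apiVersion: v1
kind: Pod
metadata:
  name: nginx-badnode
spec:
  nodeName: no-such-node          ## hypothetical node name; no kubelet will ever run this Pod
  containers:
  - name: nginx
    image: nginx:latest
    imagePullPolicy: IfNotPresent
kubectl get po would show this Pod stuck in Pending, with no scheduling events, until the manifest is corrected.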
Method 3: Affinity and anti-affinity
nodeSelector is the simplest way to constrain Pods to nodes with specific labels. Affinity and anti-affinity expand the types of constraints you can define. Some of their benefits:
The affinity/anti-affinity language is more expressive. nodeSelector only selects nodes that carry all the specified labels; affinity/anti-affinity gives you finer control over the selection logic.
You can mark a rule as "soft" or "preferred", so the scheduler still places the Pod even when no matching node can be found.
You can constrain a Pod using the labels of other Pods running on a node (or in another topology domain), rather than only the node's own labels. This lets you define rules about which Pods may be co-located.
There are three ways to schedule Pods with affinity/anti-affinity:
Node affinity (nodeAffinity): matches labels on the Node.
Pod affinity (podAffinity): matches labels on Pods; the new Pod is placed in the same topology domain as the matched Pods.
Pod anti-affinity (podAntiAffinity): matches labels on Pods; the new Pod is kept out of the topology domain of the matched Pods (see the skeleton after this list).
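All three rule types live under spec.affinity. A field-level skeleton (empty bodies, only to show where each block goes; not a manifest to apply as-is):
spec:
  affinity:
    nodeAffinity: {}        ## matches Node labels
    podAffinity: {}         ## matches Pod labels; co-locate with them
    podAntiAffinity: {}     ## matches Pod labels; keep away from them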
To make the examples easier to follow, add labels to two of the nodes:
kubectl label nodes k8s-node01 disk=ssd
kubectl label nodes k8s-node01 topology.kubernetes.io/zone=Beijing
kubectl label nodes k8s-master disk=hdd
kubectl label nodes k8s-master topology.kubernetes.io/zone=Shanghai
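To verify the labels, kubectl's -L (--label-columns) flag prints the chosen label values as extra columns; the exact output depends on your cluster:
kubectl get nodes -L disk -L topology.kubernetes.io/zone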
Scheduling Pods with node affinity (nodeAffinity)
Node affinity (nodeAffinity) expresses what kind of node a Pod prefers or requires. It comes in two forms:
requiredDuringSchedulingIgnoredDuringExecution: hard affinity. The Pod must be placed on a node carrying the specified labels; if no such node exists, the Pod stays in Pending.
preferredDuringSchedulingIgnoredDuringExecution: preferred (soft) affinity. The Pod prefers nodes carrying the specified labels, but if none exists it is scheduled onto another node.
When deploying many Pods that target one labeled node, soft affinity lets some of them land elsewhere; otherwise every Pod would be pinned to a single node, and a failure of that node would affect them all.
Soft affinity therefore means the Pod prefers, but is not required, to run on the specified nodes.
[root@k8s-master test]# cat pod-nodeAffinity.yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:    ## hard (required) affinity
        nodeSelectorTerms:
        - matchExpressions:
          - key: disk
            operator: In
            values:
            - ssd
      preferredDuringSchedulingIgnoredDuringExecution:   ## preferred (soft) node affinity
      - weight: 1
        preference:
          matchExpressions:
          - key: topology.kubernetes.io/zone
            operator: In
            values:
            - Shanghai
  containers:
  - name: nginx
    image: docker.io/library/nginx:latest
    imagePullPolicy: IfNotPresent
[root@k8s-master test]# kubectl apply -f pod-nodeAffinity.yaml
pod/nginx created
[root@k8s-master test]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx 1/1 Running 0 12s 100.97.125.31 k8s-node01 <none> <none>
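The Pod lands on k8s-node01: the required disk=ssd term matches only k8s-node01, so the weight-1 preference for zone Shanghai has nothing left to choose between. To observe the Pending behavior of hard affinity, point the required term at a label value no node carries (disk=nvme below is hypothetical):
apiVersion: v1
kind: Pod
metadata:
  name: nginx-pending
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: disk
            operator: In
            values:
            - nvme        ## hypothetical value: no node has disk=nvme, so this Pod stays Pending
  containers:
  - name: nginx
    image: docker.io/library/nginx:latest
    imagePullPolicy: IfNotPresent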
Inter-Pod affinity (podAffinity) scheduling
Inter-Pod affinity expresses which existing Pods a new Pod prefers to be co-scheduled with. Like node affinity, Pod affinity and anti-affinity come in two types:
requiredDuringSchedulingIgnoredDuringExecution: required (hard)
preferredDuringSchedulingIgnoredDuringExecution: preferred (soft)
Usage of requiredDuringSchedulingIgnoredDuringExecution
[root@k8s-master test]# cat requiredDuringSchedulingIgnoredDuringExecution.yaml
apiVersion: v1
kind: Pod
metadata:
  name: redis
  labels:              ## define a Pod carrying the label dbType=kv
    dbType: kv
spec:
  containers:
  - name: redis
    image: docker.io/library/redis:latest
    imagePullPolicy: IfNotPresent
---
apiVersion: v1
kind: Pod
metadata:
  name: nginx
spec:
  affinity:
    podAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:       ## match Pods via labelSelector: to schedule, the target topology domain must already host a Pod satisfying this condition
          matchExpressions:  ## this selector matches the redis Pod, so nginx will run in the same topology domain (here, the same zone) as redis
          - key: dbType
            operator: In
            values:
            - kv
        topologyKey: topology.kubernetes.io/zone    ## topology domain; must not be empty
        ## this means the node running the Pod must carry the topology.kubernetes.io/zone label key
  containers:
  - name: nginx
    image: docker.io/library/nginx:latest
    imagePullPolicy: IfNotPresent
[root@k8s-master test]# kubectl apply -f requiredDuringSchedulingIgnoredDuringExecution.yaml
pod/redis created
pod/nginx created
[root@k8s-master test]# kubectl get po -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx 1/1 Running 0 19s 100.97.125.33 k8s-node01 <none> <none>
redis 1/1 Running 0 19s 100.97.125.34 k8s-node01 <none> <none>
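Both Pods end up on k8s-node01: redis is placed first without constraints, and nginx must then run in a zone that already hosts a dbType=kv Pod; zone Beijing covers only k8s-node01. To double-check which Pods the labelSelector matched, the standard -l selector flag works:
kubectl get po -l dbType=kv -o wide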
Usage of preferredDuringSchedulingIgnoredDuringExecution
For Pod affinity, preferredDuringSchedulingIgnoredDuringExecution works the same way as it does for node affinity: the Pod prefers, but is not required, to be placed together with the matched Pods.
[root@k8s-master test]# cat preferredDuringSchedulingIgnoredDuringExecution.yaml
apiVersion: v1
kind: Pod
metadata:
  name: redis
  labels:
    dbType: kv
spec:
  nodeName: k8s-master
  containers:
  - name: redis
    image: docker.io/library/redis:latest
    imagePullPolicy: IfNotPresent
---
apiVersion: v1
kind: Pod
metadata:
  name: nginx
spec:
  affinity:
    podAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          labelSelector:
            matchExpressions:
            - key: dbType
              operator: In
              values:
              - kv
          topologyKey: topology.kubernetes.io/zone
  containers:
  - name: nginx
    image: docker.io/library/nginx:latest
    imagePullPolicy: IfNotPresent
[root@k8s-master test]# kubectl apply -f preferredDuringSchedulingIgnoredDuringExecution.yaml
pod/redis created
pod/nginx created
[root@k8s-master test]# kubectl get po -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx 1/1 Running 0 15s 100.97.125.35 k8s-node01 <none> <none>
redis 1/1 Running 0 15s 100.116.59.85 k8s-master <none> <none>
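Note that nginx does not follow redis onto k8s-master, even at weight 100: nginx does not tolerate the control-plane taint on the master, so the preference cannot be satisfied; and because the rule is preferred rather than required, the Pod is scheduled onto k8s-node01 instead of staying Pending. The taint can be confirmed with:
kubectl describe node k8s-master | grep -i taint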
Scheduling Pods with Pod anti-affinity (podAntiAffinity)
Pod anti-affinity spreads replicas carrying the same label across different nodes.
For example, create a Deployment with 4 replicas:
[root@k8s-master test]# cat redis.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-cache
spec:
  selector:
    matchLabels:
      app: store
  replicas: 4            ## 4 replicas
  template:
    metadata:
      labels:            ## define the label app=store
        app: store
    spec:
      affinity:
        podAntiAffinity:   ## anti-affinity
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:    ## the anti-affinity rule prevents any two replicas from running on the same Node
              - key: app
                operator: In
                values:
                - store
            topologyKey: "kubernetes.io/hostname"   ## kubernetes.io/hostname makes each individual node its own topology domain
      containers:
      - name: redis-server
        image: docker.io/library/redis:latest
        imagePullPolicy: IfNotPresent
The test cluster has only 2 schedulable (worker) Nodes. After creating the Deployment, one Pod runs on each of them, while the other 2 Pods stay Pending:
[root@k8s-master test]# kubectl apply -f redis.yaml
deployment.apps/redis-cache created
[root@k8s-master test]# kubectl get po -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
redis-cache-6fbbd9d45d-2shkr 0/1 Pending 0 51s <none> <none> <none> <none>
redis-cache-6fbbd9d45d-9hpcl 0/1 Pending 0 51s <none> <none> <none> <none>
redis-cache-6fbbd9d45d-lxcj8 1/1 Running 0 51s 100.97.125.36 k8s-node01 <none> <none>
redis-cache-6fbbd9d45d-w8sjx 1/1 Running 0 51s 100.125.152.28 k8s-node02 <none> <none>
Describe one of the Pending Pods: the events show that it is precisely the anti-affinity rule (together with the untolerated control-plane taint on k8s-master) that leaves the Pod with no eligible Node.
[root@k8s-master test]# kubectl describe pod redis-cache-6fbbd9d45d-2shkr
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 2m17s default-scheduler 0/3 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/master: }, 2 node(s) didn't match pod anti-affinity rules. preemption: 0/3 nodes are available: 1 Preemption is not helpful for scheduling, 2 No preemption victims found for incoming pod.
[root@k8s-master test]# kubectl describe pod redis-cache-6fbbd9d45d-9hpcl
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 3m8s default-scheduler 0/3 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/master: }, 2 node(s) didn't match pod anti-affinity rules. preemption: 0/3 nodes are available: 1 Preemption is not helpful for scheduling, 2 No preemption victims found for incoming pod.
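If the extra replicas should run anyway, one common adjustment (a sketch reusing the labels above, shown as the affinity fragment only, not the full manifest) is to soften the rule into preferred anti-affinity: the scheduler spreads replicas across nodes when it can, but still places them when it cannot.
affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
    - weight: 100
      podAffinityTerm:
        labelSelector:
          matchExpressions:
          - key: app
            operator: In
            values:
            - store
        topologyKey: kubernetes.io/hostname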