daemonset controller analysis
daemonset controller overview
The daemonset controller is one of the many controllers in the kube-controller-manager component and is the controller for daemonset resource objects. It watches four kinds of resources: daemonset, pod, node and ControllerRevision. When any of these resources changes, the daemonset controller is triggered to reconcile the corresponding daemonset object, which covers creating pods on suitable nodes, deleting pods from unsuitable nodes, rolling updates of the daemonset, updating the daemonset status, cleaning up old daemonset versions (ControllerRevisions), and so on.
daemonset controller architecture diagram
The rough composition and processing flow of the daemonset controller are shown in the figure below. The daemonset controller registers event handlers for the daemonset, pod, node and ControllerRevision objects; when an event is watched, the corresponding daemonset object is put into the queue. The syncDaemonSet method, which contains the daemonset controller's core reconciliation logic, then takes daemonset objects out of the queue and reconciles them (a simplified sketch of this pattern is shown below).
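To make this flow concrete, here is a minimal, simplified sketch of the generic informer -> workqueue -> worker pattern that the daemonset controller follows. This is not the actual DaemonSetsController source: the client construction, the "demo-daemonset" queue name and the trivial syncDaemonSet closure are illustrative assumptions only.

// Minimal sketch of the informer -> workqueue -> sync loop pattern
// (illustrative only, not the real DaemonSetsController code).
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/workqueue"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	factory := informers.NewSharedInformerFactory(client, 30*time.Second)
	dsInformer := factory.Apps().V1().DaemonSets()
	queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "demo-daemonset")

	// The event handlers only enqueue a namespace/name key; the real work is done
	// later by the worker, analogous to dsc.enqueueDaemonSet + dsc.syncDaemonSet.
	dsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			if key, err := cache.MetaNamespaceKeyFunc(obj); err == nil {
				queue.Add(key)
			}
		},
		UpdateFunc: func(old, cur interface{}) {
			if key, err := cache.MetaNamespaceKeyFunc(cur); err == nil {
				queue.Add(key)
			}
		},
		DeleteFunc: func(obj interface{}) {
			if key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err == nil {
				queue.Add(key)
			}
		},
	})

	stopCh := make(chan struct{})
	defer close(stopCh)
	factory.Start(stopCh)
	cache.WaitForCacheSync(stopCh, dsInformer.Informer().HasSynced)

	// Placeholder reconcile function standing in for the real syncDaemonSet.
	syncDaemonSet := func(key string) error {
		fmt.Printf("reconciling daemonset %s\n", key)
		return nil
	}

	// Worker loop: dequeue a key, reconcile it, requeue with rate limiting on error.
	wait.Until(func() {
		for {
			key, quit := queue.Get()
			if quit {
				return
			}
			if err := syncDaemonSet(key.(string)); err != nil {
				queue.AddRateLimited(key)
			} else {
				queue.Forget(key)
			}
			queue.Done(key)
		}
	}, time.Second, stopCh)
}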
daemonset update strategies
(1) OnDelete: with the OnDelete update strategy, after the DaemonSet pod template is updated, new DaemonSet pods are created only after you manually delete the old DaemonSet pods.
(2) RollingUpdate: the default update strategy. With the RollingUpdate update strategy, after the DaemonSet pod template is updated, old DaemonSet pods are deleted and new DaemonSet pods are created automatically according to the rolling update configuration. During a rolling update, at most one pod of the DaemonSet runs on each node. (A minimal sketch of how the two strategies are expressed on a DaemonSet object follows below.)
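As an illustration only, here is a minimal sketch, assuming the standard k8s.io/api/apps/v1 types, of how the two strategies are expressed in a DaemonSet's spec.updateStrategy; the maxUnavailable value of 1 is just an example value, not something taken from this article.

// Sketch: the two DaemonSet update strategies as apps/v1 API objects.
package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// RollingUpdate (the default): old pods are replaced automatically, with at
	// most maxUnavailable pods of the DaemonSet unavailable at any time.
	maxUnavailable := intstr.FromInt(1)
	rolling := appsv1.DaemonSetUpdateStrategy{
		Type: appsv1.RollingUpdateDaemonSetStrategyType,
		RollingUpdate: &appsv1.RollingUpdateDaemonSet{
			MaxUnavailable: &maxUnavailable,
		},
	}

	// OnDelete: new pods are only created after the old pods are deleted manually.
	onDelete := appsv1.DaemonSetUpdateStrategy{
		Type: appsv1.OnDeleteDaemonSetStrategyType,
	}

	// Either value would be assigned to ds.Spec.UpdateStrategy.
	fmt.Printf("%+v\n%+v\n", rolling, onDelete)
}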
The daemonset controller analysis is split into two parts:
(1) daemonset controller initialization and startup analysis;
(2) daemonset controller processing logic analysis.
1. daemonset controller initialization and startup analysis
Based on tag v1.17.4
https://github.com/kubernetes/kubernetes/releases/tag/v1.17.4
We go straight to the startDaemonSetController function, which serves as the entry point for analyzing the daemonset controller's initialization and startup.
startDaemonSetController
The main logic of startDaemonSetController:
(1) call daemon.NewDaemonSetsController to create and initialize a DaemonSetsController;
(2) start a goroutine that runs the DaemonSetsController's Run method.
// cmd/kube-controller-manager/app/apps.go
func startDaemonSetController(ctx ControllerContext) (http.Handler, bool, error) {
	if !ctx.AvailableResources[schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"}] {
		return nil, false, nil
	}
	dsc, err := daemon.NewDaemonSetsController(
		ctx.InformerFactory.Apps().V1().DaemonSets(),
		ctx.InformerFactory.Apps().V1().ControllerRevisions(),
		ctx.InformerFactory.Core().V1().Pods(),
		ctx.InformerFactory.Core().V1().Nodes(),
		ctx.ClientBuilder.ClientOrDie("daemon-set-controller"),
		flowcontrol.NewBackOff(1*time.Second, 15*time.Minute),
	)
	if err != nil {
		return nil, true, fmt.Errorf("error creating DaemonSets controller: %v", err)
	}
	go dsc.Run(int(ctx.ComponentConfig.DaemonSetController.ConcurrentDaemonSetSyncs), ctx.Stop)
	return nil, true, nil
}
1.1 daemon.NewDaemonSetsController
From the code of the daemon.NewDaemonSetsController function we can see that the daemonset controller registers EventHandlers for the daemonset, node, pod and ControllerRevision objects, i.e. it listens for events on these objects, puts them into the event queue and processes them. It also assigns the dsc.syncDaemonSet method to dsc.syncHandler, registering it as the core processing method; the dsc.Run method later calls this core processing method to reconcile daemonset objects (the core processing method is analyzed in detail further on).
// pkg/controller/daemon/daemon_controller.go
func NewDaemonSetsController(
	daemonSetInformer appsinformers.DaemonSetInformer,
	historyInformer appsinformers.ControllerRevisionInformer,
	podInformer coreinformers.PodInformer,
	nodeInformer coreinformers.NodeInformer,
	kubeClient clientset.Interface,
	failedPodsBackoff *flowcontrol.Backoff,
) (*DaemonSetsController, error) {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(klog.Infof)
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
	if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
		if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("daemon_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
			return nil, err
		}
	}
	dsc := &DaemonSetsController{
		kubeClient:    kubeClient,
		eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "daemonset-controller"}),
		podControl: controller.RealPodControl{
			KubeClient: kubeClient,
			Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "daemonset-controller"}),
		},
		crControl: controller.RealControllerRevisionControl{
			KubeClient: kubeClient,
		},
		burstReplicas: BurstReplicas,
		expectations:  controller.NewControllerExpectations(),
		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "daemonset"),
	}
	daemonSetInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			ds := obj.(*apps.DaemonSet)
			klog.V(4).Infof("Adding daemon set %s", ds.Name)
			dsc.enqueueDaemonSet(ds)
		},
		UpdateFunc: func(old, cur interface{}) {
			oldDS := old.(*apps.DaemonSet)
			curDS := cur.(*apps.DaemonSet)
			klog.V(4).Infof("Updating daemon set %s", oldDS.Name)
			dsc.enqueueDaemonSet(curDS)
		},
		DeleteFunc: dsc.deleteDaemonset,
	})
	dsc.dsLister = daemonSetInformer.Lister()
	dsc.dsStoreSynced = daemonSetInformer.Informer().HasSynced
	historyInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addHistory,
		UpdateFunc: dsc.updateHistory,
		DeleteFunc: dsc.deleteHistory,
	})
	dsc.historyLister = historyInformer.Lister()
	dsc.historyStoreSynced = historyInformer.Informer().HasSynced
	// Watch for creation/deletion of pods. The reason we watch is that we don't want a daemon set to create/delete
	// more pods until all the effects (expectations) of a daemon set's create/delete have been observed.
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addPod,
		UpdateFunc: dsc.updatePod,
		DeleteFunc: dsc.deletePod,
	})
	dsc.podLister = podInformer.Lister()
	// This custom indexer will index pods based on their NodeName which will decrease the amount of pods we need to get in simulate() call.
	podInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{
		"nodeName": indexByPodNodeName,
	})
	dsc.podNodeIndex = podInformer.Informer().GetIndexer()
	dsc.podStoreSynced = podInformer.Informer().HasSynced
	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addNode,
		UpdateFunc: dsc.updateNode,
	},
	)
	dsc.nodeStoreSynced = nodeInformer.Informer().HasSynced
	dsc.nodeLister = nodeInformer.Lister()