初始化
我们看下nodeMetricInformer是干嘛用的。
nodeMetricInformer在statesInformer初始化时,作为一个Plugin注册到DefaultPluginRegistry中并完成了初始化。
// DefaultPluginRegistry maps each informer plugin name to a plugin instance.
// Every instance is built by calling its constructor when this package-level
// variable is initialized.
var DefaultPluginRegistry = map[PluginName]informerPlugin{
nodeSLOInformerName: NewNodeSLOInformer(),
pvcInformerName: NewPVCInformer(),
nodeTopoInformerName: NewNodeTopoInformer(),
nodeInformerName: NewNodeInformer(),
podsInformerName: NewPodsInformer(),
nodeMetricInformerName: NewNodeMetricInformer(),
}
// NewNodeMetricInformer returns a pointer to a zero-valued nodeMetricInformer.
// No field is populated here.
func NewNodeMetricInformer() *nodeMetricInformer {
	return new(nodeMetricInformer)
}
继续往下看,statesInformer启动各个plugin,每个plugin都在独立的goroutine中执行Start()
// startPlugins launches every registered informer plugin in its own goroutine.
// The same stopCh is handed to each plugin's Start; this method does not wait
// for any plugin to finish.
func (s *statesInformer) startPlugins(stopCh <-chan struct{}) {
	plugins := s.states.informerPlugins
	for pluginName, plugin := range plugins {
		klog.V(4).Infof("starting informer plugin %v", pluginName)
		go plugin.Start(stopCh)
	}
}
启动Start()
nodeMetricInformer也在这里进行了Start()
- nodeMetricInformer.Run(stopCh) 就不用说了,正常的informer流程。
- syncNodeMetricWorker(stopCh) 从名称上来看是进行同步nodeMetric数据,怎样同步?同步到哪?
// Start runs the NodeMetric informer and the periodic reporting worker, then
// blocks until stopCh is closed.
//
// It returns early in two cases: when reporting is disabled, and when the
// informer caches fail to sync. In the second case the worker is not started,
// so no "successfully" message is logged on top of unsynced caches.
func (r *nodeMetricInformer) Start(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	klog.Infof("starting nodeMetricInformer")

	if !r.reportEnabled {
		klog.Infof("node metric report is disabled.")
		return
	}

	go r.nodeMetricInformer.Run(stopCh)
	if !cache.WaitForCacheSync(stopCh, r.nodeMetricInformer.HasSynced, r.podsInformer.HasSynced) {
		klog.Errorf("timed out waiting for node metric caches to sync")
		// The caches never became ready; starting the worker and reporting
		// success would be misleading.
		return
	}

	go r.syncNodeMetricWorker(stopCh)
	klog.Info("start nodeMetricInformer successfully")

	<-stopCh
	klog.Info("shutting down nodeMetricInformer daemon")
}
syncNodeMetricWorker()
- 先获取下上报的interval,然后进入一个for循环。
- stopCh关闭(或收到数据)时直接return退出for循环;否则每隔interval调用一次sync()方法,并在每次调用后重新获取interval。
// syncNodeMetricWorker calls sync once per report interval until stopCh is
// closed.
//
// The interval is re-read via getNodeMetricReportInterval after every sync, so
// a changed value takes effect on the next round. A single timer is reused and
// stopped on return, instead of allocating a fresh time.After timer on each
// iteration and leaving the last one pending after shutdown.
func (r *nodeMetricInformer) syncNodeMetricWorker(stopCh <-chan struct{}) {
	timer := time.NewTimer(r.getNodeMetricReportInterval())
	defer timer.Stop()

	for {
		select {
		case <-stopCh:
			return
		case <-timer.C:
			r.sync()
			// The timer has fired and its channel was drained above, so
			// Reset is safe without a prior Stop.
			timer.Reset(r.getNodeMetricReportInterval())
		}
	}
}
sync()
- 核心方法collectMetric(),采集nodeMetricInfo, podMetricInfo, prodReclaimableMetric
- 把采集的数据放入NodeMetricStatus,并更新该node对应的NodeMetric CR
  - 这里有一点要注意:若没有找到对应的NodeMetric,则直接返回,不做更新。
// sync collects the current metrics and writes them into the status of this
// node's NodeMetric object.
//
// The round is skipped when the node metric is not initialized or when
// collectMetric yields no node-level metric. The status update is retried on
// conflict. If the NodeMetric object does not exist, the round ends without an
// update and without an error; in that case no success message is logged,
// because nothing was written.
func (r *nodeMetricInformer) sync() {
	if !r.isNodeMetricInited() {
		klog.Warningf("node metric has not initialized, skip this round.")
		return
	}

	nodeMetricInfo, podMetricInfo, prodReclaimableMetric := r.collectMetric()
	if nodeMetricInfo == nil {
		klog.Warningf("node metric is not ready, skip this round.")
		return
	}

	newStatus := &slov1alpha1.NodeMetricStatus{
		UpdateTime:            &metav1.Time{Time: time.Now()},
		NodeMetric:            nodeMetricInfo,
		PodsMetric:            podMetricInfo,
		ProdReclaimableMetric: prodReclaimableMetric,
	}

	// updated records whether updateStatus actually succeeded, so that the
	// not-found path (which returns nil) is not reported as a success.
	updated := false
	retErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		nodeMetric, err := r.nodeMetricLister.Get(r.nodeName)
		if errors.IsNotFound(err) {
			klog.Warningf("nodeMetric %v not found, skip", r.nodeName)
			return nil
		}
		if err != nil {
			klog.Warningf("failed to get %s nodeMetric: %v", r.nodeName, err)
			return err
		}
		if err := r.statusUpdater.updateStatus(nodeMetric, newStatus); err != nil {
			return err
		}
		updated = true
		return nil
	})

	switch {
	case retErr != nil:
		klog.Warningf("update node metric status failed, status %v, err %v", util.DumpJSON(newStatus), retErr)
	case updated:
		klog.V(4).Infof("update node metric status success, detail: %v", util.DumpJSON(newStatus))
	}
}
再看下nodeMetric的数据结构
// NodeMetric pairs a NodeMetricSpec with a NodeMetricStatus. It embeds
// metav1.TypeMeta (inlined in JSON) and metav1.ObjectMeta (serialized under
// "metadata").
type NodeMetric struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec is serialized under "spec" and omitted when empty.
Spec NodeMetricSpec `json:"spec,omitempty"`
// Status is serialized under "status" and omitted when empty.
Status NodeMetricStatus `json:"status,omitempty"`
}
// NodeMetricStatus is the status portion of a NodeMetric: an update timestamp
// plus node, pod, host-application and prod-reclaimable metrics. Every field
// is omitted from JSON when empty.
type NodeMetricStatus struct {
// UpdateTime is the last time this NodeMetric was updated.
UpdateTime *metav1.Time `json:"updateTime,omitempty"`
// NodeMetric contains the metrics for this node.
NodeMetric *NodeMetricInfo `json:"nodeMetric,omitempty"`
// PodsMetric contains the metrics for pods belonging to this node.
PodsMetric []*PodMetricInfo `json:"podsMetric,omitempty"`
// HostApplicationMetric contains the metrics of out-of-band applications on the node.
HostApplicationMetric []*HostApplicationMetricInfo `json:"hostApplicationMetric,omitempty"`
// ProdReclaimableMetric is the indicator statistics of reclaimable Prod type resources.
ProdReclaimableMetric *ReclaimableMetric `json:"prodReclaimableMetric,omitempty"`
}