目录
sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go
Controller::processNextWorkItem
摘要:
记录对k8s的operator的Reconcile执行追踪
调用堆栈:
(gdb) bt
#0 hello/controllers.(*ApplicationReconciler).Reconcile (r=0xc0004f1980, ctx=..., req=..., ~r2=..., ~r3=...) at /root/work/hello/controllers/application_controller.go:70
#1 0x0000000001a1b750 in sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile (c=0xc000346840, ctx=..., req=..., ~b2=..., err=...)
at /root/work/hello/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:114
#2 0x0000000001a1e770 in sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler (c=0xc000346840, ctx=..., obj=...)
at /root/work/hello/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:311
#3 0x0000000001a1dc25 in sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem (c=0xc000346840, ctx=..., ~r1=false)
at /root/work/hello/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:266
#4 0x0000000001a1d248 in sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 ()
at /root/work/hello/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:227
#5 0x000000000046afe1 in runtime.goexit () at /usr/local/go/src/runtime/asm_amd64.s:1581
#6 0x00000000020455d8 in com/gogo/protobuf/proto.Message ()
#7 0x0000000002044438 in com/gogo/protobuf/proto.Message ()
#8 0x0000000002045698 in com/gogo/protobuf/proto.Message ()
#9 0x000000000203e930 in io/apimachinery/pkg/runtime.Object ()
#10 0x0000000000000006 in ?? ()
#11 0xf462ffb400000000 in ?? ()
#12 0x000000c0000a5110 in ?? ()
#13 0x0000000000000000 in ?? ()
核心函数:
sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go
Controller::Start
// Start implements controller.Controller.
func (c *Controller) Start(ctx context.Context) error {
// use an IIFE to get proper lock handling
// but lock outside to get proper handling of the queue shutdown
c.mu.Lock()
if c.Started {
return errors.New("controller was started more than once. This is likely to be caused by being added to a manager multiple times")
}
c.initMetrics()
// Set the internal context.
c.ctx = ctx
c.Queue = c.MakeQueue()
go func() {
<-ctx.Done()
c.Queue.ShutDown()
}()
wg := &sync.WaitGroup{}
err := func() error {
defer c.mu.Unlock()
// TODO(pwittrock): Reconsider HandleCrash
defer utilruntime.HandleCrash()
// NB(directxman12): launch the sources *before* trying to wait for the
// caches to sync so that they have a chance to register their intendeded
// caches.
for _, watch := range c.startWatches {
c.Log.Info("Starting EventSource", "source", fmt.Sprintf("%s", watch.src))
if err := watch.src.Start(ctx, watch.handler, c.Queue, watch.predicates...); err != nil {
return err
}
}
// Start the SharedIndexInformer factories to begin populating the SharedIndexInformer caches
c.Log.Info("Starting Controller")
for _, watch := range c.startWatches {
syncingSource, ok := watch.src.(source.SyncingSource)
if !ok {
continue
}
if err := func() error {
// use a context with timeout for launching sources and syncing caches.
sourceStartCtx, cancel := context.WithTimeout(ctx, c.CacheSyncTimeout)
defer cancel()
// WaitForSync waits for a definitive timeout, and returns if there
// is an error or a timeout
if err := syncingSource.WaitForSync(sourceStartCtx); err != nil {
err := fmt.Errorf("failed to wait for %s caches to sync: %w", c.Name, err)
c.Log.Error(err, "Could not wait for Cache to sync")
return err
}
return nil
}(); err != nil {
return err
}
}
// All the watches have been started, we can reset the local slice.
//
// We should never hold watches more than necessary, each watch source can hold a backing cache,
// which won't be garbage collected if we hold a reference to it.
c.startWatches = nil
// Launch workers to process resources
c.Log.Info("Starting workers", "worker count", c.MaxConcurrentReconciles)
wg.Add(c.MaxConcurrentReconciles)
for i := 0; i < c.MaxConcurrentReconciles; i++ {
go func() {
defer wg.Done()
// Run a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the reconcileHandler is never invoked concurrently with the same object.
for c.processNextWorkItem(ctx) {
}
}()
}
c.Started = true
return nil
}()
if err != nil {
return err
}
<-ctx.Done()
c.Log.Info("Shutdown signal received, waiting for all workers to finish")
wg.Wait()
c.Log.Info("All workers finished")
return nil
}
关键处理:
for i := 0; i < c.MaxConcurrentReconciles; i++ {
go func() {
defer wg.Done()
// Run a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the reconcileHandler is never invoked concurrently with the same object.
for c.processNextWorkItem(ctx) {
}
}()
}
Controller::processNextWorkItem
// processNextWorkItem will read a single work item off the workqueue and
// attempt to process it, by calling the reconcileHandler.
func (c *Controller) processNextWorkItem(ctx context.Context) bool {
obj, shutdown := c.Queue.Get()
if shutdown {
// Stop working
return false
}
// We call Done here so the workqueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
// not call Forget if a transient error occurs, instead the item is
// put back on the workqueue and attempted again after a back-off
// period.
defer c.Queue.Done(obj)
ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(1)
defer ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(-1)
c.reconcileHandler(ctx, obj)
return true
}
Controller::reconcileHandler
func (c *Controller) reconcileHandler(ctx context.Context, obj interface{}) {
// Update metrics after processing each item
reconcileStartTS := time.Now()
defer func() {
c.updateMetrics(time.Since(reconcileStartTS))
}()
// Make sure that the the object is a valid request.
req, ok := obj.(reconcile.Request)
if !ok {
// As the item in the workqueue is actually invalid, we call
// Forget here else we'd go into a loop of attempting to
// process a work item that is invalid.
c.Queue.Forget(obj)
c.Log.Error(nil, "Queue item was not a Request", "type", fmt.Sprintf("%T", obj), "value", obj)
// Return true, don't take a break
return
}
log := c.Log.WithValues("name", req.Name, "namespace", req.Namespace)
ctx = logf.IntoContext(ctx, log)
// RunInformersAndControllers the syncHandler, passing it the Namespace/Name string of the
// resource to be synced.
result, err := c.Reconcile(ctx, req)
switch {
case err != nil:
c.Queue.AddRateLimited(req)
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc()
log.Error(err, "Reconciler error")
case result.RequeueAfter > 0:
// The result.RequeueAfter request will be lost, if it is returned
// along with a non-nil error. But this is intended as
// We need to drive to stable reconcile loops before queuing due
// to result.RequestAfter
c.Queue.Forget(obj)
c.Queue.AddAfter(req, result.RequeueAfter)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Inc()
case result.Requeue:
c.Queue.AddRateLimited(req)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Inc()
default:
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.Queue.Forget(obj)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Inc()
}
}
Controller:Reconcile
// Reconcile implements reconcile.Reconciler.
func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
if c.RecoverPanic {
defer func() {
if r := recover(); r != nil {
for _, fn := range utilruntime.PanicHandlers {
fn(r)
}
err = fmt.Errorf("panic: %v [recovered]", r)
}
}()
}
log := c.Log.WithValues("name", req.Name, "namespace", req.Namespace)
ctx = logf.IntoContext(ctx, log)
return c.Do.Reconcile(ctx, req)
}
Reconciliation is level-based, meaning action isn't driven off changes in individual Events, but instead is
driven by actual cluster state read from the apiserver or a local cache.
For example if responding to a Pod Delete Event, the Request won't contain that a Pod was deleted,
instead the reconcile function observes this when reading the cluster state and seeing the Pod as missing.
*/
type Reconciler interface {
// Reconcile performs a full reconciliation for the object referred to by the Request.
// The Controller will requeue the Request to be processed again if an error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
Reconcile(context.Context, Request) (Result, error)
}