【内核调度、负载均衡】【entity_tick】

本文详细剖析了CFS调度器中的关键函数task_tick_fair和entity_tick的工作原理，包括如何进行周期性调度、更新运行时间和负载平均值，以及如何在适当时候进行进程抢占决策。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

在scheduler_tick函数中会调用当前调度类的task_tick函数，具体到fair class，它的入口函数为：

task_tick_fair

/*
 * scheduler tick hitting a task of our scheduling class:
 */
 /*  获取到当前进程curr所在的调度实体  */
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
	struct cfs_rq *cfs_rq;
	/*  获取到当前进程curr所在的调度实体  */
	struct sched_entity *se = &curr->se;
	/* for_each_sched_entity
     * 在不支持组调度条件下, 只循环一次
     * 在组调度的条件下, 调度实体存在层次关系,
     * 更新子调度实体的同时必须更新父调度实体  */
	for_each_sched_entity(se) {
		/*  获取当当前运行的进程所在的CFS就绪队列  */
		cfs_rq = cfs_rq_of(se);
		/*  完成周期性调度  */
		entity_tick(cfs_rq, se, queued);
	}

	if (static_branch_unlikely(&sched_numa_balancing))
		task_tick_numa(rq, curr);

	update_misfit_status(curr, rq);

	update_overutilized_status(rq);
}

我们可以看到，CFS周期性调度的功能实际上是委托给entity_tick函数来完成的。

entity_tick

/*
 * Per-tick processing for one scheduling entity on one CFS run queue.
 *
 * @cfs_rq: CFS run queue that @curr belongs to
 * @curr:   scheduling entity being ticked
 * @queued: when non-zero (and CONFIG_SCHED_HRTICK is set) the current
 *          entity is rescheduled immediately without further checks
 *
 * Refreshes the run-time statistics and load average first, then decides
 * whether a preemption check is needed.
 */
static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);

	/*
	 * Ensure that runnable average is periodically updated.
	 */
	update_load_avg(curr, UPDATE_TG);
	update_cfs_shares(curr);

	// hrtick handling follows; it is compiled in only when the kernel is built with CONFIG_SCHED_HRTICK
#ifdef CONFIG_SCHED_HRTICK
	/*
	 * queued ticks are scheduled to match the slice, so don't bother
	 * validating it and just reschedule.
	 */
	if (queued) {
		resched_curr(rq_of(cfs_rq));
		return;
	}
	/*
	 * don't let the period tick interfere with the hrtick preemption
	 */
	if (!sched_feat(DOUBLE_TICK) &&
			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
		return;
#endif
	// When cfs_rq->nr_running is at least two, let check_preempt_tick() decide whether curr must be preempted
	if (cfs_rq->nr_running > 1)
		check_preempt_tick(cfs_rq, curr);
}

update_curr主要是更新当前调度实体的运行时间统计（sum_exec_runtime、vruntime）以及cfs_rq的min_vruntime，详见：

https://blog.youkuaiyun.com/feifei_csdn/article/details/107200966

update_load_avg主要是更新调度实体（entity）及其所在cfs_rq的负载平均值，详见：

https://blog.youkuaiyun.com/feifei_csdn/article/details/107381373

check_preempt_tick

/*
 * Tick-time preemption check for the running entity @curr on @cfs_rq.
 *
 * A reschedule is requested in two situations:
 *   1. curr has run longer than the slice returned by sched_slice();
 *   2. curr has run at least sysctl_sched_min_granularity and its vruntime
 *      is ahead of the first entity in the tree by more than that slice.
 */
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
	/* How long curr should ideally be allowed to run. */
	unsigned long slice = sched_slice(cfs_rq, curr);
	/*
	 * How long curr has actually run this time:
	 *   sum_exec_runtime      - total execution time of the entity
	 *   prev_sum_exec_runtime - sum_exec_runtime as recorded when the
	 *                           entity was switched onto the CPU
	 */
	unsigned long ran = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
	struct sched_entity *leftmost;
	s64 vdiff;

	if (ran > slice) {
		/*
		 * curr has consumed more than its slice: set the reschedule
		 * flag so that another entity gets the CPU.
		 */
		resched_curr(rq_of(cfs_rq));
		/*
		 * The current task ran long enough, make sure buddy
		 * favours cannot get it re-elected straight away.
		 */
		clear_buddies(cfs_rq, curr);
		return;
	}

	/*
	 * A task that narrowly missed wakeup preemption should not have to
	 * wait for a full slice, but curr is still guaranteed the minimum
	 * granularity before it can be preempted here. This also mitigates
	 * buddy induced latencies under load.
	 */
	if (ran < sysctl_sched_min_granularity)
		return;

	leftmost = __pick_first_entity(cfs_rq);
	vdiff = curr->vruntime - leftmost->vruntime;

	/*
	 * Reschedule only when curr's vruntime is ahead of the first entity
	 * in the tree, and ahead by more than curr's ideal run time.
	 */
	if (vdiff >= 0 && vdiff > slice)
		resched_curr(rq_of(cfs_rq));
}

/*
 * Pick the leftmost entity of the run queue's tasks_timeline tree.
 *
 * Returns NULL when rb_first_cached() reports no node.
 */
struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *first = rb_first_cached(&cfs_rq->tasks_timeline);

	/* Map the rb_node back to the sched_entity that embeds it. */
	return first ? rb_entry(first, struct sched_entity, run_node) : NULL;
}