Linux sched init overview

This document walks through scheduler initialization in Linux kernel 4.10, covering key steps such as `init_idle()` and `set_cpu_rq_start_time()`. It looks at how `for_each_possible_cpu()` iterates over CPUs, how `init_rt_bandwidth()` and `init_dl_bandwidth()` set up bandwidth allocation, and how the fair and real-time scheduling classes are defined and exposed. The scheduling class `struct sched_class` acts as a uniform interface, so different scheduling algorithms can be used in a simple, consistent way.


Scheduler initialization: as described in the earlier Android boot-flow article, after U-Boot (the bootloader) finishes its initialization work it loads the kernel into memory and jumps into it.
The kernel then runs a stretch of assembly to set up the C runtime environment, and jumps to start_kernel().

linux-4.10/init/main.c

482  asmlinkage __visible void __init start_kernel(void)
483  {
...
542  	/*
543  	 * Set up the scheduler prior starting any interrupts (such as the
544  	 * timer interrupt). Full topology setup happens at smp_init()
545  	 * time - but meanwhile we still have a functioning scheduler.
546  	 */
547  	sched_init();  // scheduler initialization
...
672  	/* Do the rest non-__init'ed, we're now alive */
673  	rest_init();
674  }

sched_init() initializes a number of scheduler-related data structures; below they are only listed briefly.

linux-4.10/kernel/sched/core.c

7543  void __init sched_init(void)
7544  {
7545  	int i, j;
7546  	unsigned long alloc_size = 0, ptr;
7547  
/*
 *  linux-4.10/include/linux/types.h
 *  struct list_head {    // doubly linked list
 *  	struct list_head *next, *prev;
 *  };
 *  linux-4.10/include/linux/wait.h
 *  struct __wait_queue_head {  // so __wait_queue_head pairs a spinlock with a doubly linked list of waiting tasks
 *  	spinlock_t		lock;
 *  	struct list_head	task_list;
 *  };
 *  typedef struct __wait_queue_head wait_queue_head_t;
 */
7548  	for (i = 0; i < WAIT_TABLE_SIZE; i++) // #define WAIT_TABLE_BITS 8  #define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS), so WAIT_TABLE_SIZE is 256
7549  		init_waitqueue_head(bit_wait_table + i);  // static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
7550  
7551  #ifdef CONFIG_FAIR_GROUP_SCHED   // group scheduling for normal (CFS) tasks
7552  	alloc_size += 2 * nr_cpu_ids * sizeof(void **);  // nr_cpu_ids is the number of possible CPUs (the familiar core count); sizeof(void **) == 8 on a 64-bit system
7553  #endif
7554  #ifdef CONFIG_RT_GROUP_SCHED   // group scheduling for real-time tasks
7555  	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
7556  #endif
7557  	if (alloc_size) {
7558  		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);  // allocate the memory
7559  
7560  #ifdef CONFIG_FAIR_GROUP_SCHED
7561  		root_task_group.se = (struct sched_entity **)ptr; // per-CPU scheduling-entity pointers
7562  		ptr += nr_cpu_ids * sizeof(void **); // advance ptr by nr_cpu_ids * sizeof(void **); nr_cpu_ids is the number of possible CPUs, sizeof(void **) == 8 on a 64-bit system
7563  
7564  		root_task_group.cfs_rq = (struct cfs_rq **)ptr;  // per-CPU CFS runqueue pointers; each CPU has its own runqueue
7565  		ptr += nr_cpu_ids * sizeof(void **);
7566  
7567  #endif /* CONFIG_FAIR_GROUP_SCHED */
7568  #ifdef CONFIG_RT_GROUP_SCHED
7569  		root_task_group.rt_se = (struct sched_rt_entity **)ptr; // per-CPU RT scheduling-entity pointers
7570  		ptr += nr_cpu_ids * sizeof(void **);
7571  
7572  		root_task_group.rt_rq = (struct rt_rq **)ptr;  // per-CPU RT runqueue pointers
7573  		ptr += nr_cpu_ids * sizeof(void **);
7574  
7575  #endif /* CONFIG_RT_GROUP_SCHED */
7576  	}
7577  #ifdef CONFIG_CPUMASK_OFFSTACK
7578  	for_each_possible_cpu(i) {  // (1)...
7579  		per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
7580  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7581  		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
7582  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7583  	}
7584  #endif /* CONFIG_CPUMASK_OFFSTACK */
7585    // (2)...
7586  	init_rt_bandwidth(&def_rt_bandwidth, // initialize the CPU bandwidth cap for real-time tasks; exceeding it triggers throttling (rt throttled)
7587  			global_rt_period(), global_rt_runtime());
7588  	init_dl_bandwidth(&def_dl_bandwidth,
7589  			global_rt_period(), global_rt_runtime());
7590  
7591  #ifdef CONFIG_SMP
7592  	init_defrootdomain();  // (3)...
7593  #endif
7594  
7595  #ifdef CONFIG_RT_GROUP_SCHED 
7596  	init_rt_bandwidth(&root_task_group.rt_bandwidth, // initialize the RT CPU bandwidth of the root_task_group task group
7597  			global_rt_period(), global_rt_runtime());
7598  #endif /* CONFIG_RT_GROUP_SCHED */
7599  
7600  #ifdef CONFIG_CGROUP_SCHED  // task-group support; think of multiple users, where all processes belonging to one user form a task group
7601  	task_group_cache = KMEM_CACHE(task_group, 0);
7602  
7603  	list_add(&root_task_group.list, &task_groups); // add root_task_group to the task_groups list
7604  	INIT_LIST_HEAD(&root_task_group.children);
7605  	INIT_LIST_HEAD(&root_task_group.siblings);
7606  	autogroup_init(&init_task);
7607  #endif /* CONFIG_CGROUP_SCHED */
7608  
7609  	for_each_possible_cpu(i) {
7610  		struct rq *rq;
/*
 *  linux-4.10/kernel/sched/sched.h
 *  struct rq {
 *    	raw_spinlock_t lock;  // runqueue spinlock
 *  ...
 *  604  	unsigned int nr_running; // total number of runnable tasks on this CPU
 *  ...
 *   	u64 nr_switches; // number of context switches performed
 *
 *    	struct cfs_rq cfs; // CFS runqueue
 *    	struct rt_rq rt;   // real-time runqueue
 *    	struct dl_rq dl;   // deadline runqueue
 *  ...
 *    	struct task_struct *curr, *idle, *stop;  // curr: task currently running on this CPU; idle: this CPU's idle task
 *  ...
 *    	int cpu;  // CPU ID this runqueue belongs to
 *    	int online;  // online state
 *  ...
 *  };
 */
7612  		rq = cpu_rq(i); // get CPU i's runqueue  // (4)...
7613  		raw_spin_lock_init(&rq->lock); // initialize the runqueue spinlock
7614  		rq->nr_running = 0;
7615  		rq->calc_load_active = 0;
7616  		rq->calc_load_update = jiffies + LOAD_FREQ;
7617  		init_cfs_rq(&rq->cfs);  // initialize the CFS runqueue (essentially just assigns initial values)
7618  		init_rt_rq(&rq->rt);
7619  		init_dl_rq(&rq->dl);
7620  #ifdef CONFIG_FAIR_GROUP_SCHED
7621  		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
7622  		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
7623  		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
7624  		/*
7625  		 * How much cpu bandwidth does root_task_group get?
7626  		 *
7627  		 * In case of task-groups formed thr' the cgroup filesystem, it
7628  		 * gets 100% of the cpu resources in the system. This overall
7629  		 * system cpu resource is divided among the tasks of
7630  		 * root_task_group and its child task-groups in a fair manner,
7631  		 * based on each entity's (task or task-group's) weight
7632  		 * (se->load.weight).
7633  		 *
7634  		 * In other words, if root_task_group has 10 tasks of weight
7635  		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
7636  		 * then A0's share of the cpu resource is:
7637  		 *
7638  		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
7639  		 *
7640  		 * We achieve this by letting root_task_group's tasks sit
7641  		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
7642  		 */
7643  		init_cfs_bandwidth(&root_task_group.cfs_bandwidth); // set the CPU bandwidth of normal (CFS) tasks in the root_task_group task group
7644  		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
7645  #endif /* CONFIG_FAIR_GROUP_SCHED */
7646  
7647  		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
7648  #ifdef CONFIG_RT_GROUP_SCHED
7649  		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
7650  #endif
7651  
7652  		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
7653  			rq->cpu_load[j] = 0;
7654  
7655  #ifdef CONFIG_SMP
7656  		rq->sd = NULL;
7657  		rq->rd = NULL;
7658  		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
7659  		rq->balance_callback = NULL;
7660  		rq->active_balance = 0;
7661  		rq->next_balance = jiffies;
7662  		rq->push_cpu = 0;
7663  		rq->cpu = i;
7664  		rq->online = 0;
7665  		rq->idle_stamp = 0;
7666  		rq->avg_idle = 2*sysctl_sched_migration_cost;
7667  		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
7668  
7669  		INIT_LIST_HEAD(&rq->cfs_tasks);
7670  
7671  		rq_attach_root(rq, &def_root_domain); // attach the CPU runqueue to the default root domain
7672  #ifdef CONFIG_NO_HZ_COMMON
7673  		rq->last_load_update_tick = jiffies;
7674  		rq->nohz_flags = 0;
7675  #endif
7676  #ifdef CONFIG_NO_HZ_FULL
7677  		rq->last_sched_tick = 0;
7678  #endif
7679  #endif /* CONFIG_SMP */
7680  		init_rq_hrtick(rq);
7681  		atomic_set(&rq->nr_iowait, 0);
7682  	}
7683  
7684  	set_load_weight(&init_task);  // set init_task's load weight according to its priority
7685  
7686  	/*
7687  	 * The boot idle thread does lazy MMU switching as well:
7688  	 */
7689  	atomic_inc(&init_mm.mm_count);
7690  	enter_lazy_tlb(&init_mm, current);
7691  
7692  	/*
7693  	 * Make us the idle thread. Technically, schedule() should not be
7694  	 * called from this thread, however somewhere below it might be,
7695  	 * but because we are the idle thread, we just pick up running again
7696  	 * when this runqueue becomes "idle".
7697  	 */
7698  	init_idle(current, smp_processor_id());  // (5)...
7699  
7700  	calc_load_update = jiffies + LOAD_FREQ;
7701  
7702  #ifdef CONFIG_SMP
7703  	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
7704  	/* May be allocated at isolcpus cmdline parse time */
7705  	if (cpu_isolated_map == NULL)
7706  		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
7707  	idle_thread_set_boot_cpu();  // (6)...
7708  	set_cpu_rq_start_time(smp_processor_id());
7709  #endif
7710  	init_sched_fair_class(); // initialize the fair scheduling class  // (7)...
7711  
7712  	init_schedstats();
7713  
7714  	scheduler_running = 1;
7715  }


(1) for_each_possible_cpu()

/* An explanation found online (not yet fully digested here):
 * the CPU mask mechanism represents all kinds of processor combinations in a system, but as
 * processor counts grow, cpumasks consume a lot of stack space.
 * The newly designed API moves CPU masks off the stack.
 */
7577  #ifdef CONFIG_CPUMASK_OFFSTACK  
7578  	for_each_possible_cpu(i) {  // effectively a for loop over all possible CPUs
7579  		per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( // load_balance_mask is a per-CPU variable placed in a special section via __attribute__
7580  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));  // kzalloc_node() allocates zeroed memory on CPU i's NUMA node
7581  		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( // likewise for select_idle_mask
7582  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7583  	}
7584  #endif /* CONFIG_CPUMASK_OFFSTACK */
This is the fragment from sched_init() shown earlier; since that listing was already long, the related definitions are pulled out and analyzed separately here.

linux-4.10/include/linux/cpumask.h

222  #define for_each_cpu(cpu, mask)				\
223  	for ((cpu) = -1;				\
224  		(cpu) = cpumask_next((cpu), (mask)),	\
225  		(cpu) < nr_cpu_ids;)
226  
extern struct cpumask __cpu_possible_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
linux-4.10/include/linux/percpu-defs.h
204  #define __verify_pcpu_ptr(ptr)						\
205  do {									\
206  	const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL;	\
207  	(void)__vpp_verify;						\
208  } while (0)
209  
220  #define per_cpu_ptr(ptr, cpu)						\
221  ({									\
222  	__verify_pcpu_ptr(ptr);						\
223  	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)));			\
224  })

256  #define per_cpu(var, cpu)	(*per_cpu_ptr(&(var), cpu))
From these definitions, for_each_possible_cpu(i) expands to for ((i) = -1; (i) = cpumask_next((i), cpu_possible_mask), (i) < nr_cpu_ids;), i.e. simply a for loop over every possible CPU.
per_cpu(load_balance_mask, i) can be read for now as load_balance_mask[i]. The definition of load_balance_mask is not very intuitive, but you can treat it as an ordinary variable of the corresponding type; it just lives in a special section.
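
To make these two macros concrete, here is a minimal sketch of declaring and touching a per-CPU variable (demo_counter and demo_walk_cpus are made-up names for illustration, not from the sources above):

#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(int, demo_counter);	/* one private instance per possible CPU */

static void demo_walk_cpus(void)
{
	int cpu;

	/* for_each_possible_cpu() is just the for loop expanded above */
	for_each_possible_cpu(cpu)
		per_cpu(demo_counter, cpu) = 0;	/* reads like demo_counter[cpu] */
}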


(2) init_rt_bandwidth()和init_dl_bandwidth()

7586  	init_rt_bandwidth(&def_rt_bandwidth, // initialize the CPU bandwidth cap for real-time tasks; exceeding it triggers throttling
7587  			global_rt_period(), global_rt_runtime());
7588  	init_dl_bandwidth(&def_dl_bandwidth,
7589  			global_rt_period(), global_rt_runtime());
linux-4.10/kernel/sched/rt.c

41  void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
42  {
43  	rt_b->rt_period = ns_to_ktime(period);  // accounting period length
44  	rt_b->rt_runtime = runtime;  // runtime budget available within each period
45  
46  	raw_spin_lock_init(&rt_b->rt_runtime_lock);
47  
48  	hrtimer_init(&rt_b->rt_period_timer,
49  			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
50  	rt_b->rt_period_timer.function = sched_rt_period_timer; // sched_rt_period_timer is the timer-expiry callback
51  }
linux-4.10/kernel/sched/deadline.c
53  void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
54  {
55  	raw_spin_lock_init(&dl_b->dl_runtime_lock);
56  	dl_b->dl_period = period;
57  	dl_b->dl_runtime = runtime;
58  }
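
For reference, the period and runtime arguments come from two sysctls, kernel.sched_rt_period_us and kernel.sched_rt_runtime_us, which default to 1000000 us and 950000 us, so by default real-time tasks may consume at most 95% of each period before being throttled. A sketch of the two helpers in linux-4.10/kernel/sched/core.c (paraphrased, so treat the exact form as approximate):

unsigned int sysctl_sched_rt_period = 1000000;	/* us: 1 s period */
int sysctl_sched_rt_runtime = 950000;		/* us: 0.95 s budget per period */

static inline u64 global_rt_period(void)
{
	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
}

static inline u64 global_rt_runtime(void)
{
	if (sysctl_sched_rt_runtime < 0)	/* -1 means no limit */
		return RUNTIME_INF;
	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}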

(3)init_defrootdomain()

7591  #ifdef CONFIG_SMP
7592  	init_defrootdomain();
7593  #endif
linux-4.10/kernel/sched/core.c

5902  /*
5903   * By default the system creates a single root-domain with all cpus as
5904   * members (mimicking the global state we have today).
5905   */
5906  struct root_domain def_root_domain;
5907  
5908  static void init_defrootdomain(void)
5909  {
5910  	init_rootdomain(&def_root_domain); // the root domain describes the set of CPUs this rq's tasks may run on
5911  
5912  	atomic_set(&def_root_domain.refcount, 1);
5913  }

5869  static int init_rootdomain(struct root_domain *rd)
5870  {
5871  	memset(rd, 0, sizeof(*rd));
5872  
5873  	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))   // allocate a zeroed cpumask
5874  		goto out;
5875  	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) // likewise for the three masks below
5876  		goto free_span;
5877  	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
5878  		goto free_online;
5879  	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
5880  		goto free_dlo_mask;
5881  
5882  	init_dl_bw(&rd->dl_bw);  // deadline bandwidth; for deadline scheduling, a smaller deadline means higher priority
5883  	if (cpudl_init(&rd->cpudl) != 0)  //initialize the cpudl structure
5884  		goto free_dlo_mask;
5885  
5886  	if (cpupri_init(&rd->cpupri) != 0) //initialize the cpupri structure
5887  		goto free_rto_mask;
5888  	return 0;
5889  
5890  free_rto_mask:
5891  	free_cpumask_var(rd->rto_mask);
5892  free_dlo_mask:
5893  	free_cpumask_var(rd->dlo_mask);
5894  free_online:
5895  	free_cpumask_var(rd->online);
5896  free_span:
5897  	free_cpumask_var(rd->span);
5898  out:
5899  	return -ENOMEM;
5900  }
Again a batch of data structures is allocated; this level of understanding is enough for now.
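
One detail worth noting: what zalloc_cpumask_var() does depends on CONFIG_CPUMASK_OFFSTACK, the same option seen in (1). A paraphrase of the two variants in linux-4.10/include/linux/cpumask.h:

#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;		/* a pointer: zalloc_cpumask_var() really allocates and zeroes a struct cpumask */
#else
typedef struct cpumask cpumask_var_t[1];	/* embedded in the enclosing object: zalloc_cpumask_var() only zeroes it and always succeeds */
#endif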

(4) cpu_rq()

linux-4.10/kernel/sched/sched.h

758  DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
759  
760  #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
cpu_rq(cpu) returns the runqueue of the given CPU. It is again a chain of macro definitions; we need not dwell on the exact implementation here. The key point is that runqueues is the per-CPU struct rq instance, already allocated with one copy per CPU.

linux-4.10/include/linux/percpu-defs.h

139  #define DECLARE_PER_CPU_SHARED_ALIGNED(type, name)			\
140  	DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
141  	____cacheline_aligned_in_smp

212  /*
213   * Add an offset to a pointer but keep the pointer as-is.  Use RELOC_HIDE()
214   * to prevent the compiler from making incorrect assumptions about the
215   * pointer value.  The weird cast keeps both GCC and sparse happy.
216   */
217  #define SHIFT_PERCPU_PTR(__p, __offset)					\
218  	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
219  
220  #define per_cpu_ptr(ptr, cpu)						\
221  ({									\
222  	__verify_pcpu_ptr(ptr);						\
223  	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)));			\
224  })
linux-4.10/include/linux/compiler.h
209  # define RELOC_HIDE(ptr, off)					\
210    ({ unsigned long __ptr;					\
211       __ptr = (unsigned long) (ptr);				\
212      (typeof(ptr)) (__ptr + (off)); })
213  #endif
So cpu_rq(i) can be loosely read as taking the address of CPU i's private copy of runqueues, i.e. roughly &runqueues[i], with each CPU owning one struct rq.
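
As a mental model only, the per-CPU layout behaves like the made-up userspace code below (the real kernel keeps the copies in per-CPU sections and reaches them via per_cpu_offset(), not via a plain array):

#include <stdio.h>

#define NR_CPUS 4

struct rq { unsigned int nr_running; };

static struct rq runqueues_model[NR_CPUS];	/* one private copy per CPU */

static struct rq *cpu_rq_model(int cpu)		/* models &per_cpu(runqueues, cpu) */
{
	return &runqueues_model[cpu];
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d: rq at %p\n", cpu, (void *)cpu_rq_model(cpu));
	return 0;
}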

(5)init_idle()
linux-4.10/kernel/sched/core.c

5264  /**
5265   * init_idle - set up an idle thread for a given CPU
5266   * @idle: task in question
5267   * @cpu: cpu the idle task belongs to
5268   *
5269   * NOTE: this function does not set the idle thread's NEED_RESCHED
5270   * flag, to make booting more robust.
5271   */
5272  void init_idle(struct task_struct *idle, int cpu)
5273  {
5274  	struct rq *rq = cpu_rq(cpu);
5275  	unsigned long flags;
5276  
5277  	raw_spin_lock_irqsave(&idle->pi_lock, flags);
5278  	raw_spin_lock(&rq->lock);
5279  
5280  	__sched_fork(0, idle); // idle is PID 0; this essentially assigns initial values to the scheduler fields of its task_struct
5281  	idle->state = TASK_RUNNING;
5282  	idle->se.exec_start = sched_clock();
5283  	idle->flags |= PF_IDLE;
5284  
5285  	kasan_unpoison_task_stack(idle);
5286  
5287  #ifdef CONFIG_SMP
5288  	/*
5289  	 * Its possible that init_idle() gets called multiple times on a task,
5290  	 * in that case do_set_cpus_allowed() will not do the right thing.
5291  	 *
5292  	 * And since this is boot we can forgo the serialization.
5293  	 */
5294  	set_cpus_allowed_common(idle, cpumask_of(cpu));
5295  #endif
5296  	/*
5297  	 * We're having a chicken and egg problem, even though we are
5298  	 * holding rq->lock, the cpu isn't yet set to this cpu so the
5299  	 * lockdep check in task_group() will fail.
5300  	 *
5301  	 * Similar case to sched_fork(). / Alternatively we could
5302  	 * use task_rq_lock() here and obtain the other rq->lock.
5303  	 *
5304  	 * Silence PROVE_RCU
5305  	 */
5306  	rcu_read_lock();
5307  	__set_task_cpu(idle, cpu);
5308  	rcu_read_unlock();
5309  
5310  	rq->curr = rq->idle = idle;
5311  	idle->on_rq = TASK_ON_RQ_QUEUED;
5312  #ifdef CONFIG_SMP
5313  	idle->on_cpu = 1;
5314  #endif
5315  	raw_spin_unlock(&rq->lock);
5316  	raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
5317  
5318  	/* Set the preempt count _outside_ the spinlocks! */
5319  	init_idle_preempt_count(idle, cpu);  // set the preempt count so the idle task can be preempted
5320  
5321  	/*
5322  	 * The idle tasks have their own, simple scheduling class:
5323  	 */
5324  	idle->sched_class = &idle_sched_class;  // use the idle scheduling class
5325  	ftrace_graph_init_idle_task(idle, cpu);
5326  	vtime_init_idle(idle, cpu);
5327  #ifdef CONFIG_SMP
5328  	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
5329  #endif
5330  }
In short, this assigns values to the idle task_struct, i.e. initializes it.
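
To see why the kernel comment calls the idle class "simple": its pick_next_task just hands back the rq->idle pointer that init_idle() filled in. A paraphrased sketch of linux-4.10/kernel/sched/idle_task.c (argument list trimmed, statistics updates omitted):

static struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev)
{
	put_prev_task(rq, prev);	/* retire the outgoing task */
	return rq->idle;		/* the idle task set up by init_idle() */
}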


(6)set_cpu_rq_start_time()

linux-4.10/kernel/sched/core.c

5631  static void set_cpu_rq_start_time(unsigned int cpu)
5632  {
5633  	struct rq *rq = cpu_rq(cpu);
5634  
5635  	rq->age_stamp = sched_clock_cpu(cpu);
5636  }

linux-4.10/kernel/sched/clock.c
294  /*
295   * Similar to cpu_clock(), but requires local IRQs to be disabled.
296   *
297   * See cpu_clock().
298   */
299  u64 sched_clock_cpu(int cpu)
300  {
301  	struct sched_clock_data *scd;
302  	u64 clock;
303  
304  	if (sched_clock_stable())
305  		return sched_clock();
306  
307  	if (unlikely(!sched_clock_running))
308  		return 0ull;
309  
310  	preempt_disable_notrace();
311  	scd = cpu_sdc(cpu);
312  
313  	if (cpu != smp_processor_id())
314  		clock = sched_clock_remote(scd);
315  	else
316  		clock = sched_clock_local(scd);
317  	preempt_enable_notrace();
318  
319  	return clock;
320  }
321  EXPORT_SYMBOL_GPL(sched_clock_cpu);

(7) init_sched_fair_class()

linux-4.10/kernel/sched/fair.c

9474  __init void init_sched_fair_class(void)
9475  {
9476  #ifdef CONFIG_SMP
9477  	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
9478  
9479  #ifdef CONFIG_NO_HZ_COMMON  // with NO_HZ, the tick is stopped while the system is idle
9480  	nohz.next_balance = jiffies;
9481  	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
9482  #endif
9483  #endif /* SMP */
9484  
9485  }
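
The SCHED_SOFTIRQ registered here is what drives periodic load balancing: scheduler_tick() calls trigger_load_balance() on every tick, which raises the softirq when a rebalance is due, and run_rebalance_domains() then runs in softirq context. A simplified paraphrase of the trigger in linux-4.10/kernel/sched/fair.c (the nohz kick is omitted):

void trigger_load_balance(struct rq *rq)
{
	/* no rebalancing while attached to the NULL domain */
	if (unlikely(on_null_domain(rq)))
		return;

	if (time_after_eq(jiffies, rq->next_balance))
		raise_softirq(SCHED_SOFTIRQ);	/* run_rebalance_domains() runs later */
}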


The fair scheduling class and the real-time scheduling class are defined as global constants, fair_sched_class and rt_sched_class respectively. struct sched_class is best understood as interface design: each scheduling class implements its own scheduling algorithm (its own methods), but the interface through which the core scheduler uses every class is identical. A sketch of how the core walks these classes follows the two listings below.

linux-4.10/kernel/sched/fair.c

9399  /*
9400   * All the scheduling class methods:
9401   */
9402  const struct sched_class fair_sched_class = {
9403  	.next			= &idle_sched_class,
9404  	.enqueue_task		= enqueue_task_fair,
9405  	.dequeue_task		= dequeue_task_fair,
9406  	.yield_task		= yield_task_fair,
9407  	.yield_to_task		= yield_to_task_fair,
9408  
9409  	.check_preempt_curr	= check_preempt_wakeup,
9410  
9411  	.pick_next_task		= pick_next_task_fair,
9412  	.put_prev_task		= put_prev_task_fair,
9413  
9414  #ifdef CONFIG_SMP
9415  	.select_task_rq		= select_task_rq_fair,
9416  	.migrate_task_rq	= migrate_task_rq_fair,
9417  
9418  	.rq_online		= rq_online_fair,
9419  	.rq_offline		= rq_offline_fair,
9420  
9421  	.task_dead		= task_dead_fair,
9422  	.set_cpus_allowed	= set_cpus_allowed_common,
9423  #endif
9424  
9425  	.set_curr_task          = set_curr_task_fair,
9426  	.task_tick		= task_tick_fair,
9427  	.task_fork		= task_fork_fair,
9428  
9429  	.prio_changed		= prio_changed_fair,
9430  	.switched_from		= switched_from_fair,
9431  	.switched_to		= switched_to_fair,
9432  
9433  	.get_rr_interval	= get_rr_interval_fair,
9434  
9435  	.update_curr		= update_curr_fair,
9436  
9437  #ifdef CONFIG_FAIR_GROUP_SCHED
9438  	.task_change_group	= task_change_group_fair,
9439  #endif
9440  };

linux-4.10/kernel/sched/rt.c

2325  const struct sched_class rt_sched_class = {
2326  	.next			= &fair_sched_class,
2327  	.enqueue_task		= enqueue_task_rt,
2328  	.dequeue_task		= dequeue_task_rt,
2329  	.yield_task		= yield_task_rt,
2330  
2331  	.check_preempt_curr	= check_preempt_curr_rt,
2332  
2333  	.pick_next_task		= pick_next_task_rt,
2334  	.put_prev_task		= put_prev_task_rt,
2335  
2336  #ifdef CONFIG_SMP
2337  	.select_task_rq		= select_task_rq_rt,
2338  
2339  	.set_cpus_allowed       = set_cpus_allowed_common,
2340  	.rq_online              = rq_online_rt,
2341  	.rq_offline             = rq_offline_rt,
2342  	.task_woken		= task_woken_rt,
2343  	.switched_from		= switched_from_rt,
2344  #endif
2345  
2346  	.set_curr_task          = set_curr_task_rt,
2347  	.task_tick		= task_tick_rt,
2348  
2349  	.get_rr_interval	= get_rr_interval_rt,
2350  
2351  	.prio_changed		= prio_changed_rt,
2352  	.switched_to		= switched_to_rt,
2353  
2354  	.update_curr		= update_curr_rt,
2355  };
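
The .next field at the top of each listing chains the classes from higher to lower priority (stop -> dl -> rt -> fair -> idle on SMP builds). A paraphrased sketch of how the core scheduler walks this chain in pick_next_task() (from linux-4.10/kernel/sched/sched.h and core.c, argument lists trimmed):

#define sched_class_highest	(&stop_sched_class)

#define for_each_class(class) \
	for (class = sched_class_highest; class; class = class->next)

/* inside pick_next_task(), simplified: */
for_each_class(class) {
	p = class->pick_next_task(rq, prev /* , ... */);
	if (p)
		return p;	/* the highest-priority class with a runnable task wins */
}
/* unreachable: the idle class always returns a task */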

This has only been a simple walk through the sched_init() flow. Many data structures are initialized along the way; their roles were not covered in detail here and will be introduced later when needed. For now a rough picture is enough; we can dig deeper when necessary.


