Linux sched init overview

This document walks through scheduler initialization in Linux kernel 4.10, covering key steps such as `init_idle()` and `set_cpu_rq_start_time()`. It looks at how `for_each_possible_cpu()` iterates over CPUs, how `init_rt_bandwidth()` and `init_dl_bandwidth()` set up bandwidth allocation, and how the fair and real-time scheduling classes are defined and exposed. The scheduling class `struct sched_class` acts as a uniform interface, so different scheduling algorithms can be used in a simple, consistent way.


Scheduler initialization: as described in the earlier Android boot-flow article, after U-Boot (the bootloader) finishes its initialization work it loads the kernel into memory and jumps into it.
The kernel then runs a stretch of assembly to set up the C runtime environment, and jumps to start_kernel().

linux-4.10/init/main.c

482  asmlinkage __visible void __init start_kernel(void)
483  {
...
542  	/*
543  	 * Set up the scheduler prior starting any interrupts (such as the
544  	 * timer interrupt). Full topology setup happens at smp_init()
545  	 * time - but meanwhile we still have a functioning scheduler.
546  	 */
547  	sched_init();  // scheduler initialization
...
672  	/* Do the rest non-__init'ed, we're now alive */
673  	rest_init();
674  }

sched_init() initializes a number of scheduler-related data structures; below they are only listed briefly.

linux-4.10/kernel/sched/core.c

7543  void __init sched_init(void)
7544  {
7545  	int i, j;
7546  	unsigned long alloc_size = 0, ptr;
7547  
/*
 *  linux-4.10/include/linux/types.h
 *  struct list_head {    // doubly linked list
 *  	struct list_head *next, *prev;
 *  };
 *  linux-4.10/include/linux/wait.h
 *  struct __wait_queue_head {  // so __wait_queue_head pairs a spinlock with a doubly linked list of waiting tasks
 *  	spinlock_t		lock;
 *  	struct list_head	task_list;
 *  };
 *  typedef struct __wait_queue_head wait_queue_head_t;
 */
7548  	for (i = 0; i < WAIT_TABLE_SIZE; i++) // #define WAIT_TABLE_BITS 8  #define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS), so WAIT_TABLE_SIZE is 256
7549  		init_waitqueue_head(bit_wait_table + i);  // static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
7550  
7551  #ifdef CONFIG_FAIR_GROUP_SCHED   // group scheduling for normal (CFS) tasks
7552  	alloc_size += 2 * nr_cpu_ids * sizeof(void **);  // nr_cpu_ids is the number of possible CPUs (the familiar core count); sizeof(void **) == 8 on a 64-bit system
7553  #endif
7554  #ifdef CONFIG_RT_GROUP_SCHED   // group scheduling for real-time tasks
7555  	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
7556  #endif
7557  	if (alloc_size) {
7558  		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);  // allocate the memory
7559  
7560  #ifdef CONFIG_FAIR_GROUP_SCHED
7561  		root_task_group.se = (struct sched_entity **)ptr; // per-CPU scheduling-entity pointers
7562  		ptr += nr_cpu_ids * sizeof(void **); // advance ptr by nr_cpu_ids * sizeof(void **); nr_cpu_ids is the number of possible CPUs, sizeof(void **) == 8 on a 64-bit system
7563  
7564  		root_task_group.cfs_rq = (struct cfs_rq **)ptr;  // per-CPU CFS runqueue pointers; each CPU has its own runqueue
7565  		ptr += nr_cpu_ids * sizeof(void **);
7566  
7567  #endif /* CONFIG_FAIR_GROUP_SCHED */
7568  #ifdef CONFIG_RT_GROUP_SCHED
7569  		root_task_group.rt_se = (struct sched_rt_entity **)ptr; // per-CPU RT scheduling-entity pointers
7570  		ptr += nr_cpu_ids * sizeof(void **);
7571  
7572  		root_task_group.rt_rq = (struct rt_rq **)ptr;  // per-CPU RT runqueue pointers
7573  		ptr += nr_cpu_ids * sizeof(void **);
7574  
7575  #endif /* CONFIG_RT_GROUP_SCHED */
7576  	}
7577  #ifdef CONFIG_CPUMASK_OFFSTACK
7578  	for_each_possible_cpu(i) {  // (1)...
7579  		per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
7580  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7581  		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
7582  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7583  	}
7584  #endif /* CONFIG_CPUMASK_OFFSTACK */
7585    // (2)...
7586  	init_rt_bandwidth(&def_rt_bandwidth, // initialize the CPU bandwidth cap for real-time tasks; exceeding it triggers throttling (rt throttled)
7587  			global_rt_period(), global_rt_runtime());
7588  	init_dl_bandwidth(&def_dl_bandwidth,
7589  			global_rt_period(), global_rt_runtime());
7590  
7591  #ifdef CONFIG_SMP
7592  	init_defrootdomain();  // (3)...
7593  #endif
7594  
7595  #ifdef CONFIG_RT_GROUP_SCHED 
7596  	init_rt_bandwidth(&root_task_group.rt_bandwidth, // initialize the RT CPU bandwidth of the root_task_group task group
7597  			global_rt_period(), global_rt_runtime());
7598  #endif /* CONFIG_RT_GROUP_SCHED */
7599  
7600  #ifdef CONFIG_CGROUP_SCHED  // task-group support; think of multiple users, where all processes belonging to one user form a task group
7601  	task_group_cache = KMEM_CACHE(task_group, 0);
7602  
7603  	list_add(&root_task_group.list, &task_groups); // add root_task_group to the task_groups list
7604  	INIT_LIST_HEAD(&root_task_group.children);
7605  	INIT_LIST_HEAD(&root_task_group.siblings);
7606  	autogroup_init(&init_task);
7607  #endif /* CONFIG_CGROUP_SCHED */
7608  
7609  	for_each_possible_cpu(i) {
7610  		struct rq *rq;
/*
 *  linux-4.10/kernel/sched/sched.h
 *  struct rq {
 *    	raw_spinlock_t lock;  // runqueue spinlock
 *  ...
 *  604  	unsigned int nr_running; // total number of runnable tasks on this CPU
 *  ...
 *   	u64 nr_switches; // number of context switches performed
 *
 *    	struct cfs_rq cfs; // CFS runqueue
 *    	struct rt_rq rt;   // real-time runqueue
 *    	struct dl_rq dl;   // deadline runqueue
 *  ...
 *    	struct task_struct *curr, *idle, *stop;  // curr: task currently running on this CPU; idle: this CPU's idle task
 *  ...
 *    	int cpu;  // CPU ID this runqueue belongs to
 *    	int online;  // online state
 *  ...
 *  };
 */
7612  		rq = cpu_rq(i); // get CPU i's runqueue  // (4)...
7613  		raw_spin_lock_init(&rq->lock); // initialize the runqueue spinlock
7614  		rq->nr_running = 0;
7615  		rq->calc_load_active = 0;
7616  		rq->calc_load_update = jiffies + LOAD_FREQ;
7617  		init_cfs_rq(&rq->cfs);  // initialize the CFS runqueue (essentially just assigns initial values)
7618  		init_rt_rq(&rq->rt);
7619  		init_dl_rq(&rq->dl);
7620  #ifdef CONFIG_FAIR_GROUP_SCHED
7621  		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
7622  		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
7623  		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
7624  		/*
7625  		 * How much cpu bandwidth does root_task_group get?
7626  		 *
7627  		 * In case of task-groups formed thr' the cgroup filesystem, it
7628  		 * gets 100% of the cpu resources in the system. This overall
7629  		 * system cpu resource is divided among the tasks of
7630  		 * root_task_group and its child task-groups in a fair manner,
7631  		 * based on each entity's (task or task-group's) weight
7632  		 * (se->load.weight).
7633  		 *
7634  		 * In other words, if root_task_group has 10 tasks of weight
7635  		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
7636  		 * then A0's share of the cpu resource is:
7637  		 *
7638  		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
7639  		 *
7640  		 * We achieve this by letting root_task_group's tasks sit
7641  		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
7642  		 */
7643  		init_cfs_bandwidth(&root_task_group.cfs_bandwidth); // set the CPU bandwidth of normal (CFS) tasks in the root_task_group task group
7644  		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
7645  #endif /* CONFIG_FAIR_GROUP_SCHED */
7646  
7647  		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
7648  #ifdef CONFIG_RT_GROUP_SCHED
7649  		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
7650  #endif
7651  
7652  		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
7653  			rq->cpu_load[j] = 0;
7654  
7655  #ifdef CONFIG_SMP
7656  		rq->sd = NULL;
7657  		rq->rd = NULL;
7658  		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
7659  		rq->balance_callback = NULL;
7660  		rq->active_balance = 0;
7661  		rq->next_balance = jiffies;
7662  		rq->push_cpu = 0;
7663  		rq->cpu = i;
7664  		rq->online = 0;
7665  		rq->idle_stamp = 0;
7666  		rq->avg_idle = 2*sysctl_sched_migration_cost;
7667  		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
7668  
7669  		INIT_LIST_HEAD(&rq->cfs_tasks);
7670  
7671  		rq_attach_root(rq, &def_root_domain); // attach the CPU runqueue to the default root domain
7672  #ifdef CONFIG_NO_HZ_COMMON
7673  		rq->last_load_update_tick = jiffies;
7674  		rq->nohz_flags = 0;
7675  #endif
7676  #ifdef CONFIG_NO_HZ_FULL
7677  		rq->last_sched_tick = 0;
7678  #endif
7679  #endif /* CONFIG_SMP */
7680  		init_rq_hrtick(rq);
7681  		atomic_set(&rq->nr_iowait, 0);
7682  	}
7683  
7684  	set_load_weight(&init_task);  // set init_task's load weight according to its priority
7685  
7686  	/*
7687  	 * The boot idle thread does lazy MMU switching as well:
7688  	 */
7689  	atomic_inc(&init_mm.mm_count);
7690  	enter_lazy_tlb(&init_mm, current);
7691  
7692  	/*
7693  	 * Make us the idle thread. Technically, schedule() should not be
7694  	 * called from this thread, however somewhere below it might be,
7695  	 * but because we are the idle thread, we just pick up running again
7696  	 * when this runqueue becomes "idle".
7697  	 */
7698  	init_idle(current, smp_processor_id());  // (5)...
7699  
7700  	calc_load_update = jiffies + LOAD_FREQ;
7701  
7702  #ifdef CONFIG_SMP
7703  	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
7704  	/* May be allocated at isolcpus cmdline parse time */
7705  	if (cpu_isolated_map == NULL)
7706  		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
7707  	idle_thread_set_boot_cpu();  // (6)...
7708  	set_cpu_rq_start_time(smp_processor_id());
7709  #endif
7710  	init_sched_fair_class(); // initialize the fair scheduling class  // (7)...
7711  
7712  	init_schedstats();
7713  
7714  	scheduler_running = 1;
7715  }


(1) for_each_possible_cpu()

/* An explanation found online (not yet fully digested here):
 * the CPU mask mechanism represents all kinds of processor combinations in a system, but as
 * processor counts grow, cpumasks consume a lot of stack space.
 * The newly designed API moves CPU masks off the stack.
 */
7577  #ifdef CONFIG_CPUMASK_OFFSTACK  
7578  	for_each_possible_cpu(i) {  // effectively a for loop over all possible CPUs
7579  		per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( // load_balance_mask is a per-CPU variable placed in a special section via __attribute__
7580  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));  // kzalloc_node() allocates zeroed memory on CPU i's NUMA node
7581  		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( // likewise for select_idle_mask
7582  			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7583  	}
7584  #endif /* CONFIG_CPUMASK_OFFSTACK */
This is the fragment from sched_init() shown earlier; since that listing was already long, the related definitions are pulled out and analyzed separately here.

linux-4.10/include/linux/cpumask.h

222  #define for_each_cpu(cpu, mask)				\
223  	for ((cpu) = -1;				\
224  		(cpu) = cpumask_next((cpu), (mask)),	\
225  		(cpu) < nr_cpu_ids;)
226  
extern struct cpumask __cpu_possible_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
linux-4.10/include/linux/percpu-defs.h
204  #define __verify_pcpu_ptr(ptr)						\
205  do {									\
206  	const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL;	\
207  	(void)__vpp_verify;						\
208  } while (0)
209  
220  #define per_cpu_ptr(ptr, cpu)						\
221  ({									\
222  	__verify_pcpu_ptr(ptr);						\
223  	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)));			\
224  })

256  #define per_cpu(var, cpu)	(*per_cpu_ptr(&(var), cpu))
From these definitions, for_each_possible_cpu(i) expands to for ((i) = -1; (i) = cpumask_next((i), cpu_possible_mask), (i) < nr_cpu_ids;), i.e. simply a for loop over every possible CPU.
per_cpu(load_balance_mask, i) can be read for now as load_balance_mask[i]. The definition of load_balance_mask is not very intuitive, but you can treat it as an ordinary variable of the corresponding type; it just lives in a special section.
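
To make these two macros concrete, here is a minimal sketch of declaring and touching a per-CPU variable (demo_counter and demo_walk_cpus are made-up names for illustration, not from the sources above):

#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(int, demo_counter);	/* one private instance per possible CPU */

static void demo_walk_cpus(void)
{
	int cpu;

	/* for_each_possible_cpu() is just the for loop expanded above */
	for_each_possible_cpu(cpu)
		per_cpu(demo_counter, cpu) = 0;	/* reads like demo_counter[cpu] */
}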


(2) init_rt_bandwidth()和init_dl_bandwidth()

7586  	init_rt_bandwidth(&def_rt_bandwidth, // initialize the CPU bandwidth cap for real-time tasks; exceeding it triggers throttling
7587  			global_rt_period(), global_rt_runtime());
7588  	init_dl_bandwidth(&def_dl_bandwidth,
7589  			global_rt_period(), global_rt_runtime());
linux-4.10/kernel/sched/rt.c

41  void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
42  {
43  	rt_b->rt_period = ns_to_ktime(period);  // accounting period length
44  	rt_b->rt_runtime = runtime;  // runtime budget available within each period
45  
46  	raw_spin_lock_init(&rt_b->rt_runtime_lock);
47  
48  	hrtimer_init(&rt_b->rt_period_timer,
49  			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
50  	rt_b->rt_period_timer.function = sched_rt_period_timer; // sched_rt_period_timer is the timer-expiry callback
51  }
linux-4.10/kernel/sched/deadline.c
53  void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
54  {
55  	raw_spin_lock_init(&dl_b->dl_runtime_lock);
56  	dl_b->dl_period = period;
57  	dl_b->dl_runtime = runtime;
58  }
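
For reference, the period and runtime arguments come from two sysctls, kernel.sched_rt_period_us and kernel.sched_rt_runtime_us, which default to 1000000 us and 950000 us, so by default real-time tasks may consume at most 95% of each period before being throttled. A sketch of the two helpers in linux-4.10/kernel/sched/core.c (paraphrased, so treat the exact form as approximate):

unsigned int sysctl_sched_rt_period = 1000000;	/* us: 1 s period */
int sysctl_sched_rt_runtime = 950000;		/* us: 0.95 s budget per period */

static inline u64 global_rt_period(void)
{
	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
}

static inline u64 global_rt_runtime(void)
{
	if (sysctl_sched_rt_runtime < 0)	/* -1 means no limit */
		return RUNTIME_INF;
	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}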

(3)init_defrootdomain()

7591  #ifdef CONFIG_SMP
7592  	init_defrootdomain();
7593  #endif
linux-4.10/kernel/sched/core.c

5902  /*
5903   * By default the system creates a single root-domain with all cpus as
5904   * members (mimicking the global state we have today).
5905   */
5906  struct root_domain def_root_domain;
5907  
5908  static void init_defrootdomain(void)
5909  {
5910  	init_rootdomain(&def_root_domain); // the root domain describes the set of CPUs this rq's tasks may run on
5911  
5912  	atomic_set(&def_root_domain.refcount, 1);
5913  }

5869  static int init_rootdomain(struct root_domain *rd)
5870  {
5871  	memset(rd, 0, sizeof(*rd));
5872  
5873  	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))   // allocate a zeroed cpumask
5874  		goto out;
5875  	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) // likewise for the three masks below
5876  		goto free_span;
5877  	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
5878  		goto free_online;
5879  	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
5880  		goto free_dlo_mask;
5881  
5882  	init_dl_bw(&rd->dl_bw);  // deadline bandwidth; for deadline scheduling, a smaller deadline means higher priority
5883  	if (cpudl_init(&rd->cpudl) != 0)  //initialize the cpudl structure
5884  		goto free_dlo_mask;
5885  
5886  	if (cpupri_init(&rd->cpupri) != 0) //initialize the cpupri structure
5887  		goto free_rto_mask;
5888  	return 0;
5889  
5890  free_rto_mask:
5891  	free_cpumask_var(rd->rto_mask);
5892  free_dlo_mask:
5893  	free_cpumask_var(rd->dlo_mask);
5894  free_online:
5895  	free_cpumask_var(rd->online);
5896  free_span:
5897  	free_cpumask_var(rd->span);
5898  out:
5899  	return -ENOMEM;
5900  }
Again a batch of data structures is allocated; this level of understanding is enough for now.
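
One detail worth noting: what zalloc_cpumask_var() does depends on CONFIG_CPUMASK_OFFSTACK, the same option seen in (1). A paraphrase of the two variants in linux-4.10/include/linux/cpumask.h:

#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;		/* a pointer: zalloc_cpumask_var() really allocates and zeroes a struct cpumask */
#else
typedef struct cpumask cpumask_var_t[1];	/* embedded in the enclosing object: zalloc_cpumask_var() only zeroes it and always succeeds */
#endif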

(4) cpu_rq()

linux-4.10/kernel/sched/sched.h

758  DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
759  
760  #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
cpu_rq(cpu) returns the runqueue of the given CPU. It is again a chain of macro definitions; we need not dwell on the exact implementation here. The key point is that runqueues is the per-CPU struct rq instance, already allocated with one copy per CPU.

linux-4.10/include/linux/percpu-defs.h

139  #define DECLARE_PER_CPU_SHARED_ALIGNED(type, name)			\
140  	DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
141  	____cacheline_aligned_in_smp

212  /*
213   * Add an offset to a pointer but keep the pointer as-is.  Use RELOC_HIDE()
214   * to prevent the compiler from making incorrect assumptions about the
215   * pointer value.  The weird cast keeps both GCC and sparse happy.
216   */
217  #define SHIFT_PERCPU_PTR(__p, __offset)					\
218  	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
219  
220  #define per_cpu_ptr(ptr, cpu)						\
221  ({									\
222  	__verify_pcpu_ptr(ptr);						\
223  	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)));			\
224  })
linux-4.10/include/linux/compiler.h
209  # define RELOC_HIDE(ptr, off)					\
210    ({ unsigned long __ptr;					\
211       __ptr = (unsigned long) (ptr);				\
212      (typeof(ptr)) (__ptr + (off)); })
213  #endif
So cpu_rq(i) can be loosely read as taking the address of CPU i's private copy of runqueues, i.e. roughly &runqueues[i], with each CPU owning one struct rq.
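
As a mental model only, the per-CPU layout behaves like the made-up userspace code below (the real kernel keeps the copies in per-CPU sections and reaches them via per_cpu_offset(), not via a plain array):

#include <stdio.h>

#define NR_CPUS 4

struct rq { unsigned int nr_running; };

static struct rq runqueues_model[NR_CPUS];	/* one private copy per CPU */

static struct rq *cpu_rq_model(int cpu)		/* models &per_cpu(runqueues, cpu) */
{
	return &runqueues_model[cpu];
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d: rq at %p\n", cpu, (void *)cpu_rq_model(cpu));
	return 0;
}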

(5)init_idle()
linux-4.10/kernel/sched/core.c

5264  /**
5265   * init_idle - set up an idle thread for a given CPU
5266   * @idle: task in question
5267   * @cpu: cpu the idle task belongs to
5268   *
5269   * NOTE: this function does not set the idle thread's NEED_RESCHED
5270   * flag, to make booting more robust.
5271   */
5272  void init_idle(struct task_struct *idle, int cpu)
5273  {
5274  	struct rq *rq = cpu_rq(cpu);
5275  	unsigned long flags;
5276  
5277  	raw_spin_lock_irqsave(&idle->pi_lock, flags);
5278  	raw_spin_lock(&rq->lock);
5279  
5280  	__sched_fork(0, idle); // idle is PID 0; this essentially assigns initial values to the scheduler fields of its task_struct
5281  	idle->state = TASK_RUNNING;
5282  	idle->se.exec_start = sched_clock();
5283  	idle->flags |= PF_IDLE;
5284  
5285  	kasan_unpoison_task_stack(idle);
5286  
5287  #ifdef CONFIG_SMP
5288  	/*
5289  	 * Its possible that init_idle() gets called multiple times on a task,
5290  	 * in that case do_set_cpus_allowed() will not do the right thing.
5291  	 *
5292  	 * And since this is boot we can forgo the serialization.
5293  	 */
5294  	set_cpus_allowed_common(idle, cpumask_of(cpu));
5295  #endif
5296  	/*
5297  	 * We're having a chicken and egg problem, even though we are
5298  	 * holding rq->lock, the cpu isn't yet set to this cpu so the
5299  	 * lockdep check in task_group() will fail.
5300  	 *
5301  	 * Similar case to sched_fork(). / Alternatively we could
5302  	 * use task_rq_lock() here and obtain the other rq->lock.
5303  	 *
5304  	 * Silence PROVE_RCU
5305  	 */
5306  	rcu_read_lock();
5307  	__set_task_cpu(idle, cpu);
5308  	rcu_read_unlock();
5309  
5310  	rq->curr = rq->idle = idle;
5311  	idle->on_rq = TASK_ON_RQ_QUEUED;
5312  #ifdef CONFIG_SMP
5313  	idle->on_cpu = 1;
5314  #endif
5315  	raw_spin_unlock(&rq->lock);
5316  	raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
5317  
5318  	/* Set the preempt count _outside_ the spinlocks! */
5319  	init_idle_preempt_count(idle, cpu);  // set the preempt count so the idle task can be preempted
5320  
5321  	/*
5322  	 * The idle tasks have their own, simple scheduling class:
5323  	 */
5324  	idle->sched_class = &idle_sched_class;  // use the idle scheduling class
5325  	ftrace_graph_init_idle_task(idle, cpu);
5326  	vtime_init_idle(idle, cpu);
5327  #ifdef CONFIG_SMP
5328  	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
5329  #endif
5330  }
In short, this assigns values to the idle task_struct, i.e. initializes it.
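
To see why the kernel comment calls the idle class "simple": its pick_next_task just hands back the rq->idle pointer that init_idle() filled in. A paraphrased sketch of linux-4.10/kernel/sched/idle_task.c (argument list trimmed, statistics updates omitted):

static struct task_struct *
pick_next_task_idle(struct rq *rq, struct task_struct *prev)
{
	put_prev_task(rq, prev);	/* retire the outgoing task */
	return rq->idle;		/* the idle task set up by init_idle() */
}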


(6)set_cpu_rq_start_time()

linux-4.10/kernel/sched/core.c

5631  static void set_cpu_rq_start_time(unsigned int cpu)
5632  {
5633  	struct rq *rq = cpu_rq(cpu);
5634  
5635  	rq->age_stamp = sched_clock_cpu(cpu);
5636  }

linux-4.10/kernel/sched/clock.c
294  /*
295   * Similar to cpu_clock(), but requires local IRQs to be disabled.
296   *
297   * See cpu_clock().
298   */
299  u64 sched_clock_cpu(int cpu)
300  {
301  	struct sched_clock_data *scd;
302  	u64 clock;
303  
304  	if (sched_clock_stable())
305  		return sched_clock();
306  
307  	if (unlikely(!sched_clock_running))
308  		return 0ull;
309  
310  	preempt_disable_notrace();
311  	scd = cpu_sdc(cpu);
312  
313  	if (cpu != smp_processor_id())
314  		clock = sched_clock_remote(scd);
315  	else
316  		clock = sched_clock_local(scd);
317  	preempt_enable_notrace();
318  
319  	return clock;
320  }
321  EXPORT_SYMBOL_GPL(sched_clock_cpu);

(7) init_sched_fair_class()

linux-4.10/kernel/sched/fair.c

9474  __init void init_sched_fair_class(void)
9475  {
9476  #ifdef CONFIG_SMP
9477  	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
9478  
9479  #ifdef CONFIG_NO_HZ_COMMON  // with NO_HZ, the tick is stopped while the system is idle
9480  	nohz.next_balance = jiffies;
9481  	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
9482  #endif
9483  #endif /* SMP */
9484  
9485  }
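
The SCHED_SOFTIRQ registered here is what drives periodic load balancing: scheduler_tick() calls trigger_load_balance() on every tick, which raises the softirq when a rebalance is due, and run_rebalance_domains() then runs in softirq context. A simplified paraphrase of the trigger in linux-4.10/kernel/sched/fair.c (the nohz kick is omitted):

void trigger_load_balance(struct rq *rq)
{
	/* no rebalancing while attached to the NULL domain */
	if (unlikely(on_null_domain(rq)))
		return;

	if (time_after_eq(jiffies, rq->next_balance))
		raise_softirq(SCHED_SOFTIRQ);	/* run_rebalance_domains() runs later */
}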


The fair scheduling class and the real-time scheduling class are defined as global constants, fair_sched_class and rt_sched_class respectively. struct sched_class is best understood as interface design: each scheduling class implements its own scheduling algorithm (its own methods), but the interface through which the core scheduler uses every class is identical. A sketch of how the core walks these classes follows the two listings below.

linux-4.10/kernel/sched/fair.c

9399  /*
9400   * All the scheduling class methods:
9401   */
9402  const struct sched_class fair_sched_class = {
9403  	.next			= &idle_sched_class,
9404  	.enqueue_task		= enqueue_task_fair,
9405  	.dequeue_task		= dequeue_task_fair,
9406  	.yield_task		= yield_task_fair,
9407  	.yield_to_task		= yield_to_task_fair,
9408  
9409  	.check_preempt_curr	= check_preempt_wakeup,
9410  
9411  	.pick_next_task		= pick_next_task_fair,
9412  	.put_prev_task		= put_prev_task_fair,
9413  
9414  #ifdef CONFIG_SMP
9415  	.select_task_rq		= select_task_rq_fair,
9416  	.migrate_task_rq	= migrate_task_rq_fair,
9417  
9418  	.rq_online		= rq_online_fair,
9419  	.rq_offline		= rq_offline_fair,
9420  
9421  	.task_dead		= task_dead_fair,
9422  	.set_cpus_allowed	= set_cpus_allowed_common,
9423  #endif
9424  
9425  	.set_curr_task          = set_curr_task_fair,
9426  	.task_tick		= task_tick_fair,
9427  	.task_fork		= task_fork_fair,
9428  
9429  	.prio_changed		= prio_changed_fair,
9430  	.switched_from		= switched_from_fair,
9431  	.switched_to		= switched_to_fair,
9432  
9433  	.get_rr_interval	= get_rr_interval_fair,
9434  
9435  	.update_curr		= update_curr_fair,
9436  
9437  #ifdef CONFIG_FAIR_GROUP_SCHED
9438  	.task_change_group	= task_change_group_fair,
9439  #endif
9440  };

linux-4.10/kernel/sched/rt.c

2325  const struct sched_class rt_sched_class = {
2326  	.next			= &fair_sched_class,
2327  	.enqueue_task		= enqueue_task_rt,
2328  	.dequeue_task		= dequeue_task_rt,
2329  	.yield_task		= yield_task_rt,
2330  
2331  	.check_preempt_curr	= check_preempt_curr_rt,
2332  
2333  	.pick_next_task		= pick_next_task_rt,
2334  	.put_prev_task		= put_prev_task_rt,
2335  
2336  #ifdef CONFIG_SMP
2337  	.select_task_rq		= select_task_rq_rt,
2338  
2339  	.set_cpus_allowed       = set_cpus_allowed_common,
2340  	.rq_online              = rq_online_rt,
2341  	.rq_offline             = rq_offline_rt,
2342  	.task_woken		= task_woken_rt,
2343  	.switched_from		= switched_from_rt,
2344  #endif
2345  
2346  	.set_curr_task          = set_curr_task_rt,
2347  	.task_tick		= task_tick_rt,
2348  
2349  	.get_rr_interval	= get_rr_interval_rt,
2350  
2351  	.prio_changed		= prio_changed_rt,
2352  	.switched_to		= switched_to_rt,
2353  
2354  	.update_curr		= update_curr_rt,
2355  };
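
The .next field at the top of each listing chains the classes from higher to lower priority (stop -> dl -> rt -> fair -> idle on SMP builds). A paraphrased sketch of how the core scheduler walks this chain in pick_next_task() (from linux-4.10/kernel/sched/sched.h and core.c, argument lists trimmed):

#define sched_class_highest	(&stop_sched_class)

#define for_each_class(class) \
	for (class = sched_class_highest; class; class = class->next)

/* inside pick_next_task(), simplified: */
for_each_class(class) {
	p = class->pick_next_task(rq, prev /* , ... */);
	if (p)
		return p;	/* the highest-priority class with a runnable task wins */
}
/* unreachable: the idle class always returns a task */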

This has only been a simple walk through the sched_init() flow. Many data structures are initialized along the way; their roles were not covered in detail here and will be introduced later when needed. For now a rough picture is enough; we can dig deeper when necessary.


