Timer源代码分析

最新推荐文章于 2022-11-12 18:09:27 发布

原创最新推荐文章于 2022-11-12 18:09:27 发布 · 927 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#timer #代码分析 #struct #list #firing #function

linux 专栏收录该内容

1 篇文章

订阅专栏

本文详细分析了Linux内核中的Timer机制，包括jiffies的初始值设定、per-CPU timer vector的定义、不同时间间隔定时器的组织方式、以及如何通过__run_timers函数处理已到期的定时器。此外，还介绍了如何使用__mod_timer函数修改定时器的到期时间，并探讨了如何判断和处理可延迟定时器。

Timer源代码分析

/*
 * 64-bit tick counter, initialised to INITIAL_JIFFIES rather than zero.
 */
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * Have the 32 bit jiffies value wrap 5 minutes after boot
 * so jiffies wrap bugs show up earlier.
 *
 * NOTE(review): in the kernel tree this macro presumably comes from a header
 * that is included before jiffies_64 is defined; it is quoted here after its
 * use only because the two excerpts are shown side by side.
 */
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))

一般在嵌入式的平台上，HZ取值为100，所以300*HZ的值为30000。30000的十六进制表示为0x7530。数据都是以补码的形式存储的，正数的补码是它本身，负数的补码是取反再加1。0x7530取反后是0xffff8acf，再加1后是0xffff8ad0。由于经过了强制类型转换(unsigned long)(unsigned int)，所以jiffies_64被赋值为0xffff8ad0

/*
 * per-CPU timer vector definitions:
 *
 * TVR_* describe the root (first-level) vector, TVN_* the higher-level
 * vectors.  CONFIG_BASE_SMALL selects the smaller table sizes.
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

/* One higher-level timer vector: TVN_SIZE list heads (used for tv2..tv5). */
struct tvec {

struct list_head vec[TVN_SIZE];

};

/* The root (first-level) timer vector: TVR_SIZE list heads (used for tv1). */
struct tvec_root {

struct list_head vec[TVR_SIZE];

};

/*
 * Per-CPU timer bookkeeping: a lock, the timer whose handler is currently
 * being run, the tick position already processed, and five levels of
 * timer vectors.
 */
struct tvec_base {

/* Taken around every manipulation of this base (see __mod_timer/__run_timers). */
spinlock_t lock;

/* Timer whose callback __run_timers() is currently invoking, NULL otherwise. */
struct timer_list *running_timer;

/* Next tick to be processed; advanced one step at a time by __run_timers(). */
unsigned long timer_jiffies;

/* Holds an expires value: the earliest pending non-deferrable expiry tracked by __mod_timer(). */
unsigned long next_timer;

/* Root vector, indexed by the low TVR_BITS of expires. */
struct tvec_root tv1;

/* Higher-level vectors, each indexed by the next TVN_BITS of expires. */
struct tvec tv2;

struct tvec tv3;

struct tvec tv4;

struct tvec tv5;

} ____cacheline_aligned;

/* Statically allocated timer base. */
struct tvec_base boot_tvec_bases;

EXPORT_SYMBOL(boot_tvec_bases);

/* Per-CPU pointer to the timer base in use; initialised to point at boot_tvec_bases. */
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

这段代码是per-CPU定时器矢量的定义。

一般在内存比较少的平台才会定义CONFIG_BASE_SMALL为1，以便缩减内存的占用。在我们的平台上，CONFIG_BASE_SMALL定义为0。所以TVN_BITS为6，TVR_BITS为8，TVN_SIZE为2^6=64，TVR_SIZE为2^8=256，TVN_MASK为0x3F，TVR_MASK为0xFF。

struct tvec 是一个有64个链表头的数组，struct tvec_root是一个有256个链表头的数组。

struct tvec_base结构体中：

lock是个自旋锁，操作struct tvec_base结构体中的变量值时都必须持有锁，以免引起同步等问题。

running_timer是本地cpu正在运行的定时器。

timer_jiffies 表示需要检查的动态定时器的最早到期时间。该字段在系统启动时被设置成jiffies的值，且只能由run_timer_softirq()函数增加它的值。一般情况下，它的值比jiffies的值大1。

next_timer是本地cpu上最早到期的定时器的到期时间（即该定时器的expires值）。

tv1是包含256个list_head元素的数组，包含了在2^8-1个节拍内将要到期的所有动态定时器。

tv2是包含64个list_head元素的数组，包含了在2^14-1个节拍内将要到期的所有动态定时器。

tv3是包含64个list_head元素的数组，包含了在2^20-1个节拍内将要到期的所有动态定时器。

tv4是包含64个list_head元素的数组，包含了在2^26-1个节拍内将要到期的所有动态定时器。

tv5 与前面的字段几乎相同，但唯一的区别就是vec数组的最后一项是一个大expires字段值的动态定时器。

/* Functions below help us manage 'deferrable' flag */

/*
 * Return the deferrable flag carried in the low bit of a timer's base
 * pointer: TBASE_DEFERRABLE_FLAG if set, 0 otherwise.
 */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
	unsigned long raw = (unsigned long)base;

	return (unsigned int)raw & TBASE_DEFERRABLE_FLAG;
}

/*
 * Strip the deferrable flag bit from a tagged base pointer and return the
 * real struct tvec_base address.
 */
static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	unsigned long addr = (unsigned long)base;

	addr &= ~TBASE_DEFERRABLE_FLAG;
	return (struct tvec_base *)addr;
}

/* Mark @timer as deferrable by tagging its base pointer. */
static inline void timer_set_deferrable(struct timer_list *timer)
{
	struct tvec_base *tagged = TBASE_MAKE_DEFERRED(timer->base);

	timer->base = tagged;
}

/*
 * Point @timer at @new_base while carrying over the deferrable flag that is
 * currently stored in the low bit of timer->base.
 */
static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
	unsigned long flag = tbase_get_deferrable(timer->base);
	unsigned long addr = (unsigned long)(new_base);

	timer->base = (struct tvec_base *)(addr | flag);
}

/*
 * Note that all tvec_bases are 2 byte aligned and lower bit of
 * base in timer_list is guaranteed to be zero. Use the LSB to
 * indicate whether the timer is deferrable.
 *
 * A deferrable timer will work normally when the system is busy, but
 * will not cause a CPU to come out of idle just to service it; instead,
 * the timer will be serviced when the CPU eventually wakes up with a
 * subsequent non-deferrable timer.
 */
#define TBASE_DEFERRABLE_FLAG (0x1)

刚看这段代码时看得一头雾水，竟然绕个弯来判断定时器是否是可延迟的。它的原理是这样的：tvec_bases这个数据结构是2个字节对齐的，而timer_list结构体的base字段就是指向tvec_base的地址，所以base的值肯定是2的倍数，即最低位为0。搞不明白为什么要这样设计，增加代码复杂性，难道仅仅是为了省个保存标志位的内存？

/*
 * Produce a tagged base pointer: the address of @ptr advanced by
 * TBASE_DEFERRABLE_FLAG bytes, cast back to struct tvec_base *.
 */
#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \

((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))

这个宏定义的作用是将原来的地址加1，即如果原来timer_list的base字段的地址为0x11223344，调用timer_set_deferrable(struct timer_list *timer)之后timer->base的值变为0x11223345。

/*
 * round_jiffies_common - shared helper behind the round_jiffies*() family
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number used to skew the rounding
 * @force_up: when true, never round down
 *
 * Returns @j rounded to a multiple of HZ (after applying a per-cpu skew),
 * or the unmodified @j when the rounded value would not lie after the
 * current jiffies.
 */
static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then round, then subtract this
	 * extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffie is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**

* __round_jiffies_relative - function to round jiffies to a full second

* @j: the time in (relative) jiffies that should be rounded

* @cpu: the processor number on which the timeout will happen

* __round_jiffies_relative() rounds a time delta in the future (in jiffies)

* up or down to (approximately) full seconds. This is useful for timers

* for which the exact time they fire does not matter too much, as long as

* they fire approximately every X seconds.

* By rounding these timers to whole seconds, all such timers will fire

* at the same time, rather than at various times spread out. The goal

* of this is to have the CPU wake up less, which saves power.

* The exact rounding is skewed for each processor to avoid all

* processors firing at the exact same time, which could lead

* to lock contention or spurious cache line bouncing.

* The return value is the rounded version of the @j parameter.

unsigned long __round_jiffies_relative(unsigned long j, int cpu)

{

unsigned long j0 = jiffies;

/* Use j0 because jiffies might change while we run */

return round_jiffies_common(j + j0, cpu, false) - j0;

}

EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**

* __round_jiffies_up_relative - function to round jiffies up to a full second

* @j: the time in (relative) jiffies that should be rounded

* @cpu: the processor number on which the timeout will happen

* This is the same as __round_jiffies_relative() except that it will never

* round down. This is useful for timeouts for which the exact time

* of firing does not matter too much, as long as they don't fire too

* early.

unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)

{

unsigned long j0 = jiffies;

/* Use j0 because jiffies might change while we run */

return round_jiffies_common(j + j0, cpu, true) - j0;

}

EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

这部分代码是处理jiffies的向上取整和向下取整问题：把到期时间取整到整秒，可以让这类定时器集中在同一时刻触发，减少CPU被唤醒的次数；而按cpu编号错开3个jiffies，则是为了防止不同cpu上的定时器在同一时间触发，从而导致锁竞争等问题。其核心函数是round_jiffies_common(unsigned long j, int cpu, bool force_up)。它先将j加上cpu*3的值，然后对HZ（这里为100）取模。如果余数小于HZ/4（即25）且没有设置force_up，就向下取整，否则向上取整。最后再减去cpu*3的值。刚开始看代码时，我以为最开始的加cpu*3是没必要的，因为取整后再减去cpu*3就可以让每个定时器的到期时间不一样。但后来发现，在cpu数很多的时候，先加cpu*3可以让最后得出的值围绕原来的值上下波动，没加的话就一直对原来的值递减。

/*
 * internal_add_timer - queue @timer on the proper list of @base
 * @base: the timer base to queue on
 * @timer: the timer to queue; timer->expires selects the list
 *
 * The distance between timer->expires and base->timer_jiffies picks the
 * vector (tv1..tv5); the matching bit field of expires picks the slot
 * inside that vector.  The timer is appended to the tail of that list.
 * Nothing is locked here.
 */
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}

internal_add_timer是个比较重要的函数，它将timer加入到tvec_base的链表数组中。

1 将定时器的到期时间与base->timer_jiffies比较，得到一个索引值。根据这个值判断这个定时器属于哪个链表数组。

2 知道定时器属于哪个数组后，还要知道这个定时器属于数组中的哪个链表。这是由expires中相应位段的值决定的。每个expires都被分成5个位段：0-7、8-13、14-19、20-25、26-31。

static void __init_timer(struct timer_list *timer,

const char *name,

struct lock_class_key *key)

{

timer->entry.next = NULL;

timer->base = __raw_get_cpu_var(tvec_bases);

timer->slack = -1;

#ifdef CONFIG_TIMER_STATS

timer->start_site = NULL;

timer->start_pid = -1;

memset(timer->start_comm, 0, TASK_COMM_LEN);

#endif

lockdep_init_map(&timer->lockdep_map, name, key, 0);

}

/*
 * Store the callback and its argument in @timer, initialise it through
 * init_timer_on_stack_key() and then flag it as deferrable.
 */
void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
					 const char *name,
					 struct lock_class_key *key,
					 void (*function)(unsigned long),
					 unsigned long data)
{
	timer->data = data;
	timer->function = function;

	init_timer_on_stack_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);

void init_timer_deferrable_key(struct timer_list *timer,

const char *name,

struct lock_class_key *key)

{

init_timer_key(timer, name, key);

timer_set_deferrable(timer);

}

EXPORT_SYMBOL(init_timer_deferrable_key);

定时器的初始化比较简单，只是设置下timer_list各成员变量的初始值。注意要将每个定时器的base设置成本地cpu的tvec_bases，因为每个cpu都有属于自己的定时器。

static inline int

__mod_timer(struct timer_list *timer, unsigned long expires,

bool pending_only, int pinned)

{

struct tvec_base *base, *new_base;

unsigned long flags;

int ret = 0 , cpu;

timer_stats_timer_set_start_info(timer);

BUG_ON(!timer->function);

base = lock_timer_base(timer, &flags);

if (timer_pending(timer)) {

detach_timer(timer, 0);

if (timer->expires == base->next_timer &&

!tbase_get_deferrable(timer->base))

base->next_timer = base->timer_jiffies;

ret = 1;

} else {

if (pending_only)

goto out_unlock;

}

debug_activate(timer, expires);

cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)

if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))

cpu = get_nohz_timer_target();

#endif

new_base = per_cpu(tvec_bases, cpu);

if (base != new_base) {

* We are trying to schedule the timer on the local CPU.

* However we can't change timer's base while it is running,

* otherwise del_timer_sync() can't detect that the timer's

* handler yet has not finished. This also guarantees that

* the timer is serialized wrt itself.

if (likely(base->running_timer != timer)) {

/* See the comment in lock_timer_base() */

timer_set_base(timer, NULL);

spin_unlock(&base->lock);

base = new_base;

spin_lock(&base->lock);

timer_set_base(timer, base);

}

timer->expires = expires;

if (time_before(timer->expires, base->next_timer) &&

!tbase_get_deferrable(timer->base))

base->next_timer = timer->expires;

internal_add_timer(base, timer);

out_unlock:

spin_unlock_irqrestore(&base->lock, flags);

return ret;

}

这个函数的作用是修改一个定时器：

1 先得到这个定时器所属的base，如果这个定时器已经加入了base的某个链表，则在链表中将这个定时器删除。如果这个base->next_timer刚好等于删掉的这个timer，就设置base->next_timer为base->timer_jiffies。

2 得到本地cpu的new_base值，如果与timer原来的base值不相等，说明不是同一个cpu，需要重新设置timer的base字段为new_base。

3 设置timer的expires字段为新的时间到期值。如果这个值比base->next_timer还小，则设置base->next_timer = timer->expires。

4 调用internal_add_timer函数将timer加入到链表中。

static int cascade(struct tvec_base *base, struct tvec *tv, int index)

{

/* cascade all the timers from tv up one level */

struct timer_list *timer, *tmp;

struct list_head tv_list;

list_replace_init(tv->vec + index, &tv_list);

* We are removing _all_ timers from the list, so we

* don't have to detach them individually.

list_for_each_entry_safe(timer, tmp, &tv_list, entry) {

BUG_ON(tbase_get_base(timer->base) != base);

internal_add_timer(base, timer);

}

return index;

}

/*
 * call_timer_fn - invoke a timer callback with lockdep/tracing around it
 * @timer: the timer being run (used for tracing and lockdep only)
 * @fn: the callback to invoke
 * @data: the argument passed to @fn
 *
 * Warns once and restores the preempt count if @fn returns with a
 * different preempt count than it was entered with.
 */
static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
			  unsigned long data)
{
	int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, this we need to take into
	 * account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = timer->lockdep_map;
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer);
	fn(data);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (preempt_count != preempt_count()) {
		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
			  fn, preempt_count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count() = preempt_count;
	}
}

/*
 * Slot index for the N-th higher-level vector (N == 0 is used with tv2),
 * taken from the matching bit field of base->timer_jiffies.  Expands to an
 * expression that uses a variable named 'base' from the calling scope.
 */
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)

/**

* __run_timers - run all expired timers (if any) on this CPU.

* @base: the timer vector to be processed.

* This function cascades all vectors and executes all expired timer

* vectors.

static inline void __run_timers(struct tvec_base *base)

{

struct timer_list *timer;

spin_lock_irq(&base->lock);

while (time_after_eq(jiffies, base->timer_jiffies)) {

struct list_head work_list;

struct list_head *head = &work_list;

int index = base->timer_jiffies & TVR_MASK;

* Cascade timers:

if (!index &&

(!cascade(base, &base->tv2, INDEX(0))) &&

(!cascade(base, &base->tv3, INDEX(1))) &&

!cascade(base, &base->tv4, INDEX(2)))

cascade(base, &base->tv5, INDEX(3));

++base->timer_jiffies;

list_replace_init(base->tv1.vec + index, &work_list);

while (!list_empty(head)) {

void (*fn)(unsigned long);

unsigned long data;

timer = list_first_entry(head, struct timer_list,entry);

fn = timer->function;

data = timer->data;

timer_stats_account_timer(timer);

base->running_timer = timer;

detach_timer(timer, 1);

spin_unlock_irq(&base->lock);

call_timer_fn(timer, fn, data);

spin_lock_irq(&base->lock);

}

base->running_timer = NULL;

spin_unlock_irq(&base->lock);

}

这几个函数主要处理定时器到期时的处理。分析下__run_timers函数：

1 如果base->timer_jiffies小于jiffies，则先从base->timer_jiffies的低位段开始计算，如果等于0，表示低位段的定时器都运行完了，需要将高位段的定时器搬移到低位段。搬移完后将base->timer_jiffies的值加1。

2 将base->tv1.vec + index链表的定时器摘下来。然后遍历这些链表，将每个定时器的超时函数执行一次。

Timer源代码分析

1 条评论