1. Workqueue overview
Like tasklets, workqueues allow kernel code to request that a function be called at some future time (borrowing the description from LDD3).
Work queued this way is executed by kernel worker threads, i.e. in process context.
Differences between workqueues and tasklets:
1. Tasklets are built on softirqs and run in softirq context, so tasklet code must be atomic.
Workqueues are built on kernel threads, so they have no such restriction; best of all, a work function is allowed to sleep.
PS: my own driver hit exactly this: it called a sleepable function from a timer callback, which triggered the "scheduling while atomic" warning. A minimal fix is sketched after this list.
(Kernel timers are also implemented on top of softirqs.)
2. A tasklet always runs on the CPU that originally scheduled it; a work item does not necessarily.
3. A tasklet cannot be given a delay (even a short one); a work item can be queued with a delay.
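A minimal sketch of that fix: keep the timer callback atomic and defer the sleeping part to a work item. All names here are hypothetical, and timer_setup() assumes a v4.15+ kernel (older kernels use setup_timer() instead):

#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/delay.h>

static struct work_struct sleepy_work;
static struct timer_list my_timer;

/* runs in process context on a worker thread: sleeping is allowed */
static void sleepy_work_fn(struct work_struct *work)
{
	msleep(10);
}

/* runs in softirq context: must not sleep, so just schedule the work */
static void my_timer_fn(struct timer_list *t)
{
	schedule_work(&sleepy_work);
}

/* setup, e.g. in module init:
 *   INIT_WORK(&sleepy_work, sleepy_work_fn);
 *   timer_setup(&my_timer, my_timer_fn, 0);
 *   mod_timer(&my_timer, jiffies + HZ);
 */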
For tasklets, see the introduction in Linux Kernel Development, 3rd edition. do_softirq() calls tasklet_action(), which checks whether the tasklet is already in the TASKLET_STATE_RUN state; if so, another core is currently executing this tasklet and it is skipped. Otherwise the state is set, so that no other core will execute the same tasklet concurrently.
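The core of that check looks roughly like this (a simplified sketch, not the verbatim kernel source; tasklet_trylock() is a test_and_set_bit() on TASKLET_STATE_RUN):

/* simplified tasklet_action(): walk this CPU's pending tasklet list */
while (list) {
	struct tasklet_struct *t = list;
	list = list->next;
	if (tasklet_trylock(t)) {          /* sets TASKLET_STATE_RUN; fails if
	                                    * another CPU is already running it */
		if (!atomic_read(&t->count)) { /* tasklet is not disabled */
			t->func(t->data);
			tasklet_unlock(t);         /* clears TASKLET_STATE_RUN */
			continue;
		}
		tasklet_unlock(t);
	}
	/* running elsewhere or disabled: requeue it and re-raise the softirq */
}

A typical usage pattern follows: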
void do_do_tasklet(unsigned long data)
{
	/* lots of time-consuming work */
}
DECLARE_TASKLET(my_tasklet, do_do_tasklet, 0);
/* interrupt handler */
irqreturn_t do_do_interrupt(int irq, void *dev)
{
	//...
	tasklet_schedule(&my_tasklet);
	my_task_count++;
	return IRQ_HANDLED;
}
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	/* append t to the tail of this CPU's tasklet_vec list */
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);
1.1 Dynamic definition and initialization
static struct work_struct marlin_wq;
INIT_WORK(&marlin_wq, marlin_workq);
/* a delayed work item needs its own type, struct delayed_work: */
static struct delayed_work marlin_dwork;
INIT_DELAYED_WORK(&marlin_dwork, marlin_workq);
1.2 Static definition and initialization (the three-argument macros below are from kernels before 2.6.20; since then DECLARE_WORK(n, f) takes only the name and the function)
#define __WORK_INITIALIZER(n, f, d) { \
.entry = { &(n).entry, &(n).entry }, \
.func = (f), \
.data = (d), \
.timer = TIMER_INITIALIZER(NULL, 0, 0), \
}
#define DECLARE_WORK(n, f, d) \
struct work_struct n = __WORK_INITIALIZER(n, f, d)
struct work_struct {
atomic_long_t data;
struct list_head entry;
work_func_t func;
};
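For reference, a minimal sketch using the modern two-argument macros (the names here are hypothetical):

static void my_fn(struct work_struct *work)
{
	pr_info("static work ran\n");
}

static DECLARE_WORK(my_static_work, my_fn);
static DECLARE_DELAYED_WORK(my_static_dwork, my_fn);

/* later, e.g. in an interrupt handler or init path */
schedule_work(&my_static_work);
schedule_delayed_work(&my_static_dwork, HZ);   /* run after ~1 s */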
2.1 Queueing work on the system default workqueue, system_wq
int schedule_work( struct work_struct *work );
int schedule_work_on( int cpu, struct work_struct *work );
int schedule_delayed_work( struct delayed_work *dwork, unsigned long delay );
int schedule_delayed_work_on( int cpu, struct delayed_work *dwork, unsigned long delay );
static void marlin_workq(struct work_struct *work)
{
	/* anything to do */
}
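A minimal sketch of the delayed variant (hypothetical names); the handler runs on system_wq once the given number of jiffies has elapsed:

static struct delayed_work poll_dwork;

static void poll_fn(struct work_struct *work)
{
	/* process context: may sleep, take mutexes, etc. */
}

/* in init code */
INIT_DELAYED_WORK(&poll_dwork, poll_fn);
schedule_delayed_work(&poll_dwork, msecs_to_jiffies(100));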
kernel/include/linux/workqueue.h
/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns %false if @work was already on the kernel-global workqueue and
 * %true otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
static inline bool schedule_work(struct work_struct *work)
{
	return queue_work(system_wq, work);
}
/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
/* WORK_CPU_UNBOUND: no CPU is requested explicitly; __queue_work() will fall
 * back to a worker of the current CPU's normal (bound) worker_pool */
static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	return queue_work_on(WORK_CPU_UNBOUND, wq, work);
}
/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it can't go away.
 */
bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);
	/* only queue if the PENDING bit was not already set */
	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}
	local_irq_restore(flags);
	return ret;
}
./kernel/kernel/workqueue.c
static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	struct pool_workqueue *pwq;
	struct worker_pool *last_pool;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned int req_cpu = cpu;

	/* While a work item is PENDING && off queue, a task trying to
	 * steal the PENDING will busy-loop waiting for it to either get
	 * queued or lose PENDING. Grabbing PENDING and queueing should
	 * happen with IRQ disabled. */
	WARN_ON_ONCE(!irqs_disabled());
	debug_work_activate(work);

	/* if draining, only works from the same workqueue are allowed */
	if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq)))
		return;
retry:
	// (1) if no CPU was specified, use the current CPU
	if (req_cpu == WORK_CPU_UNBOUND)
		cpu = raw_smp_processor_id();

	/* pwq which will be used unless @work is executing elsewhere */
	if (!(wq->flags & WQ_UNBOUND))
		// (2) for a normal (per-cpu) wq, use the current CPU's pool_workqueue
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		// (3) for an unbound wq, use the UNBOUND pool_workqueue of the
		// node that the current CPU belongs to
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	/* (4) If the work is currently being executed by a worker elsewhere,
	 * queue it onto that worker's pool to avoid reentrancy:
	 * if @work was previously on a different pool, it might still be
	 * running there, in which case the work needs to be queued on that
	 * pool to guarantee non-reentrancy. */
	// work_struct->data records the ID of the worker_pool the work last ran
	// on; get_work_pool() looks the previous pool up by that ID
	last_pool = get_work_pool(work);
	// if the previous pool differs from the pool just chosen, look closer
	if (last_pool && last_pool != pwq->pool) {
		struct worker *worker;

		spin_lock(&last_pool->lock);
		// check whether the work is currently being executed by a
		// worker of last_pool
		worker = find_worker_executing_work(last_pool, work);
		if (worker && worker->current_pwq->wq == wq) {
			pwq = worker->current_pwq;
		} else {
			/* meh... not running there, queue here */
			spin_unlock(&last_pool->lock);
			spin_lock(&pwq->pool->lock);
		}
	} else {
		spin_lock(&pwq->pool->lock);
	}

	/*
	 * pwq is determined and locked. For unbound pools, we could have
	 * raced with pwq release and it could already be dead. If its
	 * refcnt is zero, repeat pwq selection. Note that pwqs never die
	 * without another pwq replacing it in the numa_pwq_tbl or while
	 * work items are executing on it, so the retrying is guaranteed to
	 * make forward-progress.
	 */
	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			spin_unlock(&pwq->pool->lock);
			cpu_relax();
			goto retry;
		}
		/* oops */
		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	/* pwq determined, queue */
	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry))) {
		spin_unlock(&pwq->pool->lock);
		return;
	}

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	// (5) if the pool_workqueue has fewer active works than max_active,
	// put the work on pool_workqueue->pool->worklist
	if (likely(pwq->nr_active < pwq->max_active)) {
		trace_workqueue_activate_work(work);
		pwq->nr_active++;
		worklist = &pwq->pool->worklist;
	// otherwise park it on pool_workqueue->delayed_works until a slot frees up
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
	}

	// (6) insert the work onto the chosen list
	insert_work(pwq, work, worklist, work_flags);

	spin_unlock(&pwq->pool->lock);
}
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
			struct list_head *head, unsigned int extra_flags)
{
	struct worker_pool *pool = pwq->pool;

	/* we own @work, set data and link */
	// set the pwq pointer and flag bits in work_struct->data
	set_work_pwq(work, pwq, extra_flags);
	// @head is either worker_pool->worklist or pool_workqueue->delayed_works;
	// link work->entry onto it (pending or delayed, respectively)
	list_add_tail(&work->entry, head);
	get_pwq(pwq);

	/* Ensure either wq_worker_sleeping() sees the above
	 * list_add_tail() or we see zero nr_running to avoid workers lying
	 * around lazily while there are works to be processed. */
	smp_mb();

	if (__need_more_worker(pool))
		wake_up_worker(pool);
}
2.2 Queueing work on a given workqueue. With queue_delayed_work(), the actual work is executed only after at least the specified number of jiffies has elapsed. These functions return 1 (true) if the work was successfully queued; 0 (false) means the work was already pending on a queue and was not added again.
int queue_work( struct workqueue_struct *wq, struct work_struct *work );
int queue_work_on( int cpu, struct workqueue_struct *wq, struct work_struct *work );
int queue_delayed_work( struct workqueue_struct *wq,struct delayed_work *dwork, unsigned long delay );
int queue_delayed_work_on( int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay );
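A minimal sketch of queueing onto a private workqueue rather than system_wq (names are hypothetical; alloc_workqueue() is the modern interface, and create_workqueue() is a legacy wrapper around it):

static struct workqueue_struct *my_wq;
static struct work_struct my_work;

static void my_work_fn(struct work_struct *work)
{
	/* process context: may sleep */
}

/* in module/driver init */
my_wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
if (!my_wq)
	return -ENOMEM;
INIT_WORK(&my_work, my_work_fn);
queue_work(my_wq, &my_work);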
2.3 Other APIs
There are also helper functions for flushing or cancelling queued work. To flush a particular work item and block until it completes, call flush_work(). All work items on a given workqueue can be completed by calling flush_workqueue(). In both cases the caller blocks until the operation finishes. To flush the kernel-global workqueue, call flush_scheduled_work().
int flush_work( struct work_struct *work );
int flush_workqueue( struct workqueue_struct *wq );
void flush_scheduled_work( void );
After flush_workqueue() returns, no work function submitted prior to the call is running anywhere in the system.
Work items that have not yet started executing can be cancelled. cancel_work_sync() removes the item from the queue, or blocks until the callback finishes if the handler is already running it. For delayed work there is cancel_delayed_work_sync().
int cancel_work_sync( struct work_struct *work );
int cancel_delayed_work_sync( struct delayed_work *dwork );
Finally, work_pending() and delayed_work_pending() report whether a work item is currently pending.
work_pending( work );
delayed_work_pending( work );
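Taken together, a typical module-exit path (a sketch, reusing the hypothetical my_wq/my_work names from the sketch above) cancels and flushes before destroying the queue:

static void my_exit(void)
{
	/* for delayed items, cancel_delayed_work_sync() would be used instead */
	cancel_work_sync(&my_work);    /* remove the item or wait for its handler */
	flush_workqueue(my_wq);        /* drain anything still queued */
	destroy_workqueue(my_wq);
}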
That is the core of the workqueue API. The implementation can be found in ./kernel/workqueue.c, and the API is declared in ./include/linux/workqueue.h.
3. Creating a workqueue (the code below is from older, pre-CMWQ kernels)
create_workqueue() -> __create_workqueue() -> create_workqueue_thread() -> worker_thread()
// This function waits in an endless loop for work to arrive; it normally
// sleeps until it is woken up to run work
// queue_work() wakes this thread when new work is queued
static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DECLARE_WAITQUEUE(wait, current);
	...
	current->flags |= PF_NOFREEZE;
	// raise priority (lower the nice value)
	set_user_nice(current, -5);
	...
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		// add this thread to the wait queue so it can be woken up later
		add_wait_queue(&cwq->more_work, &wait);
		// if there is no pending work, yield the CPU and go to sleep
		if (list_empty(&cwq->worklist))
			schedule();
		else // we were woken up with work pending
			__set_current_state(TASK_RUNNING);
		remove_wait_queue(&cwq->more_work, &wait);
		// the work list is non-empty: run the actual work
		if (!list_empty(&cwq->worklist))
			run_workqueue(cwq);
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
create_workqueue() -> __create_workqueue() -> create_workqueue_thread() -> worker_thread() -> run_workqueue()
// This function executes the actual work items
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	...
	// execute every work item on the queue in order
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next, struct work_struct, entry);
		void (*f) (void *) = work->func;
		void *data = work->data;

		// remove the item about to run from the list
		list_del_init(cwq->worklist.next);
		spin_unlock_irqrestore(&cwq->lock, flags);
		BUG_ON(work->wq_data != cwq);
		clear_bit(0, &work->pending);
		// invoke the work function
		f(data);
		spin_lock_irqsave(&cwq->lock, flags);
		cwq->remove_sequence++;
		wake_up(&cwq->work_done);
	}
	cwq->run_depth--;
	spin_unlock_irqrestore(&cwq->lock, flags);
}
4. Workqueues created at kernel boot
static int __init init_workqueues(void)
{
	/* initialize CPU pools */
	for_each_possible_cpu(cpu) {
		struct worker_pool *pool;

		i = 0;
		for_each_cpu_worker_pool(pool, cpu) {
			BUG_ON(init_worker_pool(pool));
			pool->cpu = cpu;
			cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
			pool->attrs->nice = std_nice[i++];
			pool->node = cpu_to_node(cpu);

			/* alloc pool ID */
			mutex_lock(&wq_pool_mutex);
			BUG_ON(worker_pool_assign_id(pool));
			mutex_unlock(&wq_pool_mutex);
		}
	}
	/* create the initial worker */
	for_each_online_cpu(cpu) {
		struct worker_pool *pool;

		for_each_cpu_worker_pool(pool, cpu) {
			pool->flags &= ~POOL_DISASSOCIATED;
			BUG_ON(!create_worker(pool));
		}
	}
	// create the default set of system-wide workqueues
	system_wq = alloc_workqueue("events", 0, 0);
	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
	system_long_wq = alloc_workqueue("events_long", 0, 0);
	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
	system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0);
	system_power_efficient_wq = alloc_workqueue("events_power_efficient", WQ_POWER_EFFICIENT, 0);
	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
							      WQ_FREEZABLE | WQ_POWER_EFFICIENT, 0);
	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || !system_unbound_wq ||
	       !system_freezable_wq || !system_power_efficient_wq ||
	       !system_freezable_power_efficient_wq);
	return 0;
}
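Drivers can target these boot-time queues directly. A short sketch (the work item w is hypothetical; each line shows an alternative destination, not a sequence):

schedule_work(&w);                  /* shorthand for queue_work(system_wq, &w) */
queue_work(system_highpri_wq, &w);  /* served by the high-priority worker pool */
queue_work(system_unbound_wq, &w);  /* not bound to any particular CPU */
queue_work(system_long_wq, &w);     /* for items that may run for a long time */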
5. Demo
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

MODULE_AUTHOR("Mike Feng");
MODULE_LICENSE("GPL");

/* test data structure: the work_struct is embedded in the user data */
struct my_data
{
	struct work_struct my_work;
	int value;
};

static struct workqueue_struct *wq;

/* allocate and initialize one instance of the test data */
static struct my_data *init_data(void)
{
	struct my_data *md = kmalloc(sizeof(*md), GFP_KERNEL);

	if (md)
		md->value = 1;
	return md;
}

/* work function: recover the containing structure via container_of() */
static void work_func(struct work_struct *work)
{
	struct my_data *md = container_of(work, struct my_data, my_work);

	printk("<2>" "The value of my data is: %d\n", md->value);
	kfree(md); /* the item is done; free it here to avoid a leak */
}

static int __init work_init(void)
{
	struct my_data *md = init_data();
	struct my_data *md2 = init_data();

	if (!md || !md2)
		return -ENOMEM; /* error handling kept minimal for the demo */
	md->value = 10;
	md2->value = 20;
	/* 1st way: use the system default workqueue (keventd_wq in old
	 * kernels, system_wq today) and schedule directly */
	INIT_WORK(&md->my_work, work_func);
	schedule_work(&md->my_work);
	/* 2nd way: create our own workqueue and queue the work onto it */
	wq = create_workqueue("test");
	INIT_WORK(&md2->my_work, work_func);
	queue_work(wq, &md2->my_work);
	return 0;
}

static void work_exit(void)
{
	/* destroy our workqueue (pending work is flushed first) */
	destroy_workqueue(wq);
}

module_init(work_init);
module_exit(work_exit);
As the example shows, once the work has been queued there appears to be no way to pass an argument to work_func directly. It can still be done: since 2.6.20 the work_struct is meant to be embedded in the user's own data structure, and container_of() recovers the user data from the work pointer, which in effect passes the argument.
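The pattern in its smallest form (a sketch with hypothetical names):

struct my_ctx {
	struct work_struct work; /* must be embedded in the user structure */
	int arg;
};

static void my_handler(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, work);

	pr_info("arg=%d\n", ctx->arg);
}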
References:
https://blog.youkuaiyun.com/xujianqun/article/details/6778529
http://blog.chinaunix.net/uid-20583479-id-1920134.html