1. Workqueue overview
Like tasklets, workqueues allow kernel code to request that a function be called at some future time (borrowing the description from LDD3).
Work queued this way is executed by kernel worker threads, i.e. in process context.
Differences between workqueues and tasklets:
1. Tasklets are built on softirqs and run in softirq context, so tasklet code must be atomic.
Workqueues are built on kernel threads, so they have no such restriction; best of all, a work function is allowed to sleep.
PS: my own driver hit exactly this: it called a sleepable function from a timer callback, which triggered the "scheduling while atomic" warning. A minimal fix is sketched after this list.
(Kernel timers are also implemented on top of softirqs.)
2. A tasklet always runs on the CPU that originally scheduled it; a work item does not necessarily.
3. A tasklet cannot be given a delay (even a short one); a work item can be queued with a delay.
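A minimal sketch of that fix: keep the timer callback atomic and defer the sleeping part to a work item. All names here are hypothetical, and timer_setup() assumes a v4.15+ kernel (older kernels use setup_timer() instead):

#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/delay.h>

static struct work_struct sleepy_work;
static struct timer_list my_timer;

/* runs in process context on a worker thread: sleeping is allowed */
static void sleepy_work_fn(struct work_struct *work)
{
	msleep(10);
}

/* runs in softirq context: must not sleep, so just schedule the work */
static void my_timer_fn(struct timer_list *t)
{
	schedule_work(&sleepy_work);
}

/* setup, e.g. in module init:
 *   INIT_WORK(&sleepy_work, sleepy_work_fn);
 *   timer_setup(&my_timer, my_timer_fn, 0);
 *   mod_timer(&my_timer, jiffies + HZ);
 */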
For tasklets, see the introduction in Linux Kernel Development, 3rd edition. do_softirq() calls tasklet_action(), which checks whether the tasklet is already in the TASKLET_STATE_RUN state; if so, another core is currently executing this tasklet and it is skipped. Otherwise the state is set, so that no other core will execute the same tasklet concurrently.
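The core of that check looks roughly like this (a simplified sketch, not the verbatim kernel source; tasklet_trylock() is a test_and_set_bit() on TASKLET_STATE_RUN):

/* simplified tasklet_action(): walk this CPU's pending tasklet list */
while (list) {
	struct tasklet_struct *t = list;
	list = list->next;
	if (tasklet_trylock(t)) {          /* sets TASKLET_STATE_RUN; fails if
	                                    * another CPU is already running it */
		if (!atomic_read(&t->count)) { /* tasklet is not disabled */
			t->func(t->data);
			tasklet_unlock(t);         /* clears TASKLET_STATE_RUN */
			continue;
		}
		tasklet_unlock(t);
	}
	/* running elsewhere or disabled: requeue it and re-raise the softirq */
}

A typical usage pattern follows: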
void do_do_tasklet(unsigned long data)
{
	/* lots of time-consuming work */
}
DECLARE_TASKLET(my_tasklet, do_do_tasklet, 0);
/* interrupt handler */
irqreturn_t do_do_interrupt(int irq, void *dev)
{
	//...
	tasklet_schedule(&my_tasklet);
	my_task_count++;
	return IRQ_HANDLED;
}
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	/* append t to the tail of this CPU's tasklet_vec list */
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);
1.1 Dynamic definition and initialization
static struct work_struct marlin_wq;
INIT_WORK(&marlin_wq, marlin_workq);
/* a delayed work item needs its own type, struct delayed_work: */
static struct delayed_work marlin_dwork;
INIT_DELAYED_WORK(&marlin_dwork, marlin_workq);
1.2 Static definition and initialization (the three-argument macros below are from kernels before 2.6.20; since then DECLARE_WORK(n, f) takes only the name and the function)
#define __WORK_INITIALIZER(n, f, d) { \
.entry = { &(n).entry, &(n).entry }, \
.func = (f), \
.data = (d), \
.timer = TIMER_INITIALIZER(NULL, 0, 0), \
}
#define DECLARE_WORK(n, f, d) \
struct work_struct n = __WORK_INITIALIZER(n, f, d)
struct work_struct {
atomic_long_t data;
struct list_head entry;
work_func_t func;
};
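For reference, a minimal sketch using the modern two-argument macros (the names here are hypothetical):

static void my_fn(struct work_struct *work)
{
	pr_info("static work ran\n");
}

static DECLARE_WORK(my_static_work, my_fn);
static DECLARE_DELAYED_WORK(my_static_dwork, my_fn);

/* later, e.g. in an interrupt handler or init path */
schedule_work(&my_static_work);
schedule_delayed_work(&my_static_dwork, HZ);   /* run after ~1 s */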
2.1 Queueing work on the system default workqueue, system_wq
int schedule_work( struct work_struct *work );
int schedule_work_on( int cpu, struct work_struct *work );
int schedule_delayed_work( struct delayed_work *dwork, unsigned long delay );
int schedule_delayed_work_on( int cpu, struct delayed_work *dwork, unsigned long delay );
static void marlin_workq(struct work_struct *work)
{
	/* anything to do */
}
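A minimal sketch of the delayed variant (hypothetical names); the handler runs on system_wq once the given number of jiffies has elapsed:

static struct delayed_work poll_dwork;

static void poll_fn(struct work_struct *work)
{
	/* process context: may sleep, take mutexes, etc. */
}

/* in init code */
INIT_DELAYED_WORK(&poll_dwork, poll_fn);
schedule_delayed_work(&poll_dwork, msecs_to_jiffies(100));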
kernel/include/linux/workqueue.h
/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns %false if @work was already on the kernel-global workqueue and
 * %true otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
static inline bool schedule_work(struct work_struct *work)
{
	return queue_work(system_wq, work);
}
/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
/* WORK_CPU_UNBOUND: no CPU is requested explicitly; __queue_work() will fall
 * back to a worker of the current CPU's normal (bound) worker_pool */
static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	return queue_work_on(WORK_CPU_UNBOUND, wq, work);
}
/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it can't go away.
 */
bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);
	/* only queue if the PENDING bit was not already set */
	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}
	local_irq_restore(flags);
	return ret;
}
./kernel/kernel/workqueue.c
static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	struct pool_workqueue *pwq;
	struct worker_pool *last_pool;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned int req_cpu = cpu;

	/* While a work item is PENDING && off queue, a task trying to
	 * steal the PENDING will busy-loop waiting for it to either get
	 * queued or lose PENDING. Grabbing PENDING and queueing should
	 * happen with IRQ disabled. */
	WARN_ON_ONCE(!irqs_disabled());
	debug_work_activate(work);

	/* if draining, only works from the same workqueue are allowed */
	if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq)))
		return;
retry:
	// (1) if no CPU was specified, use the current CPU
	if (req_cpu == WORK_CPU_UNBOUND)
		cpu = raw_smp_processor_id();

	/* pwq which will be used unless @work is executing elsewhere */
	if (!(wq->flags & WQ_UNBOUND))
		// (2) for a normal (per-cpu) wq, use the current CPU's pool_workqueue
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		// (3) for an unbound wq, use the UNBOUND pool_workqueue of the
		// node that the current CPU belongs to
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	/* (4) If the work is currently being executed by a worker elsewhere,
	 * queue it onto that worker's pool to avoid reentrancy:
	 * if @work was previously on a different pool, it might still be
	 * running there, in which case the work needs to be queued on that
	 * pool to guarantee non-reentrancy. */
	// work_struct->data records the ID of the worker_pool the work last ran
	// on; get_work_pool() looks the previous pool up by that ID
	last_pool = get_work_pool(work);
	// if the previous pool differs from the pool just chosen, look closer
	if (last_pool && last_pool != pwq->pool) {
		struct worker *worker;

		spin_lock(&last_pool->lock);
		// check whether the work is currently being executed by a
		// worker of last_pool
		worker = find_worker_executing_work(last_pool, work);
		if (worker && worker->current_pwq->wq == wq) {
			pwq = worker->current_pwq;
		} else {
			/* meh... not running there, queue here */
			spin_unlock(&last_pool->lock);
			spin_lock(&pwq->pool->lock);
		}
	} else {
		spin_lock(&pwq->pool->lock);
	}

	/*
	 * pwq is determined and locked. For unbound pools, we could have
	 * raced with pwq release and it could already be dead. If its
	 * refcnt is zero, repeat pwq selection. Note that pwqs never die
	 * without another pwq replacing it in the numa_pwq_tbl or while
	 * work items are executing on it, so the retrying is guaranteed to
	 * make forward-progress.
	 */
	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			spin_unlock(&pwq->pool->lock);
			cpu_relax();
			goto retry;
		}
		/* oops */
		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	/* pwq determined, queue */
	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry))) {
		spin_unlock(&pwq->pool->lock);
		return;
	}

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	// (5) if the pool_workqueue has fewer active works than max_active,
	// put the work on pool_workqueue->pool->worklist
	if (likely(pwq->nr_active < pwq->max_active)) {
		trace_workqueue_activate_work(work);
		pwq->nr_active++;
		worklist = &pwq->pool->worklist;
	// otherwise park it on pool_workqueue->delayed_works until a slot frees up
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
	}

	// (6) insert the work onto the chosen list
	insert_work(pwq, work, worklist, work_flags);

	spin_unlock(&pwq->pool->lock);
}
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
			struct list_head *head, unsigned int extra_flags)
{
	struct worker_pool *pool = pwq->pool;

	/* we own @work, set data and link */
	// set the pwq pointer and flag bits in work_struct->data
	set_work_pwq(work, pwq, extra_flags);
	// @head is either worker_pool->worklist or pool_workqueue->delayed_works;
	// link work->entry onto it (pending or delayed, respectively)
	list_add_tail(&work->entry, head);
	get_pwq(pwq);

	/* Ensure either wq_worker_sleeping() sees the above
	 * list_add_tail() or we see zero nr_running to avoid workers lying
	 * around lazily while there are works to be processed. */
	smp_mb();

	if (__need_more_worker(pool))
		wake_up_worker(pool);
}
2.2 Queueing work on a given workqueue. With queue_delayed_work(), the actual work is executed only after at least the specified number of jiffies has elapsed. These functions return 1 (true) if the work was successfully queued; 0 (false) means the work was already pending on a queue and was not added again.
int queue_work( struct workqueue_struct *wq, struct work_struct *work );
int queue_work_on( int cpu, struct workqueue_struct *wq, struct work_struct *work );
int queue_delayed_work( struct workqueue_struct *wq,struct delayed_work *dwork, unsigned long delay );
int queue_delayed_work_on( int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay );
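A minimal sketch of queueing onto a private workqueue rather than system_wq (names are hypothetical; alloc_workqueue() is the modern interface, and create_workqueue() is a legacy wrapper around it):

static struct workqueue_struct *my_wq;
static struct work_struct my_work;

static void my_work_fn(struct work_struct *work)
{
	/* process context: may sleep */
}

/* in module/driver init */
my_wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
if (!my_wq)
	return -ENOMEM;
INIT_WORK(&my_work, my_work_fn);
queue_work(my_wq, &my_work);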
2.3 Other APIs
There are also helper functions for flushing or cancelling queued work. To flush a particular work item and block until it completes, call flush_work(). All work items on a given workqueue can be completed by calling flush_workqueue(). In both cases the caller blocks until the operation finishes. To flush the kernel-global workqueue, call flush_scheduled_work().
int flush_work( struct work_struct *work );
int flush_workqueue( struct workqueue_struct *wq );
void flush_scheduled_work( void );
After flush_workqueue() returns, no work function submitted prior to the call is running anywhere in the system.
Work items that have not yet started executing can be cancelled. cancel_work_sync() removes the item from the queue, or blocks until the callback finishes if the handler is already running it. For delayed work there is cancel_delayed_work_sync().
int cancel_work_sync( struct work_struct *work );
int cancel_delayed_work_sync( struct delayed_work *dwork );
Finally, work_pending() and delayed_work_pending() report whether a work item is currently pending.
work_pending( work );
delayed_work_pending( work );
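Taken together, a typical module-exit path (a sketch, reusing the hypothetical my_wq/my_work names from the sketch above) cancels and flushes before destroying the queue:

static void my_exit(void)
{
	/* for delayed items, cancel_delayed_work_sync() would be used instead */
	cancel_work_sync(&my_work);    /* remove the item or wait for its handler */
	flush_workqueue(my_wq);        /* drain anything still queued */
	destroy_workqueue(my_wq);
}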
That is the core of the workqueue API. The implementation can be found in ./kernel/workqueue.c, and the API is declared in ./include/linux/workqueue.h.
3. Creating a workqueue (the code below is from older, pre-CMWQ kernels)
create_workqueue() -> __create_workqueue() -> create_workqueue_thread() -> worker_thread()
// This function waits in an endless loop for work to arrive; it normally
// sleeps until it is woken up to run work
// queue_work() wakes this thread when new work is queued
static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DECLARE_WAITQUEUE(wait, current);
	...
	current->flags |= PF_NOFREEZE;
	// raise priority (lower the nice value)
	set_user_nice(current, -5);
	...
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		// add this thread to the wait queue so it can be woken up later
		add_wait_queue(&cwq->more_work, &wait);
		// if there is no pending work, yield the CPU and go to sleep
		if (list_empty(&cwq->worklist))
			schedule();
		else // we were woken up with work pending
			__set_current_state(TASK_RUNNING);
		remove_wait_queue(&cwq->more_work, &wait);
		// the work list is non-empty: run the actual work
		if (!list_empty(&cwq->worklist))
			run_workqueue(cwq);
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
create_workqueue() -> __create_workqueue() -> create_workqueue_thread() -> worker_thread() -> run_workqueue()
// This function executes the actual work items
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	...
	// execute every work item on the queue in order
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next, struct work_struct, entry);
		void (*f) (void *) = work->func;
		void *data = work->data;

		// remove the item about to run from the list
		list_del_init(cwq->worklist.next);
		spin_unlock_irqrestore(&cwq->lock, flags);
		BUG_ON(work->wq_data != cwq);
		clear_bit(0, &work->pending);
		// invoke the work function
		f(data);
		spin_lock_irqsave(&cwq->lock, flags);
		cwq->remove_sequence++;
		wake_up(&cwq->work_done);
	}
	cwq->run_depth--;
	spin_unlock_irqrestore(&cwq->lock, flags);
}
4. Workqueues created at kernel boot
static int __init init_workqueues(void)
{
	/* initialize CPU pools */
	for_each_possible_cpu(cpu) {
		struct worker_pool *pool;

		i = 0;
		for_each_cpu_worker_pool(pool, cpu) {
			BUG_ON(init_worker_pool(pool));
			pool->cpu = cpu;
			cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
			pool->attrs->nice = std_nice[i++];
			pool->node = cpu_to_node(cpu);

			/* alloc pool ID */
			mutex_lock(&wq_pool_mutex);
			BUG_ON(worker_pool_assign_id(pool));
			mutex_unlock(&wq_pool_mutex);
		}
	}
	/* create the initial worker */
	for_each_online_cpu(cpu) {
		struct worker_pool *pool;

		for_each_cpu_worker_pool(pool, cpu) {
			pool->flags &= ~POOL_DISASSOCIATED;
			BUG_ON(!create_worker(pool));
		}
	}
	// create the default set of system-wide workqueues
	system_wq = alloc_workqueue("events", 0, 0);
	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
	system_long_wq = alloc_workqueue("events_long", 0, 0);
	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
	system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0);
	system_power_efficient_wq = alloc_workqueue("events_power_efficient", WQ_POWER_EFFICIENT, 0);
	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
							      WQ_FREEZABLE | WQ_POWER_EFFICIENT, 0);
	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || !system_unbound_wq ||
	       !system_freezable_wq || !system_power_efficient_wq ||
	       !system_freezable_power_efficient_wq);
	return 0;
}
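Drivers can target these boot-time queues directly. A short sketch (the work item w is hypothetical; each line shows an alternative destination, not a sequence):

schedule_work(&w);                  /* shorthand for queue_work(system_wq, &w) */
queue_work(system_highpri_wq, &w);  /* served by the high-priority worker pool */
queue_work(system_unbound_wq, &w);  /* not bound to any particular CPU */
queue_work(system_long_wq, &w);     /* for items that may run for a long time */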
5. Demo
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

MODULE_AUTHOR("Mike Feng");
MODULE_LICENSE("GPL");

/* test data structure: the work_struct is embedded in the user data */
struct my_data
{
	struct work_struct my_work;
	int value;
};

static struct workqueue_struct *wq;

/* allocate and initialize one instance of the test data */
static struct my_data *init_data(void)
{
	struct my_data *md = kmalloc(sizeof(*md), GFP_KERNEL);

	if (md)
		md->value = 1;
	return md;
}

/* work function: recover the containing structure via container_of() */
static void work_func(struct work_struct *work)
{
	struct my_data *md = container_of(work, struct my_data, my_work);

	printk("<2>" "The value of my data is: %d\n", md->value);
	kfree(md); /* the item is done; free it here to avoid a leak */
}

static int __init work_init(void)
{
	struct my_data *md = init_data();
	struct my_data *md2 = init_data();

	if (!md || !md2)
		return -ENOMEM; /* error handling kept minimal for the demo */
	md->value = 10;
	md2->value = 20;
	/* 1st way: use the system default workqueue (keventd_wq in old
	 * kernels, system_wq today) and schedule directly */
	INIT_WORK(&md->my_work, work_func);
	schedule_work(&md->my_work);
	/* 2nd way: create our own workqueue and queue the work onto it */
	wq = create_workqueue("test");
	INIT_WORK(&md2->my_work, work_func);
	queue_work(wq, &md2->my_work);
	return 0;
}

static void work_exit(void)
{
	/* destroy our workqueue (pending work is flushed first) */
	destroy_workqueue(wq);
}

module_init(work_init);
module_exit(work_exit);
As the example shows, once the work has been queued there appears to be no way to pass an argument to work_func directly. It can still be done: since 2.6.20 the work_struct is meant to be embedded in the user's own data structure, and container_of() recovers the user data from the work pointer, which in effect passes the argument.
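The pattern in its smallest form (a sketch with hypothetical names):

struct my_ctx {
	struct work_struct work; /* must be embedded in the user structure */
	int arg;
};

static void my_handler(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, work);

	pr_info("arg=%d\n", ctx->arg);
}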
References:
https://blog.youkuaiyun.com/xujianqun/article/details/6778529
http://blog.chinaunix.net/uid-20583479-id-1920134.html