阻塞与非阻塞是设备访问的两种不同的模式。什么是阻塞操作呢?其是指在执行设备操作的时候,如果不能获得资源,则挂起进程直到满足可操作的条件后再进行操作。而非阻塞操作则是在进程不能进行设备操作时,并不挂起到等待队列,而是放弃或者不断地查询,直到能够进行操作。
应用程序以阻塞的方式进行read操作的时候,会调用一个system call,将系统的控制权交给kernel后就进入等待状态,等kernel将这个system call执行完成后向应用程序返回响应,应用程序得到响应后,就退出阻塞状态,并进行后面的工作。
应用程序以非阻塞的方式进行write操作的时候,通过设置文件描述符的属性O_NONBLOCK使其进入非阻塞的访问状态,这时进程也会调用相应的system call,但是system call会立即从kernel中返回。
从表面上看,阻塞状态貌似没有非阻塞的访问方式效率高,事实上却不是这样,非阻塞的访问方式虽然不用等待,会立即返回,可是它不一定就完成了相应的工作,比如上面的例子里面,虽然立即返回,但是数据可能还没有真正地写入文件中,所以说效率的高低并不是表面看上去的那样。
在linux驱动中,可以使用等待队列来实现阻塞进程的唤醒。wait queue以队列为基础数据结构,与进程调度机制紧密结合,能够用于实现内核中的异步事件通知机制等。下面就先看下一些关于等待队列的基本的操作。
定义一个等待队列头并初始化:
- wait_queue_head_t my_queue; /* option 1: declare the head ... */
- init_waitqueue_head(&my_queue); /* ... and initialize it at run time */
- DECLARE_WAIT_QUEUE_HEAD(my_queue); /* option 2 (alternative to the two lines above): define and statically initialize in one step */
- struct __wait_queue_head { /* head of a wait queue */
- spinlock_t lock; /* taken by add_wait_queue()/remove_wait_queue()/__wake_up() around list access */
- struct list_head task_list; /* list of wait_queue_t entries, linked through their task_list member */
- };
- typedef struct __wait_queue_head wait_queue_head_t;
- extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); /* run-time initializer called by the macro below */
- #define init_waitqueue_head(q) /* initialize head q, supplying a lock class key private to this expansion site */ \
- do { \
- static struct lock_class_key __key; /* static: one key object per place the macro is expanded */ \
- \
- __init_waitqueue_head((q), &__key); \
- } while (0)
- void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key) /* initialize the lock and the (empty) entry list of head q */
- {
- spin_lock_init(&q->lock);
- lockdep_set_class(&q->lock, key); /* associate the lock with the caller-supplied class key */
- INIT_LIST_HEAD(&q->task_list); /* empty list: no entries queued yet */
- }
- #define DECLARE_WAIT_QUEUE_HEAD(name) /* define a wait queue head called `name`, statically initialized */ \
- wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
- #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .task_list = { &(name).task_list, &(name).task_list } } /* both list pointers refer back to the head itself, i.e. an empty list */
定义等待队列项(wait_queue_t)用DECLARE_WAITQUEUE宏来实现:
- DECLARE_WAITQUEUE(name,tsk); /* usage: define wait queue entry `name` whose private pointer is `tsk` */
- #define DECLARE_WAITQUEUE(name, tsk) \
- wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
- typedef struct __wait_queue wait_queue_t;
- struct __wait_queue { /* one entry (one waiter) on a wait queue */
- unsigned int flags; /* 0 or WQ_FLAG_EXCLUSIVE */
- #define WQ_FLAG_EXCLUSIVE 0x01
- void *private; /* set to a task pointer by DECLARE_WAITQUEUE/DEFINE_WAIT/init_waitqueue_entry */
- wait_queue_func_t func; /* callback invoked for this entry by __wake_up_common() */
- struct list_head task_list; /* links the entry into the head's task_list */
- };
- #define __WAITQUEUE_INITIALIZER(name, tsk) { /* static initializer for a wait_queue_t */ \
- .private = tsk, \
- .func = default_wake_function, \
- .task_list = { NULL, NULL } } /* not linked into any queue yet; flags is omitted and therefore zero */
flags:它的值为WQ_FLAG_EXCLUSIVE或者0,它说明等待的进程是否是互斥的。当为WQ_FLAG_EXCLUSIVE时表示互斥;
private:一般用来指向等待进程的task_struct实例;
func:唤醒等待进程时调用的函数;
task_list:用于链接等待队列中的进程
下面看下添加和移除等待队列的API函数:
- void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) /* queue `wait` on head `q` as a non-exclusive entry */
- {
- unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE; /* clear the exclusive bit */
- spin_lock_irqsave(&q->lock, flags); /* the list is only modified with q->lock held */
- __add_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
- }
- EXPORT_SYMBOL(add_wait_queue);
- static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) /* does no locking itself; the callers shown in this text take head->lock first */
- {
- list_add(&new->task_list, &head->task_list); /* link the entry onto the head's list */
- }
- void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) /* unlink `wait` from head `q` with q->lock held */
- {
- unsigned long flags;
- spin_lock_irqsave(&q->lock, flags);
- __remove_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
- }
- EXPORT_SYMBOL(remove_wait_queue);
- static inline void __remove_wait_queue(wait_queue_head_t *head, /* head is not used by the body */
- wait_queue_t *old)
- {
- list_del(&old->task_list); /* no locking here; remove_wait_queue() and sleep_on_common() hold the head's lock around this call */
- }
下面介绍的是等待事件函数,其就是依据condition条件是否满足来选择是否返回或者阻塞等待。
- wait_event(wq, condition)
- wait_event_timeout(wq, condition, timeout)
- wait_event_interruptible(wq, condition)
- wait_event_interruptible_timeout(wq, condition, timeout)
- /**
- * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
- * @condition: a C expression for the event to wait for
- *
- * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
- * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
- *
- * wake_up() has to be called after changing any variable that could
- * change the result of the wait condition.
- */
- #define wait_event(wq, condition) \
- do { \
- if (condition) /* already true: return without touching the wait queue */ \
- break; \
- __wait_event(wq, condition); /* slow path: queue the task and sleep */ \
- } while (0)
- #define __wait_event(wq, condition) /* slow path of wait_event(): loop until condition is true */ \
- do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); /* queue the entry and set the task state first ... */ \
- if (condition) /* ... then re-test, so the test happens with the entry already queued */ \
- break; \
- schedule(); \
- } \
- finish_wait(&wq, &__wait); /* back to TASK_RUNNING and off the queue */ \
- } while (0)
这里首先是定义了一个等待队列项__wait:
- #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) /* entry for `current` with autoremove_wake_function as its callback */
- #define DEFINE_WAIT_FUNC(name, function) \
- wait_queue_t name = { \
- .private = current, \
- .func = function, \
- .task_list = LIST_HEAD_INIT((name).task_list), /* self-linked, so list_empty() is true until the entry is queued */ \
- }
下面就进入循环,开始是prepare_to_wait函数,这个函数的作用是将等待队列项__wait插入到等待队列头wq中,然后将进程状态设置为TASK_UNINTERRUPTIBLE,即该阻塞状态不能被信号打断,而TASK_INTERRUPTIBLE状态可以被信号打断唤醒。然后再检查一次condition,当condition刚好为真时立即跳出循环,否则调用schedule()函数使得进程睡眠,执行schedule()进行了进程的切换以后,直到进程被唤醒才会调度该进程。for循环是等进程被唤醒后再一次检查condition条件是否满足,防止同时唤醒的进程已经抢先占据了资源。最后finish_wait将进程状态属性改为TASK_RUNNING,并且将进程从等待队列中删除。下面看下实现过程:
- /*
-  * prepare_to_wait - queue @wait on @q as a non-exclusive entry (unless it
-  * is already queued) and set the current task's state to @state.
-  *
-  * Both steps are done with q->lock held. The list_empty() test lets the
-  * wait_event loops call this on every iteration without queueing the
-  * same entry twice.
-  */
- void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
- {
- unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list))
- __add_wait_queue(q, wait);
- set_current_state(state);
- spin_unlock_irqrestore(&q->lock, flags);
- }
- EXPORT_SYMBOL(prepare_to_wait);
- void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) /* counterpart of prepare_to_wait(): set TASK_RUNNING and dequeue `wait` if it is still queued */
- {
- unsigned long flags;
- __set_current_state(TASK_RUNNING);
- /*
- * We can check for list emptiness outside the lock
- * IFF:
- * - we use the "careful" check that verifies both
- * the next and prev pointers, so that there cannot
- * be any half-pending updates in progress on other
- * CPU's that we haven't seen yet (and that might
- * still change the stack area.
- * and
- * - all other users take the lock (ie we can only
- * have _one_ other CPU that looks at or modifies
- * the list).
- */
- if (!list_empty_careful(&wait->task_list)) {
- spin_lock_irqsave(&q->lock, flags);
- list_del_init(&wait->task_list); /* unlink and re-initialize the entry's list node */
- spin_unlock_irqrestore(&q->lock, flags);
- }
- }
- EXPORT_SYMBOL(finish_wait);
- #define wait_event_timeout(wq, condition, timeout) /* evaluates to __ret: `timeout` if the condition was already true, otherwise whatever __wait_event_timeout() left in it */ \
- ({ \
- long __ret = timeout; \
- if (!(condition)) \
- __wait_event_timeout(wq, condition, __ret); \
- __ret; \
- })
- #define __wait_event_timeout(wq, condition, ret) /* like __wait_event(), but sleeps via schedule_timeout() and updates ret */ \
- do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- ret = schedule_timeout(ret); /* the returned value is the timeout passed in on the next iteration */ \
- if (!ret) /* zero: stop waiting */ \
- break; \
- } \
- finish_wait(&wq, &__wait); \
- } while (0)
剩余的两个wait()函数过程都一样,这里列出实现过程:
- #define wait_event_interruptible(wq, condition) /* evaluates to 0, or to -ERESTARTSYS when the wait was broken off by a pending signal */ \
- ({ \
- int __ret = 0; \
- if (!(condition)) \
- __wait_event_interruptible(wq, condition, __ret); \
- __ret; \
- })
- #define __wait_event_interruptible(wq, condition, ret) /* slow path: sleep in TASK_INTERRUPTIBLE until condition is true or a signal is pending */ \
- do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (!signal_pending(current)) { /* no signal pending: sleep, then re-test */ \
- schedule(); \
- continue; \
- } \
- ret = -ERESTARTSYS; /* a signal is pending: abort the wait */ \
- break; \
- } \
- finish_wait(&wq, &__wait); \
- } while (0)
- #define wait_event_interruptible_timeout(wq, condition, timeout) /* evaluates to __ret: `timeout` if the condition was already true, otherwise the value set by the helper (possibly 0 or -ERESTARTSYS) */ \
- ({ \
- long __ret = timeout; \
- if (!(condition)) \
- __wait_event_interruptible_timeout(wq, condition, __ret); \
- __ret; \
- })
- #define __wait_event_interruptible_timeout(wq, condition, ret) /* open-coded variant: uses add_wait_queue()/remove_wait_queue() instead of prepare_to_wait()/finish_wait() */ \
- do { \
- wait_queue_t __wait; \
- init_waitqueue_entry(&__wait, current); \
- add_wait_queue(&wq, &__wait); /* queued once, before the loop */ \
- for (;;) { \
- set_current_state(TASK_INTERRUPTIBLE); /* state is set before each test of the condition */ \
- if (condition) \
- break; \
- if (!signal_pending(current)) { \
- ret = schedule_timeout(ret); /* the returned value is the timeout for the next iteration */ \
- if (!ret) /* zero: stop waiting */ \
- break; \
- continue; \
- } \
- ret = -ERESTARTSYS; /* a signal is pending: abort the wait */ \
- break; \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
- } while (0)
- static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) /* run-time setup of entry q for task p; task_list is not touched here */
- {
- q->flags = 0;
- q->private = p;
- q->func = default_wake_function;
- }
下面来看一下唤醒函数,常用的有:
- #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) /* mode TASK_NORMAL, nr_exclusive 1, no key */
- #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) /* mode TASK_INTERRUPTIBLE, nr_exclusive 1, no key */
- void __wake_up(wait_queue_head_t *q, unsigned int mode, /* run __wake_up_common() on q with q->lock held */
- int nr_exclusive, void *key)
- {
- unsigned long flags;
- spin_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr_exclusive, 0, key); /* wake_flags is passed as 0 */
- spin_unlock_irqrestore(&q->lock, flags);
- }
- EXPORT_SYMBOL(__wake_up);
- static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, /* call each queued entry's func in list order; no locking here (__wake_up() holds q->lock) */
- int nr_exclusive, int wake_flags, void *key)
- {
- wait_queue_t *curr, *next;
- list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
- unsigned flags = curr->flags; /* read before func runs — presumably because func may dequeue the entry; confirm */
- if (curr->func(curr, mode, wake_flags, key) && /* stop only when: the callback returned nonzero, ... */
- (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) /* ... the entry is exclusive, and the nr_exclusive count has just reached zero */
- break;
- }
- }
通过上面的分析,对于等待队列的阻塞以及唤醒已经很清楚了,下面还有一套sleep()函数,其目的是使进程在等待队列上睡眠,如:
- sleep_on(wait_queue_head_t *q)
- interruptible_sleep_on(wait_queue_head_t *q)
其中sleep_on函数是将进程状态设置为TASK_UNINTERRUPTIBLE,而interruptible_sleep_on函数是将进程状态设置为TASK_INTERRUPTIBLE。
sleep_on与wake_up、interruptible_sleep_on与wake_up_interruptible都是成对出现使用的。
- void __sched sleep_on(wait_queue_head_t *q) /* sleep on q in TASK_UNINTERRUPTIBLE with MAX_SCHEDULE_TIMEOUT; the helper's return value is discarded */
- {
- sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
- }
- EXPORT_SYMBOL(sleep_on);
- void __sched interruptible_sleep_on(wait_queue_head_t *q) /* same as sleep_on() but in TASK_INTERRUPTIBLE */
- {
- sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
- }
- EXPORT_SYMBOL(interruptible_sleep_on);
- static long __sched
- sleep_on_common(wait_queue_head_t *q, int state, long timeout) /* queue the current task on q, sleep in `state` via schedule_timeout(), dequeue, and return what schedule_timeout() returned */
- {
- unsigned long flags;
- wait_queue_t wait; /* on-stack entry; removed from q before returning */
- init_waitqueue_entry(&wait, current);
- __set_current_state(state); /* note: no condition is tested anywhere in this function */
- spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue(q, &wait);
- spin_unlock(&q->lock); /* drops the lock only; the saved irq flags are restored at the end */
- timeout = schedule_timeout(timeout);
- spin_lock_irq(&q->lock);
- __remove_wait_queue(q, &wait);
- spin_unlock_irqrestore(&q->lock, flags);
- return timeout;
- }
在许多的设备驱动中,并不调用sleep_on()或interruptible_sleep_on(),而是亲自进行进程的状态改变和切换,这样代码的效率比较高,下面我们根据前面的globalmem虚拟字符设备驱动的例子来进行改进,增加队列等待机制,可以对照之前的代码来看加入阻塞访问前后的区别。
首先定义设备结构体,添加了r_wait和w_wait两个读写的等待队列头:
- struct globalmem_dev{ /* state of the globalmem character device */
- struct cdev cdev;
- unsigned int current_len; /* number of valid bytes currently held in mem[] */
- unsigned char mem[GLOBALMEM_SIZE]; /* data buffer; read consumes from the front, write appends at current_len */
- struct semaphore sem; /* taken by read/write around accesses to mem[] and current_len */
- wait_queue_head_t r_wait; /* readers sleep here while current_len == 0 */
- wait_queue_head_t w_wait; /* writers sleep here while the buffer is full */
- };
- /*
-  * globalmem_init - reserve the device number, allocate the device
-  * structure and initialize it.
-  *
-  * A non-zero globalmem_major is registered as a static major number;
-  * otherwise a dynamic one is requested and stored in globalmem_major.
-  *
-  * Returns 0 on success, the negative result of the region registration
-  * if that fails, or -ENOMEM if the device structure cannot be allocated
-  * (the device number is released again in that case).
-  */
- int globalmem_init(void)
- {
-     int result;
-     dev_t devno = MKDEV(globalmem_major, 0);
-     if (globalmem_major) {
-         result = register_chrdev_region(devno, 1, "globalmem");
-     } else {
-         result = alloc_chrdev_region(&devno, 0, 1, "globalmem");
-         globalmem_major = MAJOR(devno);
-     }
-     if (result < 0)
-         return result;
-     globalmem_devp = kmalloc(sizeof(*globalmem_devp), GFP_KERNEL);
-     if (!globalmem_devp) {
-         result = -ENOMEM;
-         goto fail_malloc;
-     }
-     /* globalmem_devp is itself the pointer: clear the object it points to, not the pointer variable */
-     memset(globalmem_devp, 0, sizeof(*globalmem_devp));
-     /* set up the semaphore and both wait queues before the cdev is set up, so read/write never see them uninitialized */
-     init_MUTEX(&globalmem_devp->sem);
-     init_waitqueue_head(&globalmem_devp->r_wait); /* readers' wait queue */
-     init_waitqueue_head(&globalmem_devp->w_wait); /* writers' wait queue */
-     /* globalmem_setup_cdev() is defined elsewhere; it is given the device pointer itself */
-     globalmem_setup_cdev(globalmem_devp, 0);
-     return 0;
- fail_malloc:
-     unregister_chrdev_region(devno, 1);
-     return result;
- }
- /*
-  * globalmem_read - read up to @count bytes from the front of the buffer.
-  *
-  * While the buffer is empty the caller either gets -EAGAIN (O_NONBLOCK
-  * set in filp->f_flags) or sleeps on dev->r_wait in TASK_INTERRUPTIBLE
-  * until woken. The semaphore is released before schedule() so that a
-  * writer can take it, fill the buffer and wake this task.
-  *
-  * Returns the number of bytes copied, -EAGAIN, -ERESTARTSYS if a signal
-  * is pending after waking up, or -EFAULT if copy_to_user() fails.
-  * @ppos is not used.
-  */
- static ssize_t globalmem_read(struct file *filp,char __user *buf,size_t count,loff_t *ppos)
- {
-     struct globalmem_dev *dev = filp->private_data;
-     ssize_t ret = 0;
-     DECLARE_WAITQUEUE(wait, current);
-     down(&dev->sem);
-     add_wait_queue(&dev->r_wait, &wait);
-     while (dev->current_len == 0) {
-         if (filp->f_flags & O_NONBLOCK) {
-             ret = -EAGAIN;
-             goto out;
-         }
-         /* state is changed while still holding sem, i.e. before any writer can run and call wake_up */
-         __set_current_state(TASK_INTERRUPTIBLE);
-         up(&dev->sem);
-         schedule();
-         if (signal_pending(current)) {
-             ret = -ERESTARTSYS;
-             goto out2; /* sem is not held on this path */
-         }
-         down(&dev->sem);
-     }
-     if (count > dev->current_len)
-         count = dev->current_len;
-     if (copy_to_user(buf, dev->mem, count)) {
-         ret = -EFAULT;
-         goto out;
-     }
-     /* source and destination overlap inside mem[], so memmove() is required */
-     memmove(dev->mem, dev->mem + count, dev->current_len - count);
-     dev->current_len -= count;
-     printk(KERN_INFO "read %zu bytes(s),current_len:%u\n", count, dev->current_len);
-     wake_up_interruptible(&dev->w_wait); /* space was freed: let blocked writers retry */
-     ret = count;
- out:
-     up(&dev->sem);
- out2:
-     remove_wait_queue(&dev->r_wait, &wait);
-     set_current_state(TASK_RUNNING);
-     return ret;
- }
- /*
-  * globalmem_write - append up to @count bytes to the buffer.
-  *
-  * While the buffer is full (current_len == GLOBALMEM_SIZE, the size of
-  * dev->mem) the caller either gets -EAGAIN (O_NONBLOCK set in
-  * filp->f_flags) or sleeps on dev->w_wait in TASK_INTERRUPTIBLE until
-  * woken. The semaphore is released before schedule() so that a reader
-  * can take it, drain the buffer and wake this task.
-  *
-  * Returns the number of bytes stored (possibly fewer than requested),
-  * -EAGAIN, -ERESTARTSYS if a signal is pending after waking up, or
-  * -EFAULT if copy_from_user() fails. @ppos is not used.
-  */
- static ssize_t globalmem_write(struct file *filp,const char __user *buf,size_t count,loff_t *ppos)
- {
-     struct globalmem_dev *dev = filp->private_data;
-     ssize_t ret = 0;
-     DECLARE_WAITQUEUE(wait, current);
-     down(&dev->sem);
-     add_wait_queue(&dev->w_wait, &wait);
-     while (dev->current_len == GLOBALMEM_SIZE) {
-         if (filp->f_flags & O_NONBLOCK) {
-             ret = -EAGAIN;
-             goto out;
-         }
-         /* state is changed while still holding sem, i.e. before any reader can run and call wake_up */
-         __set_current_state(TASK_INTERRUPTIBLE);
-         up(&dev->sem);
-         schedule();
-         if (signal_pending(current)) {
-             ret = -ERESTARTSYS;
-             goto out2; /* sem is not held on this path */
-         }
-         down(&dev->sem);
-     }
-     /* clamp to the free space left in mem[] */
-     if (count > GLOBALMEM_SIZE - dev->current_len)
-         count = GLOBALMEM_SIZE - dev->current_len;
-     if (copy_from_user(dev->mem + dev->current_len, buf, count)) {
-         ret = -EFAULT;
-         goto out;
-     }
-     dev->current_len += count;
-     printk(KERN_INFO "written %zu bytes(s),current_len:%u\n", count, dev->current_len);
-     wake_up_interruptible(&dev->r_wait); /* data is available: let blocked readers retry */
-     ret = count;
- out:
-     up(&dev->sem);
- out2:
-     remove_wait_queue(&dev->w_wait, &wait);
-     set_current_state(TASK_RUNNING);
-     return ret;
- }
其并没有调用sleep_on()等函数,选择自己设置状态以及进程的切换等动作。将上述的过程用wait_event_interruptible()函数替换的话,可能会出现死锁的状态,可以自己思考一下这个过程(提示:如果进程在持有dev->sem的情况下调用wait_event_interruptible()进入睡眠,另一方就无法获得该信号量去改变条件并唤醒它)。上面读缓冲区的数据需要在写函数中唤醒r_wait,才可以进行读的操作,而进行写的过程需要在读函数中唤醒w_wait才可以写入。