4: wait_event_interruptible_timeout returning -ERESTARTSYS on Linux (signal interruption)

Problem overview:

When the codec starts encoding, the app enters kernel space through a system call; the kernel-side code looks like this:

long rval = wait_event_interruptible_timeout(wait_queue, condition, 5 * HZ); /* timeout is in jiffies: 5 * HZ ≈ 5 s */
printk("hello world, rval = %ld\n", rval);

rval == 0 means the timeout elapsed and condition is still not satisfied.
rval > 0 means condition was satisfied before the timeout expired; the value is the remaining timeout, in jiffies (not milliseconds).
But while debugging we saw rval come back as -512.
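To make the three cases explicit, here is a minimal kernel-side sketch of how the return value could be handled. The wait_queue, condition and 5 * HZ timeout follow the snippet above; returning -ETIMEDOUT for the timeout case is an assumption for illustration, not the driver's actual code:

long rval;

rval = wait_event_interruptible_timeout(wait_queue, condition, 5 * HZ);
if (rval > 0) {
        /* condition became true; rval is the remaining timeout in jiffies */
        pr_info("encode finished, %ld jiffies to spare\n", rval);
} else if (rval == 0) {
        /* the full 5 s elapsed and condition is still false */
        return -ETIMEDOUT;
} else {
        /* rval == -ERESTARTSYS (-512): a signal interrupted the sleep */
        return rval;    /* propagate it so the syscall can be restarted or fail */
}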

Problem analysis

Looking up the return value, -512 is in fact -ERESTARTSYS. Following the usual "try Baidu first, fall back to Google" approach, the links below helped narrow the problem down: -ERESTARTSYS is returned because the system call was interrupted by a signal (specifically signals such as **SIGINT, SIGQUIT, SIGTERM**) while the process was sleeping; the interrupted system call then returns -ERESTARTSYS.
Signals can be raised for many reasons, which we won't go into here; in our case the scenario was one app launching another app, which caused the interrupting signal to be delivered.
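If you want to confirm from inside the driver which signal actually fired, a small debug sketch like the following can help. It is an assumption for illustration only: the header location varies with kernel version, and only the thread-private pending set is inspected here (process-wide signals sit in current->signal->shared_pending):

#include <linux/sched/signal.h>   /* signal_pending(), current, sigismember() */

static void dump_interrupting_signal(void)
{
        if (!signal_pending(current))
                return;

        if (sigismember(&current->pending.signal, SIGINT))
                pr_info("sleep interrupted by SIGINT\n");
        else if (sigismember(&current->pending.signal, SIGQUIT))
                pr_info("sleep interrupted by SIGQUIT\n");
        else if (sigismember(&current->pending.signal, SIGTERM))
                pr_info("sleep interrupted by SIGTERM\n");
        else
                pr_info("sleep interrupted by some other signal\n");
}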

References on system calls returning -ERESTARTSYS

Analysis of system calls returning -ERESTARTSYS, part 1.
Analysis of system calls returning -ERESTARTSYS, part 2.

Solution

While the app is running the critical part of its work, block these signals, and unblock them again once it is done.

sigset_t mask_sigset, prev;

/* Initialize the set of signals to be blocked */
sigemptyset(&mask_sigset);
sigaddset(&mask_sigset, SIGINT);
sigaddset(&mask_sigset, SIGQUIT);
sigaddset(&mask_sigset, SIGTERM);

/* Attach a handler to each signal */
signal(SIGINT, signal_handler);
signal(SIGQUIT, signal_handler);
signal(SIGTERM, signal_handler);

/* Block the signals in mask_sigset; the previously blocked set is saved in prev */
sigprocmask(SIG_BLOCK, &mask_sigset, &prev);

/* task 1: work that must run with these signals blocked */

/* Unblock the signals again */
sigprocmask(SIG_UNBLOCK, &mask_sigset, NULL);

/* task 2: work that does not need the signals blocked */
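Putting it together, here is a self-contained userspace sketch of the same block/unblock pattern. The names block_encode_signals and unblock_encode_signals, and the placeholder comment for the encode call, are illustrative only, not the real app's code:

#include <signal.h>

static sigset_t mask_sigset, prev;
static volatile sig_atomic_t got_signal;

static void signal_handler(int signo)
{
    got_signal = signo;                 /* async-signal-safe: just record it */
}

static void block_encode_signals(void)
{
    sigemptyset(&mask_sigset);
    sigaddset(&mask_sigset, SIGINT);
    sigaddset(&mask_sigset, SIGQUIT);
    sigaddset(&mask_sigset, SIGTERM);

    signal(SIGINT,  signal_handler);
    signal(SIGQUIT, signal_handler);
    signal(SIGTERM, signal_handler);

    sigprocmask(SIG_BLOCK, &mask_sigset, &prev);   /* old mask saved in prev */
}

static void unblock_encode_signals(void)
{
    sigprocmask(SIG_SETMASK, &prev, NULL);         /* restore the saved mask */
}

int main(void)
{
    block_encode_signals();
    /* ... issue the encode system call here; the interruptible sleep in the
     * kernel can no longer be broken by SIGINT/SIGQUIT/SIGTERM ... */
    unblock_encode_signals();
    return 0;
}

Any of the three signals delivered while blocked stays pending and is handled as soon as unblock_encode_signals() restores the original mask.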
References on blocking and unblocking signals

Explanation of blocking and unblocking signals.
