write函数过程解析 __write_nocancel

最新推荐文章于 2025-10-18 15:50:39 发布

转载最新推荐文章于 2025-10-18 15:50:39 发布 · 5.5k 阅读

本文详细解析了write函数的工作原理，从用户空间调用开始，逐步深入到内核空间的处理流程，包括汇编代码示例及系统调用表的查找。

write函数作为用户向终端或者文件进行写数据的重要函数，有着重要的作用。

+-------+      +-----------+      +-----------+      +------------+
| write | ---> | sys_write | ---> | vfs_write | ---> | ext4_write |
+-------+      +-----------+      +-----------+      +------------+

上面是write函数从用户空间到内核空间的过程。比如下面这个具体的例子write_test.c：

#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>

/*
 * Minimal write(2) demo: open the existing file "ab.txt" for read/write and
 * write the string "hello\n" to it.
 *
 * Exits with EXIT_SUCCESS when the data was handed to write() and the
 * descriptor was closed cleanly, EXIT_FAILURE otherwise.
 */
int main(void)
{
   char buffer[10] = "hello\n";
   size_t len = strlen(buffer);
   ssize_t count;   /* write() returns ssize_t, not int */
   /* No O_CREAT: open() fails unless ab.txt already exists. */
   int fd = open ("ab.txt",O_RDWR);

   if (fd == -1)
   {
       fprintf(stderr,"can't open file:[%s]\n","ab.txt");
       exit(EXIT_FAILURE);
   }

   count = write(fd,buffer,len);
   if (count == -1)
   {
       fprintf(stderr,"write error\n");
       close(fd);
       exit(EXIT_FAILURE);
   }

   /* Release the descriptor explicitly; close() can itself report an error. */
   if (close(fd) == -1)
   {
       fprintf(stderr,"close error\n");
       exit(EXIT_FAILURE);
   }

   exit(EXIT_SUCCESS);
}

采用gcc静态编译的方式：gcc -static -o write_test write_test.c 【使用静态编译的方式是为了更方便地查看汇编代码】

接下来对生成的可执行文件进行反汇编：root@node1:~/unixprogram# objdump -d write_test > assemble 【注：因显示的汇编代码有11万行，所以保存为文件；若采用动态链接的方式（gcc默认的编译选项），生成的文件会小很多】

查看assemble文件：【挑选重要的行】

08048250 <main>:
......
80482d9:   89 44 24 04             mov    %eax,0x4(%esp)
80482dd:   8b 44 24 18             mov    0x18(%esp),%eax
80482e1:   89 04 24                mov    %eax,(%esp)
80482e4:   e8 e7 77 00 00          call   804fad0 <__libc_write>
......

上面是main函数对应的汇编代码，可以看到它调用的是__libc_write函数。

0804fad0 <__libc_write>:
804fad0:   65 83 3d 0c 00 00 00    cmpl   $0x0,%gs:0xc
804fad7:   00
804fad8:   75 21                   jne    804fafb <__write_nocancel+0x21>

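上面对应的是__libc_write函数的汇编代码。cmpl指令检查%gs:0xc处的值（应为glibc线程控制块中的多线程标志，待确认）：该值不为0时，jne跳转到804fafb（标注为__write_nocancel+0x21，此处未列出）；该值为0时，则顺序执行到紧随其后的地址804fada，也就是__write_nocancel函数。注意这里是顺序落入（fall through）__write_nocancel，而不是通过call指令调用它。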

0804fada <__write_nocancel>:
804fada:   53                      push   %ebx
804fadb:   8b 54 24 10             mov    0x10(%esp),%edx
804fadf:   8b 4c 24 0c             mov    0xc(%esp),%ecx
804fae3:   8b 5c 24 08             mov    0x8(%esp),%ebx
804fae7:   b8 04 00 00 00          mov    $0x4,%eax
804faec:   cd 80                   int    $0x80
......

从__write_nocancel函数中我们可以看到，这里先把三个参数分别放入ebx、ecx、edx寄存器，再把立即数4传递给eax寄存器，这是write系统调用的系统调用号，然后通过软中断int $0x80进入内核态，由system_call()中断处理函数进行处理。

【补充说明】在glibc-2.11.2的源码中，__libc_write只有一个通用的桩（stub）实现，定义在io/write.c中（如下所示，它只做参数检查，然后设置ENOSYS并返回-1）；而实际使用的__write_nocancel是根据系统调用模板生成的，在源码中没有对应的C函数。io/write.c中的定义如下：

     /* Write NBYTES of BUF to FD. Return the number written, or -1. */
ssize_t
　　__libc_write (int fd, const void *buf, size_t nbytes)
　　{
　　 if (nbytes == 0)
    　　　　return 0;
　　 if (fd < 0)
   　　 {
      　　__set_errno (EBADF);
      　　return -1;
    　　}
　　if (buf == NULL)
   　　 {
     　　 __set_errno (EINVAL);
      　　return -1;
    　　}

　　 __set_errno (ENOSYS);
　　return -1;
　　}
　　libc_hidden_def (__libc_write)
　　stub_warning (write)

　　weak_alias (__libc_write, __write)
　　libc_hidden_weak (__write)
　　weak_alias (__libc_write, write)
　　#include <stub-tag.h>

而系统调用部分是根据如下几个文件生成：glibc-2.11.2/sysdeps/unix/syscall.S glibc-2.11.2/sysdeps/unix/syscalls.list glibc-2.11.2/sysdeps/unix/syscall-template.S

对于系统调用处理的汇编代码位于linux源码中的 arch/x86/kernel/entry_32.S 【注：内核版本为2.6.39 处理器：x86中的32位】

ENTRY(system_call)
   RING0_INT_FRAME           # can't unwind into user space anyway
   pushl_cfi %eax           # save orig_eax
   SAVE_ALL
   GET_THREAD_INFO(%ebp)
                   # system call tracing in operation / emulation
   testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
   jnz syscall_trace_entry
   cmpl $(nr_syscalls), %eax
   jae syscall_badsys
syscall_call:
   call *sys_call_table(,%eax,4)
   movl %eax,PT_EAX(%esp)       # store the return value
......

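这是系统调用的处理函数：先检查eax中的系统调用号是否超出范围（不小于nr_syscalls则跳转到syscall_badsys），然后以该系统调用号为下标，在系统调用表sys_call_table中取出对应的表项（每项4字节）并调用它。对于write系统调用（调用号为4），就由此进入到sys_write。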

/*
 * This file contains the system call numbers.
 *
 * Each __NR_<name> value is the number user space loads into %eax before
 * executing "int $0x80"; the kernel entry code then uses that number as
 * the index into sys_call_table.
 */

#define __NR_restart_syscall      0
#define __NR_exit          1
#define __NR_fork          2
#define __NR_read          3
/* 4 is the immediate that __write_nocancel moves into %eax. */
#define __NR_write          4
#define __NR_open          5
#define __NR_close          6
#define __NR_waitpid          7
#define __NR_creat          8
........

上面的是linux下的系统调用号列表，位于linux-2.6.39/arch/x86/include/asm/unistd_32.h中。

而系统调用表中write对应的sys_write函数定义在linux-2.6.39/fs/read_write.c中，如下：

/*
 * write system call: look up the file for descriptor fd and pass the
 * user buffer to vfs_write() at the file's current position.
 *
 * fd:    descriptor supplied by the caller
 * buf:   user-space buffer holding the data to write
 * count: number of bytes requested
 *
 * Returns whatever vfs_write() returns, or -EBADF when fget_light()
 * yields no file for fd.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
       size_t, count)
{
   struct file *file;
   ssize_t ret = -EBADF;   /* stays -EBADF if no file is found below */
   int fput_needed;

   /* NOTE(review): fput_needed is filled in here and handed back to
    * fput_light() below; presumably it records whether a reference
    * must be dropped -- confirm against fget_light()'s definition. */
   file = fget_light(fd, &fput_needed);
   if (file) {
       /* Work on a local copy of the file position: vfs_write() gets a
        * pointer to it, and the value is stored back afterwards
        * regardless of what vfs_write() returned. */
       loff_t pos = file_pos_read(file);
       ret = vfs_write(file, buf, count, &pos);
       file_pos_write(file, pos);
       fput_light(file, fput_needed);
   }

   return ret;
}