Linux 详谈基础IO

#include <stdio.h>
#define ONE 0001 //0000 0001
#define TWO 0002 //0000 0010
#define THREE 0004 //0000 0100
/*
 * Print a message for every flag bit that is set in `flags`, followed by a
 * newline. ONE, TWO and THREE are tested independently, so a caller can pass
 * several options at once by OR-ing them into a single int.
 */
void func(int flags) {
    /* Pair each flag bit with the message printed when that bit is set. */
    const struct {
        int bit;
        const char *text;
    } table[] = {
        { ONE,   "flags has ONE! " },
        { TWO,   "flags has TWO! " },
        { THREE, "flags has THREE! " },
    };

    for (size_t i = 0; i < sizeof table / sizeof table[0]; i++) {
        if ((flags & table[i].bit) != 0) {
            fputs(table[i].text, stdout);
        }
    }
    putchar('\n');
}
/*
 * Call func() with one flag, then with several flags OR-ed together, to show
 * how a single int argument can carry multiple options.
 */
int main() {
    const int combos[] = { ONE, THREE, ONE | TWO, ONE | THREE | TWO };

    for (size_t i = 0; i < sizeof combos / sizeof combos[0]; i++) {
        func(combos[i]);
    }
    return 0;
}

操作⽂件，除了上⼩节的C接⼝（当然，C++也有接⼝，其他语⾔也有），我们还可以采⽤系统接⼝来进⾏⽂件访问，先来直接以系统代码的形式，实现和上⾯⼀模⼀样的代码：

写文件:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Open (creating if necessary) "myfile" for writing and write the line
 * "hello!\n" to it five times through the system-call interface.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a write fails.
 */
int main()
{
    /* Clear the umask so the mode passed to open() is applied unchanged. */
    umask(0);
    int fd = open("myfile", O_WRONLY|O_CREAT, 0644);
    if(fd < 0){
        perror("open");
        return 1;
    }
    const char *msg = "hello!\n";
    size_t len = strlen(msg);
    for(int count = 5; count > 0; count--){
        /*
         * write(fd, buf, n): fd is the file descriptor, buf is the start of
         * the data, n is the number of bytes we would like to write.
         * The return value is the number of bytes actually written, which
         * may be smaller than n, or -1 on error - so keep writing until the
         * whole message has gone out.
         */
        size_t done = 0;
        while(done < len){
            ssize_t n = write(fd, msg + done, len - done);
            if(n < 0){
                perror("write");
                close(fd);
                return 1;
            }
            done += (size_t)n;
        }
    }
    close(fd);
    return 0;
}

读文件:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <unistd.h>
#include <string.h>
/*
 * Open "myfile" read-only and copy its contents to standard output, reading
 * in chunks the size of the message the writer program stored.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read fails.
 */
int main()
{
    int fd = open("myfile", O_RDONLY);
    if(fd < 0){
        perror("open");
        return 1;
    }
    const char *msg = "hello!\n";
    size_t chunk = strlen(msg);   /* bytes requested per read(), well below sizeof(buf) */
    char buf[1024];
    int rc = 0;
    while(1){
        /* read() mirrors write(): returns the byte count, 0 at EOF, -1 on error. */
        ssize_t s = read(fd, buf, chunk);
        if(s > 0){
            /* read() does not NUL-terminate; do it before treating buf as a string. */
            buf[s] = '\0';
            printf("%s", buf);
        }else{
            if(s < 0){
                perror("read");
                rc = 1;
            }
            break;
        }
    }
    close(fd);
    return rc;
}

接⼝介绍

open

man open

# include <sys/types.h>

# include <sys/stat.h>

# include <fcntl.h>

int open ( const char *pathname, int flags);

int open ( const char *pathname, int flags, mode_t mode);

pathname: 要打开或创建的⽬标⽂件

flags: 打开文件时，可以传入多个参数选项，用下面的一个或者多个常量进行“或”运算，构成 flags。

参数 :

        O_RDONLY: 只读打开

        O_WRONLY: 只写打开

        O_RDWR : 读，写打开

                这三个常量，必须指定⼀个且只能指定⼀个

        O_CREAT : 若文件不存在，则创建它。需要使用 mode 选项，来指明新文件的访问权限

        O_APPEND: 追加写

返回值：

        成功：新打开的⽂件描述符

        失败： -1

mode_t理解：直接 man ⼿册，⽐什么都清楚。

open 函数具体使⽤哪个，和具体应⽤场景相关，如⽬标⽂件不存在，需要open创建，则第三个参数表⽰创建⽂件的默认权限,否则，使⽤两个参数的open。

write read close lseek ,类⽐C⽂件相关接⼝。

⽂件描述符fd

通过对open函数的学习，我们知道了⽂件描述符就是⼀个⼩整数, Linux进程默认情况下会有3个缺省打开的⽂件描述符，分别是标准输⼊0，标准输出1，标准错误2.

0,1,2对应的物理设备⼀般是：键盘，显⽰器，显⽰器

所以输⼊输出还可以采⽤如下⽅式：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>   /* read, write */
/*
 * Read one chunk from standard input (fd 0) and echo it to both standard
 * output (fd 1) and standard error (fd 2) using the raw descriptors.
 */
int main()
{
    char buf[1024];
    /* Leave room for the terminating NUL that is stored at buf[s] below. */
    ssize_t s = read(0, buf, sizeof(buf) - 1);
    if(s > 0){
        buf[s] = 0;
        write(1, buf, strlen(buf));
        write(2, buf, strlen(buf));
    }
    return 0;
}

而现在知道，文件描述符就是从0开始的小整数。当我们打开文件时，操作系统在内存中要创建相应的数据结构来描述目标文件，于是就有了 file 结构体，表示一个已经打开的文件对象。而进程执行 open 系统调用，所以必须让进程和文件关联起来。每个进程都有一个指针 *files，指向一张表 files_struct，该表最重要的部分就是包含一个指针数组，每个元素都是一个指向打开文件的指针！所以，本质上，文件描述符就是该数组的下标。所以，只要拿着文件描述符，就可以找到对应的文件。

对于以上原理结论我们可通过内核源码验证：

首先要找到 task_struct 结构体在内核中的位置，路径为： /usr/src/kernels/3.10.0-1160.71.1.el7.x86_64/include/linux/sched.h （3.10.0-1160.71.1.el7.x86_64 是内核版本，可使用 uname -a 自行查看服务器配置；因为这个文件夹只有一个，所以也不用刻意去分辨，内核版本其实也随意）

⽂件描述符的分配规则

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>   /* close */
/*
 * Open "myfile" read-only and print the file descriptor number that open()
 * returned.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    int fd = open("myfile", O_RDONLY);
    if(fd < 0){
        perror("open");
        return 1;
    }
    printf("fd: %d\n", fd);
    close(fd);
    return 0;
}

输出发现是 fd: 3 。关闭0或者2，再看：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>   /* close */
/*
 * Close a standard descriptor first, then open "myfile" and print the
 * descriptor number that open() hands back.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    close(0);
    /* Alternative experiment: close descriptor 2 instead of 0. */
    //close(2);
    int fd = open("myfile", O_RDONLY);
    if(fd < 0){
        perror("open");
        return 1;
    }
    printf("fd: %d\n", fd);
    close(fd);
    return 0;
}

发现结果是： fd: 0 或者 fd: 2 。可见，文件描述符的分配规则：在 files_struct 的数组当中，找到当前没有被使用的最小的一个下标，作为新的文件描述符。

重定向

那如果关闭1呢？看代码：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>   /* close */
/*
 * Close descriptor 1 (standard output), open "myfile", then print through
 * printf() to observe where the output ends up.
 *
 * Exits with 0 on success, returns 1 if the file cannot be opened.
 */
int main()
{
    close(1);
    int fd = open("myfile", O_WRONLY|O_CREAT, 00644);
    if(fd < 0){
        perror("open");
        return 1;
    }
    printf("fd: %d\n", fd);
    /* Push the buffered text out before the descriptor is closed below. */
    fflush(stdout);
    close(fd);
    exit(0);
}

此时，我们发现，本来应该输出到显⽰器上的内容，输出到了⽂件 myfile 当中，其中，fd＝1。这种现象叫做输出重定向。常⻅的重定向有: > , >> , <

那重定向的本质是什么呢？

使⽤ dup2 系统调⽤

函数原型如下:

#include <unistd.h>
int dup2(int oldfd, int newfd);

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
/*
 * Redirect standard output to the file "./log" with dup2(), then copy
 * everything read from standard input to it through printf().
 *
 * Returns 0 when input is exhausted (or a read fails), 1 if the log file
 * cannot be opened or the redirection cannot be set up.
 */
int main() {
    /* O_CREAT requires the third (mode) argument; without it the new file's
     * permissions are indeterminate. */
    int fd = open("./log", O_CREAT | O_RDWR, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* dup2() closes descriptor 1 itself before reusing it, so no separate
     * close(1) is needed. */
    if (dup2(fd, 1) < 0) {
        perror("dup2");
        close(fd);
        return 1;
    }
    for (;;) {
        /* Zero-filled and read one byte short, so buf is always a valid C string. */
        char buf[1024] = {0};
        ssize_t read_size = read(0, buf, sizeof(buf) - 1);
        if (read_size < 0) {
            perror("read");
            break;
        }
        if (read_size == 0) {
            /* End of input: stop instead of spinning forever. */
            break;
        }
        printf("%s", buf);
        fflush(stdout);
    }
    return 0;
}

printf 是C库当中的IO函数，一般往 stdout 中输出，但是 stdout 底层访问文件的时候，找的还是 fd:1。但此时，fd:1 下标所表示的内容，已经变成了 myfile 的地址，不再是显示器文件的地址，所以，输出的任何消息都会往文件中写入，进而完成输出重定向。那追加和输入重定向如何完成呢？

理解“⼀切皆⽂件”

⾸先，在windows中是⽂件的东西，它们在linux中也是⽂件；其次⼀些在windows中不是⽂件的东西，⽐如进程、磁盘、显⽰器、键盘这样硬件设备也被抽象成了⽂件，你可以使⽤访问⽂件的⽅法访问它们获得信息；甚⾄管道，也是⽂件；将来我们要学习⽹络编程中的socket（套接字）这样的东西, 使⽤的接⼝跟⽂件接⼝也是⼀致的。

这样做最明显的好处是，开发者仅需要使⽤⼀套 API 和开发⼯具，即可调取 Linux 系统中绝⼤部分的资源。举个简单的例⼦，Linux 中⼏乎所有读（读⽂件，读系统状态，读PIPE）的操作都可以⽤ read 函数来进⾏；⼏乎所有更改（更改⽂件，更改系统参数，写 PIPE）的操作都可以⽤ write 函数来进⾏。

之前我们讲过，当打开一个文件时，操作系统为了管理所打开的文件，都会为这个文件创建一个 file 结构体，该结构体定义在 /usr/src/kernels/3.10.0-1160.71.1.el7.x86_64/include/linux/fs.h 下，以下展示了该结构中我们关心的部分内容：

/*
 * Excerpt of the kernel's struct file, which the article describes as the
 * object created for each opened file. Members not discussed in the text
 * are elided with "...".
 */
struct file {
    ...
    struct inode *f_inode; /* cached value */
    const struct file_operations *f_op;
    ...
    atomic_long_t f_count; // reference count of the open file; per the article it
    // grows when more file pointers refer to this object.
    unsigned int f_flags; // described by the article as the permissions the file was opened with - NOTE(review): presumably the open() flags; confirm
    fmode_t f_mode; // access mode for the file, e.g. read-only, write-only. Per the
    // article, all of the flags are defined in the header <fcntl.h>
    loff_t f_pos; // current read/write position in the file
    ...
} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */

值得关注的是 struct file 中的 f_op 指针指向了⼀个 file_operations 结构体，这个结构体中的成员除了struct module* owner 其余都是函数指针。该结构和 struct file 都在fs.h下。

file_operations 就是把系统调用和驱动程序关联起来的关键数据结构，这个结构的每一个成员都对应着一个系统调用。读取 file_operations 中相应的函数指针，接着把控制权转交给函数，从而完成了 Linux 设备驱动程序的工作。

介绍完相关代码，⼀张图总结：

上图中的外设，每个设备都可以有自己的 read、write，但一定是对应着不同的操作方法！！但通过 struct file 下 file_operations 中的各种函数回调，让我们开发者只用 file 便可调取 Linux 系统中绝大部分的资源！！这便是“linux下一切皆文件”的核心理解。

缓冲区

什么是缓冲区

缓冲区是内存空间的⼀部分。也就是说，在内存空间中预留了⼀定的存储空间，这些存储空间⽤来缓冲输⼊或输出的数据，这部分预留的空间就叫做缓冲区。缓冲区根据其对应的是输⼊设备还是输出设备，分为输⼊缓冲区和输出缓冲区。

为什么要引⼊缓冲区机制

读写文件时，如果不开辟对文件操作的缓冲区，直接通过系统调用对磁盘进行操作(读、写等)，那么每次对文件进行一次读写操作时，都需要使用读写系统调用来处理此操作，即需要执行一次系统调用。执行一次系统调用将涉及到CPU状态的切换，即从用户空间切换到内核空间，实现进程上下文的切换，这将损耗一定的CPU时间，频繁的磁盘访问对程序的执行效率造成很大的影响。

为了减少使用系统调用的次数，提高效率，我们就可以采用缓冲机制。比如我们从磁盘里取信息，在对磁盘文件进行操作时，可以一次从文件中读出大量的数据到缓冲区中，以后对这部分的访问就不需要再使用系统调用了，等缓冲区的数据取完后再去磁盘中读取，这样就可以减少磁盘的读写次数，再加上计算机对缓冲区的操作大大快于对磁盘的操作，故应用缓冲区可大大提高计算机的运行速度。

⼜⽐如，我们使⽤打印机打印⽂档，由于打印机的打印速度相对较慢，我们先把⽂档输出到打印机相应的缓冲区，打印机再⾃⾏逐步打印，这时我们的CPU可以处理别的事情。可以看出，缓冲区就是⼀块内存区，它⽤在输⼊输出设备和CPU之间，⽤来缓存数据。它使得低速的输⼊输出设备和⾼速的CPU能够协调⼯作，避免低速的输⼊输出设备占⽤CPU，解放出CPU，使其能够⾼效率⼯作。

FILE

因为IO相关函数与系统调⽤接⼝对应，并且库函数封装系统调⽤，所以本质上，访问⽂件都是通过fd访问的。所以C库当中的FILE结构体内部，必定封装了fd。

#include <stdio.h>
#include <string.h>
#include <unistd.h>   /* write, fork */
/*
 * Emit one line each through printf(), fwrite() and the write() system call,
 * then fork() just before returning, so the output of the library calls and
 * of the system call can be compared.
 */
int main()
{
    const char *msg0="hello printf\n";
    const char *msg1="hello fwrite\n";
    const char *msg2="hello write\n";
    printf("%s", msg0);
    /* Size the write from msg1 itself, not from msg0. */
    fwrite(msg1, strlen(msg1), 1, stdout);
    write(1, msg2, strlen(msg2));
    /* Deliberately no fflush() before fork(): the experiment is about what
     * happens to stdio output that may still be buffered at this point. */
    fork();
    return 0;
}

运⾏出结果：