Linux系统下可以通过open/creat/socket/socketpair/pipe等系统调用创建文件描述符(fd)。之后可以基于创建的文件描述符进行IO操作,操作完毕之后执行close(fd)关闭文件描述符。
1.通过socket sendfile接口传输文件
头文件sendfile.hpp:
#include<iostream>
#include<stdio.h>
#include<unistd.h>
#include<string.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
// Minimal wrapper around a single TCP/IPv4 socket descriptor.
//
// The object does NOT close its descriptor on destruction; the owner must
// call Close() explicitly. Accept() stores the new connection in another
// tcpsvr object without closing whatever that object held before, so the
// caller should Close() it between connections.
class tcpsvr
{
public:
    tcpsvr(void) : _sockfd(-1) {}

    // Deliberately empty: closing here would double-close descriptors if the
    // object were ever copied.
    ~tcpsvr(void) {}

    // Create the TCP socket. Returns false (after perror) on failure.
    bool Createsocket(void)
    {
        _sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
        if (_sockfd < 0) {
            perror("socket");
            return false;
        }
        return true;
    }

    // Bind the socket to ip:port. Returns false if the address text is not a
    // valid IPv4 address or if bind() fails.
    bool Bind(const std::string &ip, uint16_t port)
    {
        struct sockaddr_in addr;
        if (!FillAddr(ip, port, &addr)) {
            return false;
        }
        if (bind(_sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            perror("bind");
            return false;
        }
        return true;
    }

    // Put the socket into listening state.
    bool Listen(int backlog = 5)
    {
        if (listen(_sockfd, backlog) < 0) {
            perror("listen");
            return false;
        }
        return true;
    }

    // Wait for a connection. On success the connected descriptor is stored in
    // *newts and the peer address in *peeraddr.
    bool Accept(tcpsvr *newts, struct sockaddr_in *peeraddr)
    {
        if (newts == NULL) {
            fprintf(stderr, "accept: no destination object supplied\n");
            return false;
        }
        socklen_t addrlen = sizeof(struct sockaddr_in);
        int newfd = accept(_sockfd, (struct sockaddr *)peeraddr, &addrlen);
        if (newfd < 0) {
            perror("accept");
            return false;
        }
        newts->_sockfd = newfd;
        return true;
    }

    // Connect the socket to ip:port. Returns false if the address text is
    // not a valid IPv4 address or if connect() fails.
    bool Connect(const std::string &ip, uint16_t port)
    {
        struct sockaddr_in dest_addr;
        if (!FillAddr(ip, port, &dest_addr)) {
            return false;
        }
        if (connect(_sockfd, (struct sockaddr *)&dest_addr, sizeof(dest_addr)) < 0) {
            perror("connect");
            return false;
        }
        return true;
    }

    // Perform one recv() of at most 1023 bytes and store the bytes in *data.
    // Returns false on error or when the peer has closed the connection.
    bool Recv(std::string *data)
    {
        if (data == NULL) {
            fprintf(stderr, "recv: no output buffer supplied\n");
            return false;
        }
        char buf[1024] = {0};
        // recv() returns ssize_t; keep the full width instead of narrowing.
        ssize_t recv_size = recv(_sockfd, buf, sizeof(buf) - 1, 0);
        if (recv_size < 0) {
            perror("recv");
            return false;
        } else if (recv_size == 0) {
            // The peer closed the connection.
            printf("peer shutdown connect\n");
            return false;
        }
        data->assign(buf, static_cast<size_t>(recv_size));
        return true;
    }

    // Return the raw descriptor, or -1 when no socket is open.
    int GetFd()
    {
        return _sockfd;
    }

    // Close the socket if it is open. The descriptor is reset to -1 so that a
    // repeated Close() cannot close an unrelated, recycled descriptor.
    void Close()
    {
        if (_sockfd >= 0) {
            close(_sockfd);
            _sockfd = -1;
        }
    }

private:
    // Build a fully zeroed IPv4 socket address from text and a host-order port.
    // inet_aton() is used instead of inet_addr() because inet_addr() cannot
    // distinguish a parse failure from the address 255.255.255.255.
    static bool FillAddr(const std::string &ip, uint16_t port, struct sockaddr_in *addr)
    {
        memset(addr, 0, sizeof(*addr));
        addr->sin_family = AF_INET;
        addr->sin_port = htons(port);
        if (inet_aton(ip.c_str(), &addr->sin_addr) == 0) {
            fprintf(stderr, "invalid IPv4 address: %s\n", ip.c_str());
            return false;
        }
        return true;
    }

    int _sockfd;
};
服务器server.cpp
#include "sendfile.hpp"
#include <unistd.h>
#include <fcntl.h>
#include <sys/sendfile.h>
#include <sys/types.h>
#include <sys/stat.h>
// Return 0 from the enclosing function when the wrapped call reports failure.
// The argument is parenthesized and the body wrapped in do/while(0) so the
// macro behaves like a single statement in every context.
#define CHECK_RET(p) do { if (!(p)) { return 0; } } while (0)

// Demo server: for every accepted TCP connection, send the contents of
// ./czl.txt to the client with sendfile(2), then close the connection.
// Returns 0 when setup or accept fails (as before), -1 when opening,
// inspecting or sending the file fails.
int main(void)
{
    tcpsvr tp, newts;
    struct sockaddr_in cli_addr;
    int rc = 0;

    // 1. Create the listening socket.
    CHECK_RET(tp.Createsocket());
    // 2. Bind the address and 3. start listening.
    if (!tp.Bind("127.0.0.1", 18888) || !tp.Listen()) {
        tp.Close();
        return 0;
    }

    while (rc == 0) {
        // 4. Wait for a client.
        if (!tp.Accept(&newts, &cli_addr)) {
            break;
        }

        // 5. Open the file to send. The mode must be octal: decimal 664
        //    would produce unrelated permission bits.
        int file_fd = open("./czl.txt", O_RDWR | O_CREAT, 0664);
        if (file_fd < 0) {
            perror("open");
            newts.Close();
            rc = -1;
            break;
        }

        // 6. Query the file attributes to learn how many bytes to send.
        struct stat stat_buf;
        if (fstat(file_fd, &stat_buf) < 0) {
            perror("fstat");
            rc = -1;
        } else {
            // 7. Send the file. sendfile() may transfer fewer bytes than
            //    requested, so repeat until everything has been sent.
            off_t remaining = stat_buf.st_size;
            while (remaining > 0) {
                ssize_t sent = sendfile(newts.GetFd(), file_fd, NULL, (size_t)remaining);
                if (sent < 0) {
                    perror("sendfile");
                    rc = -1;
                    break;
                }
                if (sent == 0) {
                    // The file shrank while it was being sent.
                    break;
                }
                remaining -= sent;
            }
        }

        // Release the per-connection resources so they do not accumulate and
        // so the client observes end-of-stream.
        close(file_fd);
        newts.Close();
    }

    // 8. Close the listening socket.
    tp.Close();
    return rc;
}
客户端client.cpp
#include "sendfile.hpp"
// Return 0 from the enclosing function when the wrapped call reports failure.
// The argument is parenthesized and the body wrapped in do/while(0) so the
// macro behaves like a single statement in every context.
#define CHECK_RET(p) do { if (!(p)) { return 0; } } while (0)

// Demo client: connect to the server, perform a single receive and print the
// received bytes to stdout.
int main(void)
{
    tcpsvr tp;

    // 1. Create the socket.
    CHECK_RET(tp.Createsocket());

    // 2. Connect; close the socket before giving up so it is not leaked.
    if (!tp.Connect("127.0.0.1", 18888)) {
        tp.Close();
        return 0;
    }

    // 3. Receive once and print only when data actually arrived. fwrite is
    //    used so that a NUL byte in the data does not cut the output short.
    std::string buf;
    if (tp.Recv(&buf)) {
        fwrite(buf.data(), 1, buf.size(), stdout);
    }

    // 4. Close the socket.
    tp.Close();
    return 0;
}
编译Makefile
# Build the sendfile demo client (cli) and server (svr).
# Recipe lines must begin with a TAB character.
.PHONY: all clean format

all: cli svr

# $< is the first prerequisite (the .cpp file); the header is listed only so
# that a change to it triggers a rebuild.
cli: client.cpp sendfile.hpp
	g++ $< -o $@

svr: server.cpp sendfile.hpp
	g++ $< -o $@

clean:
	rm -fr cli svr

format:
	astyle --options=linux.astyle *.cpp *.hpp
在测试目录创建文本文件czl.txt,输入测试字符串,运行测试用例,测试输出如下:

客户端运行,成功从服务器端获取到测试文本中的字符。
sendfile的限制:
1.sendfile在代表输入文件的描述符in_fd和代表输出文件的描述符out_fd之间传递文件内容,目标文件out_fd必须指向一个套接字,参数in_fd指向的文件必须是可以进行mmap操作的,在实践中,通常是一个普通文件。
2.上述局限多少限制了sendfile的使用,可以使用sendfile将数据从文件传递到套接字上,但反过来就不可以,也不能通过sendfile在两个套接字间直接传送数据。
如果sendfile可以用来在两个普通文件之间传送数据,也可以获得性能上的优势,在Linux2.4及早期版本中,out_fd是可以指向一个普通文件的,内核底层实现做了修改之后,这种做法在2.6版的内核上消失了,但是这个功能在今后的内核版本中可能会重新启用。
更新:
幸福来得太突然:根据Linux手册页sendfile(2)的描述,从Linux 2.6.33版本开始,sendfile的out_fd可以是任意文件,不再强制要求是socket。同时in_fd的限制没有变,仍然必须支持类似mmap的操作,也就是说in_fd不能是socket。
![]()

一个用sendfile实现的文件拷贝命令cp的实现:
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sendfile.h>
// Allow "gcc -DBUF_SIZE=xxx" to override the definition.
#ifndef BUF_SIZE
#define BUF_SIZE 1024
#endif

// Upper bound requested from one sendfile() call; Linux transfers at most
// 0x7ffff000 bytes per call, so larger files need several calls anyway.
#define SENDFILE_MAX_CHUNK ((size_t)0x7ffff000)

/* Write exactly len bytes to fd, retrying after short writes.
 * Returns 0 on success, -1 with errno set on failure. */
static int write_all(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t written = write(fd, data, len);
        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        data += written;
        len -= (size_t)written;
    }
    return 0;
}

/* Copy from the current offset of in_fd to out_fd with read()/write().
 * Returns 0 at end of input, -1 with errno set on failure. */
static int copy_read_write(int out_fd, int in_fd)
{
    char buf[BUF_SIZE];

    for (;;) {
        ssize_t num_read = read(in_fd, buf, sizeof(buf));
        if (num_read == 0) {
            return 0;
        }
        if (num_read < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (write_all(out_fd, buf, (size_t)num_read) != 0) {
            return -1;
        }
    }
}

/* Copy from the current offset of in_fd to out_fd with sendfile(), calling
 * it repeatedly until it reports end of input (a single call may transfer
 * less than requested). Returns 0 on success, -1 with errno set on failure. */
static int copy_sendfile(int out_fd, int in_fd)
{
    for (;;) {
        ssize_t sent = sendfile(out_fd, in_fd, NULL, SENDFILE_MAX_CHUNK);
        if (sent == 0) {
            return 0;
        }
        if (sent < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
    }
}

/*
 * cp-like tool: copy argv[1] to argv[2] using sendfile(), falling back to a
 * read()/write() loop when sendfile() is not supported for these files.
 * Exits with 0 on success or when --help is requested, non-zero otherwise.
 */
int main(int argc, char **argv)
{
    int inputfd, outputfd, openflags;
    mode_t fileperms;
    struct stat in_stat;
    int status = EXIT_SUCCESS;
    int help = (argc >= 2) && (strcmp(argv[1], "--help") == 0);

    if ((argc != 3) || help) {
        printf("%s old-file new-file\n", argv[0]);
        /* Only an explicit help request counts as success. */
        exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    inputfd = open(argv[1], O_RDONLY);
    if (inputfd == -1) {
        fprintf(stderr, "%s line %d, opening file %s error.\n", __func__, __LINE__, argv[1]);
        exit(EXIT_FAILURE);
    }

    openflags = O_CREAT | O_WRONLY | O_TRUNC;
    fileperms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
    outputfd = open(argv[2], openflags, fileperms);
    if (outputfd == -1) {
        fprintf(stderr, "%s line %d, opening file %s error.\n", __func__, __LINE__, argv[2]);
        exit(EXIT_FAILURE);
    }

    /* Informational only: the copy loops run until end of input. */
    if (fstat(inputfd, &in_stat) == -1) {
        fprintf(stderr, "%s line %d, fatal error, get size failure.\n", __func__, __LINE__);
        exit(EXIT_FAILURE);
    }
    printf("%s line %d, size = 0x%llx.\n", __func__, __LINE__,
           (unsigned long long)in_stat.st_size);

    if (copy_sendfile(outputfd, inputfd) != 0) {
        /* EINVAL/ENOSYS mean sendfile() cannot handle this descriptor pair.
         * Both file offsets were advanced together, so the plain copy can
         * simply continue from the current position. */
        int can_fall_back = (errno == EINVAL || errno == ENOSYS);
        if (!can_fall_back || copy_read_write(outputfd, inputfd) != 0) {
            fprintf(stderr, "%s line %d, fatal error, cp file failure: %s.\n",
                    __func__, __LINE__, strerror(errno));
            exit(EXIT_FAILURE);
        }
    }

    if (fsync(outputfd) != 0) {
        fprintf(stderr, "%s line %d, fatal error, flush target file to disk failure.\n",
                __func__, __LINE__);
        exit(EXIT_FAILURE);
    }
    if (close(inputfd) == -1) {
        fprintf(stderr, "%s line %d, fatal error, close input file %s failure.\n",
                __func__, __LINE__, argv[1]);
        status = EXIT_FAILURE;
    }
    if (close(outputfd) == -1) {
        fprintf(stderr, "%s line %d, fatal error, close output file %s failure.\n",
                __func__, __LINE__, argv[2]);
        status = EXIT_FAILURE;
    }
    return status;
}

内核执行sendfile时的callstack

2.通过socket传递struct file给目标进程创建fd
通过sendmsg和recvmsg,我们可以在同一台主机上通过UNIX域套接字将包含文件描述符的辅助数据从一个进程传递到另一个进程上,以这种方式可以传递任意类型的文件描述符,包括从open, pipe等调用创建的描述符。
虽然这种技术通常称为传递文件描述符,但实际上在两个进程之间传递的是对同一个打开文件描述(open file description,即内核中的struct file)的引用,接收端进程得到的描述符编号一般和发送端的描述符编号不同。
server.c
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/socket.h>
/* Print msg together with the current errno text and terminate the process. */
#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)

/*
 * Receive n file descriptors sent as one SCM_RIGHTS control message.
 *
 * socket: connected UNIX domain socket to read from.
 * n:      number of descriptors expected; must be positive.
 *
 * Returns a malloc'ed array of n descriptors. The caller owns the array
 * (free it) and the descriptors (close them). Any failure, including a
 * message that does not carry the expected descriptors, terminates the
 * process.
 */
static int *recv_fd(int socket, int n)
{
    if (n <= 0) {
        fprintf(stderr, "Invalid descriptor count %d\n", n);
        exit(EXIT_FAILURE);
    }

    size_t payload_len = (size_t)n * sizeof(int);
    size_t control_len = CMSG_SPACE(payload_len);

    int *fds = malloc(payload_len);
    /* Heap memory is suitably aligned for struct cmsghdr, which a plain
     * char array on the stack is not guaranteed to be. */
    char *control = calloc(1, control_len);
    if (fds == NULL || control == NULL)
        handle_error("Failed to allocate receive buffers");

    /* At least one byte of ordinary data accompanies the control message. */
    char dup[256];
    struct iovec io = { .iov_base = dup, .iov_len = sizeof(dup) };
    struct msghdr msg = {0};
    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = control;
    msg.msg_controllen = control_len;

    if (recvmsg(socket, &msg, 0) < 0)
        handle_error("Failed to receive message");

    /* The peer may have closed the connection or sent plain data only, in
     * which case there is no (or an unexpected) control message. */
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(payload_len)) {
        fprintf(stderr, "Received message does not carry %d file descriptors\n", n);
        exit(EXIT_FAILURE);
    }

    memcpy(fds, CMSG_DATA(cmsg), payload_len);
    free(control);
    return fds;
}
/*
 * fd-passing server: listen on the UNIX domain socket /tmp/fd-pass.socket,
 * accept one client, receive two file descriptors from it and copy the
 * contents readable from each descriptor to standard output.
 */
int main(int argc, char *argv[])
{
    static const char socket_path[] = "/tmp/fd-pass.socket";
    enum { PASSED_FD_COUNT = 2 };
    ssize_t nbytes;
    char buffer[256];
    int sfd, cfd, *fds;
    struct sockaddr_un addr;

    (void)argc;
    (void)argv;

    sfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sfd == -1)
        handle_error("Failed to create socket");

    /* A socket file left over from an earlier run would make bind() fail. */
    if (unlink(socket_path) == -1 && errno != ENOENT)
        handle_error("Removing socket file failed");

    memset(&addr, 0, sizeof(struct sockaddr_un));
    addr.sun_family = AF_UNIX;
    /* addr was zeroed above, so copying at most size-1 bytes keeps the path
     * NUL-terminated. */
    strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);

    if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1)
        handle_error("Failed to bind to socket");
    if (listen(sfd, 5) == -1)
        handle_error("Failed to listen on socket");

    cfd = accept(sfd, NULL, NULL);
    if (cfd == -1)
        handle_error("Failed to accept incoming connection");

    fds = recv_fd(cfd, PASSED_FD_COUNT);
    for (int i = 0; i < PASSED_FD_COUNT; ++i) {
        fprintf(stdout, "Reading from passed fd % d\n", fds[i]);
        /* The header goes through stdio while the data is written with
         * write(); flush so the two cannot appear out of order. */
        fflush(stdout);

        while ((nbytes = read(fds[i], buffer, sizeof(buffer))) > 0) {
            ssize_t done = 0;
            /* write() may accept fewer bytes than requested. */
            while (done < nbytes) {
                ssize_t written = write(STDOUT_FILENO, buffer + done,
                                        (size_t)(nbytes - done));
                if (written < 0)
                    handle_error("Failed to write to stdout");
                done += written;
            }
        }
        if (nbytes < 0)
            perror("Failed to read from passed descriptor");

        /* The received descriptors belong to this process now. */
        if (close(fds[i]) == -1)
            perror("Failed to close passed descriptor");
    }
    free(fds);

    if (close(cfd) == -1)
        handle_error("Failed to close client socket");
    if (close(sfd) == -1)
        handle_error("Failed to close listening socket");
    /* Remove the socket file so it does not linger after the demo. */
    if (unlink(socket_path) == -1 && errno != ENOENT)
        perror("Removing socket file failed");
    return 0;
}
client.c
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/socket.h>
/* Print msg together with the current errno text and terminate the process. */
#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)

/*
 * Send n file descriptors over a UNIX domain socket as one SCM_RIGHTS
 * control message.
 *
 * socket: connected UNIX domain socket.
 * fds:    array of n open descriptors to pass; the caller keeps its copies.
 * n:      number of descriptors; must be positive.
 *
 * Any failure terminates the process.
 */
static void send_fd(int socket, int *fds, int n) // send fd by socket
{
    if (fds == NULL || n <= 0) {
        fprintf(stderr, "Invalid descriptor list (count %d)\n", n);
        exit(EXIT_FAILURE);
    }

    size_t payload_len = (size_t)n * sizeof(int);
    size_t control_len = CMSG_SPACE(payload_len);

    /* Heap memory is suitably aligned for struct cmsghdr, which a plain
     * char array on the stack is not guaranteed to be; calloc also zeroes
     * the padding between header and data. */
    char *control = calloc(1, control_len);
    if (control == NULL)
        handle_error("Failed to allocate control buffer");

    /* Ordinary data must accompany the control message. It is explicitly
     * zeroed so that no uninitialized stack contents reach the peer. */
    char dup[256] = {0};
    struct iovec io = { .iov_base = dup, .iov_len = sizeof(dup) };
    struct msghdr msg = {0};
    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = control;
    msg.msg_controllen = control_len;

    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(payload_len);
    memcpy(CMSG_DATA(cmsg), fds, payload_len);

    if (sendmsg(socket, &msg, 0) < 0)
        handle_error("Failed to send message");
    free(control);
}
/*
 * fd-passing client: open the two files named on the command line for
 * reading, connect to the UNIX domain socket /tmp/fd-pass.socket and hand
 * both descriptors to the server with send_fd().
 */
int main(int argc, char *argv[])
{
    static const char *const open_failures[2] = {
        "Failed to open file 1 for reading",
        "Failed to open file 2 for reading",
    };
    struct sockaddr_un server_addr;
    int to_pass[2];
    int conn;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <file-name1> <file-name2>\n", argv[0]);
        exit(1);
    }

    conn = socket(AF_UNIX, SOCK_STREAM, 0);
    if (conn == -1)
        handle_error("Failed to create socket");

    /* Prepare the server address before touching the files. */
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sun_family = AF_UNIX;
    strncpy(server_addr.sun_path, "/tmp/fd-pass.socket",
            sizeof(server_addr.sun_path) - 1);

    /* Open argv[1] and argv[2] in order; handle_error() does not return. */
    for (int idx = 0; idx < 2; ++idx) {
        to_pass[idx] = open(argv[idx + 1], O_RDONLY);
        if (to_pass[idx] < 0)
            handle_error(open_failures[idx]);
        fprintf(stdout, "Opened fd % d in parent\n", to_pass[idx]);
    }

    if (connect(conn, (struct sockaddr *) &server_addr, sizeof(server_addr)) == -1)
        handle_error("Failed to connect to socket");

    send_fd(conn, to_pass, 2);
    exit(EXIT_SUCCESS);
}
Makefile
# Build the fd-passing demo server and client.
# Recipe lines must begin with a TAB character.
.PHONY: all clean format

all: server client

server: server.c
	gcc server.c -o server

client: client.c
	gcc client.c -o client

clean:
	rm -fr client server

# No formatting command is configured for this demo.
format:
测试,客户端打开两个文件FD,传递给服务器,服务器接收到从客户端传递过来的两个FD,读出其中的内容,并打印到屏幕上。

callstack:

内核中的关键处理源码细节:

基于上述代码实现的一个小技巧:将本地进程的标准输出文件描述符发送给对端进程(UNIX域套接字只能在同一台主机上使用),对端进程收到后用它替换自己的标准输出描述符,这样对端程序的输出就被重定向到本地终端了。
https://gitee.com/tugouxp/dumpstack/tree/master/test/posix/sendfd/improve
一个典型的应用场景是,主服务器在TCP监听套接字上接受客户端连接,然后将返回的文件描述符传递给进程池中的其中一个成员上,这些成员由服务器的子进程组成,之后,子进程就可以响应客户端的请求了。

资料
宋宝华:世上最好的共享内存(Linux共享内存最透彻的一篇)-腾讯云开发者社区-腾讯云
从内核看文件描述符传递的实现(基于5.9.9) - 码农教程
https://www.man7.org/tlpi/code/download/tlpi-231120-dist.tar.gz
Passing open file descriptors over unix domain sockets – Openforums
Linux下文件描述符与sendfile在TCP/UNIX套接字间的传输

172万+

被折叠的 条评论
为什么被折叠?



