Linux SockFS文件系统分析3

Linux SockFS文件系统分析3(基于Linux6.6)---socket fd创建介绍

一、创建过程

在 Linux 中,Socket 是一种通信机制,用于进程间的数据传输,可以是同一主机内的进程间通信,也可以是不同主机间的网络通信。基于套接字的通信机制,主要通过使用 socket API 来创建、配置和管理套接字。

在socket系统调用中,主要包括网络部分相关的初始化、socket与vfs关联设置这两部分,下面是socket的流程图,其主要调用两个函数完成socket的创建工作:

1.调用sock_create,完成网络相关的socket创建、inode创建、相关协议的处理接口函数指针的设置等;

2.调用sock_map_fd,申请一个未使用的fd并创建对应的struct file对象,完成fd、struct file与socket三者的关联。

socket系统调用的代码如下(在6.6中,__sys_socket通过__sys_socket_create间接调用sock_create),我们接着就以sock_create、sock_map_fd这两个接口进行分析。

net/socket.c

/*
 * Create a socket for (family, type, protocol) and bind it to a new file
 * descriptor.
 *
 * Returns PTR_ERR() of the socket when creation fails; otherwise returns
 * whatever sock_map_fd() returns.
 */
int __sys_socket(int family, int type, int protocol)
{
	struct socket *sock;
	int flags;

	sock = __sys_socket_create(family, type,
				   update_socket_protocol(family, type, protocol));
	if (IS_ERR(sock))
		return PTR_ERR(sock);

	/* Bits of 'type' outside SOCK_TYPE_MASK are treated as flags. */
	flags = type & ~SOCK_TYPE_MASK;
	/* Translate SOCK_NONBLOCK into O_NONBLOCK when the two values differ. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/* Only O_CLOEXEC and O_NONBLOCK are forwarded to sock_map_fd(). */
	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}

/* socket(2) system call entry point: forwards its three arguments to __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}

二、sock_create接口分析

sock_create接口是通过调用__sock_create接口进行socket创建与设置的,重点关注这个接口。

net/socket.c

/*
 * Allocate a struct socket and let the protocol family registered in
 * net_families[family] finish setting it up through its ->create() hook.
 *
 * @net:      passed through to pf->create()
 * @family:   index into net_families[]; must be in [0, NPROTO)
 * @type:     socket type; must be in [0, SOCK_MAX)
 * @protocol: passed to the security hooks and to pf->create()
 * @res:      receives the new socket on success
 * @kern:     passed to the security hooks and to pf->create()
 *
 * Returns 0 on success. On failure returns a negative errno:
 * -EAFNOSUPPORT (family out of range, no registered family, or a module
 * reference could not be taken), -EINVAL (type out of range), -ENFILE
 * (sock_alloc() failed), or the error reported by a security hook or by
 * pf->create().
 */
int __sock_create(struct net *net, int family, int type, int protocol,
			 struct socket **res, int kern)
{
	int err;
	struct socket *sock;
	const struct net_proto_family *pf;

	/*
	 *      Check protocol is in range
	 */
	if (family < 0 || family >= NPROTO)
		return -EAFNOSUPPORT;
	if (type < 0 || type >= SOCK_MAX)
		return -EINVAL;

	/* Compatibility.

	   This uglymoron is moved from INET layer to here to avoid
	   deadlock in module load.
	 */
	if (family == PF_INET && type == SOCK_PACKET) {
		pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
			     current->comm);
		family = PF_PACKET;
	}

	err = security_socket_create(family, type, protocol, kern);
	if (err)
		return err;

	/*
	 *	Allocate the socket and allow the family to set things up. if
	 *	the protocol is 0, the family is instructed to select an appropriate
	 *	default.
	 */
	sock = sock_alloc();
	if (!sock) {
		net_warn_ratelimited("socket: no more sockets\n");
		return -ENFILE;	/* Not exactly a match, but its the
				   closest posix thing */
	}

	sock->type = type;

#ifdef CONFIG_MODULES
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (rcu_access_pointer(net_families[family]) == NULL)
		request_module("net-pf-%d", family);
#endif

	rcu_read_lock();
	pf = rcu_dereference(net_families[family]);
	/* Default error for both "no family" and "cannot pin its module". */
	err = -EAFNOSUPPORT;
	if (!pf)
		goto out_release;

	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	if (!try_module_get(pf->owner))
		goto out_release;

	/* Now protected by module ref count */
	rcu_read_unlock();

	err = pf->create(net, sock, protocol, kern);
	if (err < 0)
		goto out_module_put;

	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	if (!try_module_get(sock->ops->owner))
		goto out_module_busy;

	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	module_put(pf->owner);
	err = security_socket_post_create(sock, family, type, protocol, kern);
	if (err)
		goto out_sock_release;
	*res = sock;

	return 0;

	/* The module owning sock->ops could not be pinned. */
out_module_busy:
	err = -EAFNOSUPPORT;
	/* Clear sock->ops, then drop the reference taken on pf->owner. */
out_module_put:
	sock->ops = NULL;
	module_put(pf->owner);
out_sock_release:
	sock_release(sock);
	return err;

	/* Failure while still inside the RCU read-side section. */
out_release:
	rcu_read_unlock();
	goto out_sock_release;
}
EXPORT_SYMBOL(__sock_create);

下面是__sock_create接口的处理流程图,其主要完成如下几个功能:

  1. 调用sock_alloc接口进行struct socket类型变量的创建、inode节点的创建,主要是通过调用sockfs的超级块的alloc_inode接口sock_alloc_inode接口,该接口创建struct socket_alloc类型的内存变量,该变量中包含了inode成员、socket成员,并完成inode节点的初始化与设置。针对sock_alloc_inode接口,我们之前在第一篇文章《LINUX 套接字文件系统(sockfs)分析之一文件系统注册分析》中已经介绍过,需要了解的查看这篇文章的内容。
  2. 根据传递的网络协议簇的类型(AF_INET、AF_INET6、AF_NETLINK等),从全局数组net_families中查找对应协议簇的struct net_proto_family类型的指针pf;
  3. 调用pf->create接口,继续进行socket的设置操作。

以上即为sock_create的处理流程,针对上述第3步的pf->create接口,进行进一步的初始化操作,主要就是进行具体协议的初始化以及该协议簇相关的变量以及接口的初始化操作。为了更好理解协议簇相关的create接口,我们以ipv4的create接口进行详细的分析与说明。

 

三、inet的create接口(inet_create)

net/ipv4/af_inet.c 

/*
 *	Create an inet socket.
 *
 *	Looks up the inetsw[] entry matching (sock->type, protocol), optionally
 *	requesting a module (at most two attempts) when nothing matches, then
 *	allocates a struct sock with sk_alloc() and initialises it.
 *
 *	Returns 0 on success or a negative errno: -EINVAL (protocol out of
 *	range), -ESOCKTNOSUPPORT / -EPROTONOSUPPORT (no matching entry),
 *	-EPERM (SOCK_RAW from a non-kernel caller lacking CAP_NET_RAW),
 *	-ENOMEM (sk_alloc() failed), or the error returned by the protocol's
 *	->hash()/->init() hooks or by BPF_CGROUP_RUN_PROG_INET_SOCK().
 */

static int inet_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;
	struct inet_protosw *answer;
	struct inet_sock *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	int try_loading_module = 0;
	int err;

	if (protocol < 0 || protocol >= IPPROTO_MAX)
		return -EINVAL;

	sock->state = SS_UNCONNECTED;

	/* Look for the requested type/protocol pair. */
lookup_protocol:
	/* Stays -ESOCKTNOSUPPORT if the list for this type has no entries. */
	err = -ESOCKTNOSUPPORT;
	rcu_read_lock();
	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

		/* err is 0 on every 'break' below, i.e. when a match is taken. */
		err = 0;
		/* Check the non-wild match. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Check for the two wild cases. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		/* This entry did not match; remembered if the loop runs out. */
		err = -EPROTONOSUPPORT;
	}

	if (unlikely(err)) {
		if (try_loading_module < 2) {
			rcu_read_unlock();
			/*
			 * Be more specific, e.g. net-pf-2-proto-132-type-1
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
			 */
			if (++try_loading_module == 1)
				request_module("net-pf-%d-proto-%d-type-%d",
					       PF_INET, protocol, sock->type);
			/*
			 * Fall back to generic, e.g. net-pf-2-proto-132
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
			 */
			else
				request_module("net-pf-%d-proto-%d",
					       PF_INET, protocol);
			goto lookup_protocol;
		} else
			goto out_rcu_unlock;
	}

	/* Raw sockets from non-kernel callers require CAP_NET_RAW. */
	err = -EPERM;
	if (sock->type == SOCK_RAW && !kern &&
	    !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out_rcu_unlock;

	/* Copy what is needed from 'answer' before leaving the RCU section. */
	sock->ops = answer->ops;
	answer_prot = answer->prot;
	answer_flags = answer->flags;
	rcu_read_unlock();

	WARN_ON(!answer_prot->slab);

	err = -ENOMEM;
	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
	if (!sk)
		goto out;

	err = 0;
	if (INET_PROTOSW_REUSE & answer_flags)
		sk->sk_reuse = SK_CAN_REUSE;

	if (INET_PROTOSW_ICSK & answer_flags)
		inet_init_csk_locks(sk);

	inet = inet_sk(sk);
	inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);

	inet_clear_bit(NODEFRAG, sk);

	/* For raw sockets the protocol number is stored in inet_num. */
	if (SOCK_RAW == sock->type) {
		inet->inet_num = protocol;
		if (IPPROTO_RAW == protocol)
			inet_set_bit(HDRINCL, sk);
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
		inet->pmtudisc = IP_PMTUDISC_DONT;
	else
		inet->pmtudisc = IP_PMTUDISC_WANT;

	atomic_set(&inet->inet_id, 0);

	sock_init_data(sock, sk);

	sk->sk_destruct	   = inet_sock_destruct;
	sk->sk_protocol	   = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
	sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);

	inet->uc_ttl	= -1;
	inet_set_bit(MC_LOOP, sk);
	inet->mc_ttl	= 1;
	inet_set_bit(MC_ALL, sk);
	inet->mc_index	= 0;
	inet->mc_list	= NULL;
	inet->rcv_tos	= 0;

	if (inet->inet_num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		inet->inet_sport = htons(inet->inet_num);
		/* Add to protocol hash chains. */
		err = sk->sk_prot->hash(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}

	/* Protocol-specific initialisation, if the protocol provides one. */
	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}

	/* The cgroup BPF hook is run only for non-kernel callers. */
	if (!kern) {
		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
		if (err) {
			sk_common_release(sk);
			goto out;
		}
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
}

 以上即为sock_create的处理流程,针对上述第3步的pf->create接口,此处以inet协议簇的create接口进行解析,inet的create接口名称为inet_create。该接口的处理流程如下:

1.将socket的状态设置为SS_UNCONNECTED;

2.根据socket类型、协议号,查找对应的struct inet_protosw类型的inet协议变量,若没有查找到,则返回创建失败;若成功,则继续执行如下步骤;

3.创建inet sock变量,并进行相应的初始化,包括sk_family、sk_prot、sk_net等成员的设置,同时inet sock中包含struct sock类型的成员;

4.设置socket变量的ops指针,ops为相应socket类型(SOCK_STREAM、SOCK_DGRAM等)对应的操作处理接口;

5.若sk->sk_prot->init指针不为空,则调用该接口进行具体协议的socket初始化操作(如针对tcp协议而言,则调用tcp_v4_init_sock接口进行相应的初始化)。

该接口涉及结构体struct proto、struct proto_ops以及全局变量inetsw,本文不再对它们逐一展开。

以上即为sock_create接口的处理流程,简而言之,即完成socket的创建、inode的创建、具体协议处理接口的设置等。

 

四、sock_map_fd接口分析

 net/socket.c

/*
 * Reserve an unused file descriptor, create a file for @sock and install
 * it under that descriptor.
 *
 * Returns the new fd on success, or a negative error value. When no fd
 * can be reserved, @sock is released here. When sock_alloc_file() fails,
 * only the reserved fd is given back; this function does not release
 * @sock on that path.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	newfile = sock_alloc_file(sock, flags, NULL);
	if (!IS_ERR(newfile)) {
		/* Publish the file under the reserved descriptor. */
		fd_install(fd, newfile);
		return fd;
	}

	/* File creation failed: return the reserved descriptor. */
	put_unused_fd(fd);
	return PTR_ERR(newfile);
}

该接口主要完成fd的申请、struct file对象的创建,以及fd、struct file与socket之间的关联。

该接口实现的具体功能如下:

1.通过调用get_unused_fd_flags获取一个未使用的fd(关于get_unused_fd_flags,参见《LINUX 文件系统分析sys_open调用之get_unused_fd_flags接口分析》);

2.调用sock_alloc_file接口,创建struct file对象,并与socket进行关联;

3.调用fd_install接口实现struct file对象与fd的关联(关于fd_install相关的内容,参见《LINUX VFS分析之do_sys_open接口实现的简要说明》)。

以上即为sock_map_fd接口实现的主要功能。

五、sock_alloc_file接口分析

net/socket.c 

/*
 * Create a struct file backed by @sock's inode and link the two together.
 *
 * @sock:  socket to wrap
 * @flags: only the O_NONBLOCK bit is used; the file is always opened O_RDWR
 * @dname: name passed to alloc_file_pseudo(); when NULL, the name of
 *         sock->sk->sk_prot_creator is used if sock->sk is set, else ""
 *
 * Returns the new file, or the error pointer from alloc_file_pseudo().
 * On failure @sock is released before returning.
 */
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
	struct file *file;

	if (!dname)
		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";

	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
				O_RDWR | (flags & O_NONBLOCK),
				&socket_file_ops);
	if (IS_ERR(file)) {
		sock_release(sock);
		return file;
	}

	file->f_mode |= FMODE_NOWAIT;
	/* Cross-link the socket and the file. */
	sock->file = file;
	file->private_data = sock;
	stream_open(SOCK_INODE(sock), file);
	return file;
}
EXPORT_SYMBOL(sock_alloc_file);

 该接口实现的具体功能在上面流程图中已经说明,此处再说明一下:

A.调用alloc_file_pseudo,其内部先通过d_alloc_pseudo创建socket对应的dentry;

B.alloc_file_pseudo内部再调用alloc_file创建struct file对象,并将其操作处理接口指针f_op设置为socket_file_ops;

而socket_file_ops的定义如下,其实现了read_iter、write_iter接口。在系统调用read、write的分析中(见《LINUX VFS分析之write调用接口的内核实现分析》、《LINUX VFS分析之read调用接口的内核实现分析》)可知,其会通过调用struct file的f_op->read/f_op->read_iter、f_op->write/f_op->write_iter,实现文件的读写操作。因此针对socket而言,也可以使用read、write接口实现数据的收发操作。

net/socket.c 

/*
 * File operations installed on every socket file by sock_alloc_file().
 * Reads and writes go through the ->read_iter/->write_iter hooks; seeking
 * is wired to no_llseek.
 */
static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read_iter =	sock_read_iter,
	.write_iter =	sock_write_iter,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.uring_cmd =    io_uring_cmd_sock,
	.mmap =		sock_mmap,
	.release =	sock_close,
	.fasync =	sock_fasync,
	.splice_write = splice_to_socket,
	.splice_read =	sock_splice_read,
	.splice_eof =	sock_splice_eof,
	.show_fdinfo =	sock_show_fdinfo,
};

六、举例应用

下面是一个简单的 TCP 服务器和客户端 示例,展示如何在 Linux 环境下创建和使用套接字文件描述符(FD)。

示例 1:简单的 TCP 服务器应用

1. 服务器端(Server)

服务器将创建一个 TCP 套接字,绑定到本地的 IP 地址和端口,并监听连接。服务器接受客户端的连接后,会与客户端进行简单的交互。

server.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

#define PORT 8080
#define BACKLOG 5 // 最大等待连接数

/*
 * Minimal one-shot TCP server: listens on PORT, accepts a single client,
 * prints the message it sends and replies with a fixed greeting.
 *
 * Returns 0 on success; exits with status 1 after printing the failing
 * call's error when any socket operation fails.
 */
int main(void) {
    int sockfd, new_sock;
    struct sockaddr_in server_addr, client_addr;
    socklen_t client_addr_len = sizeof(client_addr);
    char buffer[1024];
    const char *reply = "Hello from server";
    ssize_t nbytes;

    // Create the listening socket
    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("Socket creation failed");
        exit(1);
    }

    // Fill in the server address structure
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = INADDR_ANY; // any local address
    server_addr.sin_port = htons(PORT);       // convert to network byte order

    // Bind the socket
    if (bind(sockfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1) {
        perror("Bind failed");
        close(sockfd);
        exit(1);
    }

    // Start listening for connections
    if (listen(sockfd, BACKLOG) == -1) {
        perror("Listen failed");
        close(sockfd);
        exit(1);
    }
    printf("Server listening on port %d...\n", PORT);

    // Accept one client connection
    new_sock = accept(sockfd, (struct sockaddr *)&client_addr, &client_addr_len);
    if (new_sock == -1) {
        perror("Accept failed");
        close(sockfd);
        exit(1);
    }
    printf("Connection accepted from %s:%d\n",
            inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port));

    // Receive data from the client. recv() does not NUL-terminate, so one
    // byte is reserved and the terminator is written explicitly before the
    // buffer is printed with %s.
    nbytes = recv(new_sock, buffer, sizeof(buffer) - 1, 0);
    if (nbytes == -1) {
        perror("Receive failed");
        close(new_sock);
        close(sockfd);
        exit(1);
    }
    buffer[nbytes] = '\0';
    printf("Received: %s\n", buffer);

    // Send the reply to the client
    if (send(new_sock, reply, strlen(reply), 0) == -1) {
        perror("Send failed");
        close(new_sock);
        close(sockfd);
        exit(1);
    }

    // Close both sockets
    close(new_sock);
    close(sockfd);

    return 0;
}

2. 客户端(Client)

客户端将连接到服务器的 IP 地址和端口,然后发送一些数据,等待服务器的回应。

client.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

#define SERVER_IP "127.0.0.1"
#define PORT 8080

/*
 * Minimal TCP client: connects to SERVER_IP:PORT, sends a fixed greeting
 * and prints the server's reply.
 *
 * Returns 0 on success; exits with status 1 after printing the failing
 * call's error when any socket operation fails.
 */
int main(void) {
    int sockfd;
    struct sockaddr_in server_addr;
    char buffer[1024];
    const char *msg = "Hello from client";
    ssize_t nbytes;

    // Create the socket
    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("Socket creation failed");
        exit(1);
    }

    // Fill in the server address structure
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(PORT);
    if (inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr) <= 0) {
        perror("Invalid address");
        close(sockfd);
        exit(1);
    }

    // Connect to the server
    if (connect(sockfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1) {
        perror("Connection failed");
        close(sockfd);
        exit(1);
    }
    printf("Connected to server\n");

    // Send the greeting to the server
    if (send(sockfd, msg, strlen(msg), 0) == -1) {
        perror("Send failed");
        close(sockfd);
        exit(1);
    }

    // Receive the reply. recv() does not NUL-terminate, so one byte is
    // reserved and the terminator is written explicitly before the buffer
    // is printed with %s.
    nbytes = recv(sockfd, buffer, sizeof(buffer) - 1, 0);
    if (nbytes == -1) {
        perror("Receive failed");
        close(sockfd);
        exit(1);
    }
    buffer[nbytes] = '\0';
    printf("Received from server: %s\n", buffer);

    // Close the socket
    close(sockfd);

    return 0;
}

编译和运行步骤

  1. 编译服务器端和客户端代码

    在终端中使用 gcc 编译器编译两个程序:

    gcc -o server server.c
    gcc -o client client.c

  2. 启动服务器

    先启动服务器程序,它会在本地端口 8080 上监听客户端的连接。

./server

服务器会打印出:

Server listening on port 8080...

  3. 启动客户端

    在另一个终端中启动客户端程序,它将连接到服务器,发送数据并接收回应。

./client

客户端会打印出:

Connected to server
Received from server: Hello from server

  4. 服务器输出

    服务器端在接收到客户端消息后,会打印:

Connection accepted from 127.0.0.1:xxxx
Received: Hello from client

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值