网络编程函数socket源码分析

本文详细剖析了socket函数的工作流程,包括socket结构体的分配、sock结构体的创建及关联,以及文件描述符的获取。重点介绍了socket_socket、sock_alloc、inet_create和get_fd等关键函数的实现细节。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

// 新建一个socket结构体,并且创建一个下层的sock结构体,互相关联
static int sock_socket(int family, int type, int protocol)
{
	int i, fd;
	struct socket *sock;
	struct proto_ops *ops;
	for (i = 0; i < NPROTO; ++i) 
	{	// 从props数组中找到family协议对应的操作函数集,props由系统初始化时sock_register进行操作
		if (pops[i] == NULL) continue;
		if (pops[i]->family == family) 
			break;
	}
	if (i == NPROTO) 
	{
  		return -EINVAL;
	}
	// 函数集
	ops = pops[i];
	// 检查一下类型
	if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
		type != SOCK_SEQPACKET && type != SOCK_RAW &&
		type != SOCK_PACKET) || protocol < 0)
			return(-EINVAL);
	// 分配一个新的socket结构体,下面进行分析
	if (!(sock = sock_alloc())) 
	{
		printk("NET: sock_socket: no more sockets\n");
		return(-ENOSR);	/* Was: EAGAIN, but we are out of
				   system resources! */
	}
	// 设置类型和操作函数集
	sock->type = type;
	sock->ops = ops;
	// 创建一个struct sock结构体,和sock_alloc分配的socket结构体互相关联
	if ((i = sock->ops->create(sock, protocol)) < 0) 
	{
		sock_release(sock);
		return(i);
	}
	// 返回一个新的文件描述符,下面分析
	if ((fd = get_fd(SOCK_INODE(sock))) < 0) 
	{
		sock_release(sock);
		return(-EINVAL);
	}

	return(fd);
}

由上面的代码可以知道,socket函数主要是三个步骤,下面逐个分析。
1 拿到一个新的socket结构体

struct socket *sock_alloc(void)
{
	struct inode * inode;
	struct socket * sock;
	// 获取一个可用的inode节点
	inode = get_empty_inode();
	if (!inode)
		return NULL;
	// 初始化某些字段
	inode->i_mode = S_IFSOCK;
	inode->i_sock = 1;// socket文件
	inode->i_uid = current->uid;
	inode->i_gid = current->gid;
	// 执行inode的socket结构体,初始化inode结构体的socket结构体
	sock = &inode->u.socket_i;
	sock->state = SS_UNCONNECTED;
	sock->flags = 0;
	sock->ops = NULL;
	sock->data = NULL;
	sock->conn = NULL;
	sock->iconn = NULL;
	sock->next = NULL;
	// 这个结构很重要,在阻塞性的网络函数里会用到,主要是用于阻塞和唤醒进程
	sock->wait = &inode->i_wait;
	// 互相引用
	sock->inode = inode;		/* "backlink": we could use pointer arithmetic instead */
	sock->fasync_list = NULL;
	// socket数加一
	sockets_in_use++;
	// 返回新的socket结构体,实际上是inode中的一个字段
	return sock;
}

2 sock->ops->create,根据props(网络源码初始化分析)数组的结构可知,create函数对应的是inet_create

// 创建一个sock结构体,和socket结构体互相关联
static int inet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct proto *prot;
	int err;
	// 分配一个sock结构体
	sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
	if (sk == NULL) 
		return(-ENOBUFS);
	sk->num = 0;
	sk->reuse = 0;
	switch(sock->type) 
	{
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (protocol && protocol != IPPROTO_TCP) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPROTONOSUPPORT);
			}
			protocol = IPPROTO_TCP;
			sk->no_check = TCP_NO_CHECK;
			// 函数集
			prot = &tcp_prot;
			break;

		case SOCK_DGRAM:
			if (protocol && protocol != IPPROTO_UDP) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPROTONOSUPPORT);
			}
			protocol = IPPROTO_UDP;
			sk->no_check = UDP_NO_CHECK;
			prot=&udp_prot;
			break;
		// 下面两种类型需要root身份
		case SOCK_RAW:
			if (!suser()) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPERM);
			}
			if (!protocol) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPROTONOSUPPORT);
			}
			prot = &raw_prot;
			sk->reuse = 1;
			sk->no_check = 0;	/*
						 * Doesn't matter no checksum is
						 * performed anyway.
						 */
			sk->num = protocol;
			break;

		case SOCK_PACKET:
			if (!suser()) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPERM);
			}
			if (!protocol) 
			{
				kfree_s((void *)sk, sizeof(*sk));
				return(-EPROTONOSUPPORT);
			}
			prot = &packet_prot;
			sk->reuse = 1;
			sk->no_check = 0;	/* Doesn't matter no checksum is
						 * performed anyway.
						 */
			sk->num = protocol;
			break;

		default:
			kfree_s((void *)sk, sizeof(*sk));
			return(-ESOCKTNOSUPPORT);
	}
	// sock结构体的socket字段指向上层的socket结构体
	sk->socket = sock;
#ifdef CONFIG_TCP_NAGLE_OFF
	sk->nonagle = 1;
#else    
	sk->nonagle = 0;
#endif  
	sk->type = sock->type;
	sk->stamp.tv_sec=0;
	sk->protocol = protocol;
	sk->wmem_alloc = 0;
	sk->rmem_alloc = 0;
	sk->sndbuf = SK_WMEM_MAX;
	sk->rcvbuf = SK_RMEM_MAX;
	sk->pair = NULL;
	sk->opt = NULL;
	sk->write_seq = 0;
	sk->acked_seq = 0;
	sk->copied_seq = 0;
	sk->fin_seq = 0;
	sk->urg_seq = 0;
	sk->urg_data = 0;
	sk->proc = 0;
	sk->rtt = 0;				/*TCP_WRITE_TIME << 3;*/
	sk->rto = TCP_TIMEOUT_INIT;		/*TCP_WRITE_TIME*/
	sk->mdev = 0;
	sk->backoff = 0;
	sk->packets_out = 0;
	sk->cong_window = 1; /* start with only sending one packet at a time. */
	sk->cong_count = 0;
	sk->ssthresh = 0;
	sk->max_window = 0;
	sk->urginline = 0;
	sk->intr = 0;
	sk->linger = 0;
	sk->destroy = 0;
	sk->priority = 1;
	sk->shutdown = 0;
	sk->keepopen = 0;
	sk->zapped = 0;
	sk->done = 0;
	sk->ack_backlog = 0;
	sk->window = 0;
	sk->bytes_rcv = 0;
	sk->state = TCP_CLOSE;
	sk->dead = 0;
	sk->ack_timed = 0;
	sk->partial = NULL;
	sk->user_mss = 0;
	sk->debug = 0;

	/* this is how many unacked bytes we will accept for this socket.  */
	sk->max_unacked = 2048; /* needs to be at most 2 full packets. */

	/* how many packets we should send before forcing an ack. 
	   if this is set to zero it is the same as sk->delay_acks = 0 */
	sk->max_ack_backlog = 0;
	sk->inuse = 0;
	sk->delay_acks = 0;
	skb_queue_head_init(&sk->write_queue);
	skb_queue_head_init(&sk->receive_queue);
	sk->mtu = 576;
	// 下层的操作函数集
	sk->prot = prot;
	// 来自socket结构体的wait字段,wait字段来自inode的wait字段
	sk->sleep = sock->wait;
	sk->daddr = 0;
	sk->saddr = 0 /* ip_my_addr() */;
	sk->err = 0;
	sk->next = NULL;
	sk->pair = NULL;
	sk->send_tail = NULL;
	sk->send_head = NULL;
	sk->timeout = 0;
	sk->broadcast = 0;
	sk->localroute = 0;
	init_timer(&sk->timer);
	init_timer(&sk->retransmit_timer);
	sk->timer.data = (unsigned long)sk;
	sk->timer.function = &net_timer;
	skb_queue_head_init(&sk->back_log);
	sk->blog = 0;
	// socket结构体的data字段指向底层的sock结构体
	sock->data =(void *) sk;
	// 初始化tcp头
	sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
	sk->dummy_th.res1=0;
	sk->dummy_th.res2=0;
	sk->dummy_th.urg_ptr = 0;
	sk->dummy_th.fin = 0;
	sk->dummy_th.syn = 0;
	sk->dummy_th.rst = 0;
	sk->dummy_th.psh = 0;
	sk->dummy_th.ack = 0;
	sk->dummy_th.urg = 0;
	sk->dummy_th.dest = 0;
	sk->ip_tos=0;
	sk->ip_ttl=64;
#ifdef CONFIG_IP_MULTICAST
	sk->ip_mc_loop=1;
	sk->ip_mc_ttl=1;
	*sk->ip_mc_name=0;
	sk->ip_mc_list=NULL;
#endif
  	// 下面两个函数用于阻塞型的网络函数被阻塞时,一旦底层条件符合,则回调下面的函数通知上层,即唤醒进程
	sk->state_change = def_callback1;
	sk->data_ready = def_callback2;
	sk->write_space = def_callback3;
	sk->error_report = def_callback1;

	if (sk->num) 
	{
	/*
	 * It assumes that any protocol which allows
	 * the user to assign a number at socket
	 * creation time automatically
	 * shares.
	 */
		// 根据端口,把sock结构体放到下层协议的sock_srray数组
		put_sock(sk->num, sk);
		sk->dummy_th.source = ntohs(sk->num);
	}
	// 执行底层的初始化函数,tcp和udp都没有init函数
	if (sk->prot->init) 
	{
		err = sk->prot->init(sk);
		if (err != 0) 
		{
			destroy_sock(sk);
			return(err);
		}
	}
	return(0);
}

3 get_fd,经过上面的几个步骤,我们拿到了一个inode、一个socket、一个sock。最后我们要再拿到一个文件描述符返回给应用层,在操作系统中,每个进程有一个fd数组,记录进程打开的文件信息,数组的一个或多个项指向一个struct file结构体,一个或多个file结构体又指向一个inode结构体。所以我们拿到一个inode后,还需要拿到一个file结构,最后拿到一个fd结构,返回给用户。

static int get_fd(struct inode *inode)
{
	int fd;
	struct file *file;

	/*
	 *	Find a file descriptor suitable for return to the user. 
	 */
	// 获取一个可以的file结构体
	file = get_empty_filp();
	if (!file) 
		return(-1);
	// 挂载到进程的fd数组中
	for (fd = 0; fd < NR_OPEN; ++fd)
		if (!current->files->fd[fd]) 
			break;
	if (fd == NR_OPEN) 
	{
		file->f_count = 0;
		return(-1);
	}

	FD_CLR(fd, &current->files->close_on_exec);
		current->files->fd[fd] = file;
	// 设置文件操作函数集,操作socket像操作文件一样
	file->f_op = &socket_file_ops;
	file->f_mode = 3;
	file->f_flags = O_RDWR;
	file->f_count = 1;
	// 关联inode节点
	file->f_inode = inode;
	if (inode) 
		inode->i_count++;
	file->f_pos = 0;
	return(fd);
}

最后,当我们指向一个socket函数时,内存视图是:
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值