glibc-2.3.6/sysdeps/generic中的socket.c
glibc-2.3.6/include中的libc-symbols.h
/* Define ALIASNAME as a weak alias for NAME.
If weak aliases are not available, this defines a strong alias. */
/* weak_alias only forwards to _weak_alias; the extra level lets the
arguments be macro-expanded before _weak_alias stringifies NAME with #name. */
# define weak_alias(name, aliasname) _weak_alias (name, aliasname)
/* Declares ALIASNAME with the same type as NAME (__typeof) and attaches the
GCC attributes weak and alias, the alias target being the symbol spelled NAME. */
# define _weak_alias(name, aliasname) \
extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
weak_alias(__socket, socket)把socket声明为__socket()的弱别名(weak alias)。因此,调用库函数socket()实际执行的就是__socket()的代码。
Weak Alias 是GCC编译器扩展内容,指定了函数的weak属性。
__socket()真实的定义是用汇编语言实现的,位于glibc-2.3.6/sysdeps/unix/sysv/linux/i386中的socket.S文件。
ENTRY (__socket)
.......
movl $SYS_ify(socketcall), %eax /* System call number in %eax. */
/* Use ## so `socket' is a separate token that might be #define'd. */
movl $P(SOCKOP_,socket), %ebx /* Subcode is first arg to syscall. */
lea 4(%esp), %ecx /* Address of args is 2nd arg. */
/* Do the system call trap. */
ENTER_KERNEL
..........
对于i386系统内核来说,ENTER_KERNEL和SYS_ify宏声明如下:
ENTER_KERNEL和SYS_ify在glibc-2.3.6/sysdeps/unix/sysv/linux/i386中的sysdep.h文件:
/* For Linux we can use the system call table in the header file
/usr/include/asm/unistd.h
of the kernel. But these symbols do not follow the SYS_* syntax
so we have to redefine the `SYS_ify' macro here. */
#undef SYS_ify
/* Token-pastes the system call name onto the __NR_ prefix, e.g.
SYS_ify(socketcall) becomes __NR_socketcall. */
#define SYS_ify(syscall_name) __NR_##syscall_name
/* The original calling convention for system calls on Linux/i386 is
to use int $0x80. */
/* ENTER_KERNEL is the instruction used to enter the kernel.
With I386_USE_SYSENTER it is an indirect call through a stored pointer:
read from %gs:SYSINFO_OFFSET when SHARED is defined, otherwise from
_dl_sysinfo. Without I386_USE_SYSENTER it is the int $0x80 instruction. */
#ifdef I386_USE_SYSENTER
# ifdef SHARED
# define ENTER_KERNEL call *%gs:SYSINFO_OFFSET
# else
# define ENTER_KERNEL call *_dl_sysinfo
# endif
#else
# define ENTER_KERNEL int $0x80
#endif
当glibc在Linux系统中为i386编译时,__NR_socketcall的定义来自内核的x86头文件linux-2.6.29.6\arch\x86\include\asm\unistd_32.h,它就是系统调用号。
#define __NR_socketcall 102
SOCKOP_socket的定义在glibc-2.3.6/sysdeps/unix/sysv/linux/socketcall.h中。
#define SOCKOP_socket 1
当服务器程序运行后,调用socket()函数就会执行glibc中的上述代码,从而将系统调用号102保存到寄存器eax中,然后执行软中断指令int $0x80到达系统调用的总入口system_call()函数,这个函数也是用汇编实现的,在Linux内核目录linux-2.6.29.6\arch\x86\kernel下的entry_32.S文件。
ENTRY(system_call)
.....
syscall_call:
call *sys_call_table(,%eax,4)
......
这里省去了中断执行的许多细节以及寄存器传递参数的内容,system_call()最终使用汇编指令执行sys_call_table系统调用表102处的函数指针。系统调用表在linux-2.6.29.6\arch\x86\kernel下的syscall_table_32.S文件中。
ENTRY(sys_call_table)
.....
.long sys_fstatfs /* 100 */
.long sys_ioperm
.long sys_socketcall
.long sys_syslog
.long sys_setitimer
.long sys_getitimer /* 105 */
.....
所有的socket系统调用的总入口是sys_socketcall(),其原型在include/linux/syscalls.h中声明。函数的两个参数分别通过寄存器ebx和ecx来传递:参数call是具体的socket调用号,对于socket()来说ebx寄存器保存的值为1(即SOCKOP_socket);参数args指向用户空间的参数数组,其地址保存在ecx寄存器中。
asmlinkage long sys_socketcall(int call, unsigned long __user *args);
其中,
@param call 标识接口编号,
@param args 是接口参数指针
接口编号在include/linux/net.h中定义
接口编号对应的参数个数在net/socket.c文件中的nargs数组中定义
在net/socket.c中有一个函数SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)便是socket调用的入口。
/*
* System call vectors.
*
* Argument checking cleaned up. Saved 20% in size.
* This function doesn't need to set the kernel lock because
* it is set by the callees.
*/
/*
* Multiplexed entry point for the socket calls.
*
* call selects the operation; values outside 1..SYS_ACCEPT4 are rejected
* with -EINVAL. args points to a user-space array of unsigned long
* arguments; nargs[call] bytes of it are copied into the local array a,
* and a failed copy returns -EFAULT. The matching sys_* handler is then
* called with the copied values (cast to pointer types where the handler
* expects them) and its result is returned.
*
* NOTE(review): a[] has room for 6 values; this relies on nargs[call]
* never exceeding sizeof(a). nargs is not visible in this excerpt - confirm.
*/
SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
unsigned long a[6];
unsigned long a0, a1;
int err;
/* Reject call numbers outside the known range before indexing nargs. */
if (call < 1 || call > SYS_ACCEPT4)
return -EINVAL;
/* copy_from_user should be SMP safe. */
if (copy_from_user(a, args, nargs[call]))
return -EFAULT;
/* nargs[call] is a byte count; dividing by sizeof(unsigned long)
gives the number of arguments handed to the audit hook. */
audit_socketcall(nargs[call] / sizeof(unsigned long), a);
a0 = a[0];
a1 = a[1];
switch (call) {
case SYS_SOCKET:
err = sys_socket(a0, a1, a[2]);
break;
case SYS_BIND:
err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_CONNECT:
err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_LISTEN:
err = sys_listen(a0, a1);
break;
case SYS_ACCEPT:
/* Plain accept is served by sys_accept4 with a flags value of 0. */
err = sys_accept4(a0, (struct sockaddr __user *)a1,
(int __user *)a[2], 0);
break;
case SYS_GETSOCKNAME:
err =
sys_getsockname(a0, (struct sockaddr __user *)a1,
(int __user *)a[2]);
break;
case SYS_GETPEERNAME:
err =
sys_getpeername(a0, (struct sockaddr __user *)a1,
(int __user *)a[2]);
break;
case SYS_SOCKETPAIR:
err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
break;
case SYS_SEND:
err = sys_send(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_SENDTO:
err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], a[5]);
break;
case SYS_RECV:
err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_RECVFROM:
err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4],
(int __user *)a[5]);
break;
case SYS_SHUTDOWN:
err = sys_shutdown(a0, a1);
break;
case SYS_SETSOCKOPT:
err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
break;
case SYS_GETSOCKOPT:
err =
sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
(int __user *)a[4]);
break;
case SYS_SENDMSG:
err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
break;
case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
break;
case SYS_ACCEPT4:
err = sys_accept4(a0, (struct sockaddr __user *)a1,
(int __user *)a[2], a[3]);
break;
default:
/* Not reachable for the values admitted by the range check above
unless a number in that range has no case label. */
err = -EINVAL;
break;
}
return err;
}
其中SYSCALL_DEFINE2是一个宏,定义在include/linux/syscalls.h中。SYSCALL_DEFINEx里面的x代表的是系统调用参数个数。asmlinkage long sys_socketcall(int call, unsigned long __user *args);是两个参数。
下面我们来展开SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args):
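SYSCALL_DEFINE2在include/linux/syscalls.h中的定义是:#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)。其中##是连接符,__VA_ARGS__代表前面...里面的可变参数,因此_##name被拼接为_socketcall,也就是说第一步展开后结果为,
SYSCALL_DEFINEx(2, _socketcall, int, call, unsigned long __user *, args)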
那么再将上面的展开,结果如下:
asmlinkage long sys_socketcall(__SC_DECL2( int, call, unsigned long __user *, args)); \
static inline long SYSC_socketcall(__SC_DECL2( int, call, unsigned long __user *, args)); \
asmlinkage long SyS_socketcall(__SC_LONG2( int, call, unsigned long __user *, args)) \
{ \
__SC_TEST2( int, call, unsigned long __user *, args); \
return (long) SYSC_socketcall(__SC_CAST2( int, call, unsigned long __user *, args)); \
} \
SYSCALL_ALIAS(sys_socketcall, SyS_socketcall); \
static inline long SYSC_socketcall(__SC_DECL2( int, call, unsigned long __user *, args))
这回大家看到了第一行熟悉的sys_socketcall了。哎,不对,这好像只是一个函数声明,不是定义吧,那么定义跑到哪里去了?向下接着看,定义其实在最后一行,结尾没有加分号,下面再加上一对大括号,可不就是定义。那么上面的一大堆是干什么用的呢?
先看SYSCALL_ALIAS,根据名字就可以知道,这个宏定义的意思其实就是将SyS_socketcall的别名设为sys_socketcall,也就是说调用sys_socketcall其实就是在调用SyS_socketcall。
/* Emits a file-scope asm statement whose directives make the symbol ALIAS
global and set it equal to the symbol NAME, and then do the same for the
dot-prefixed symbols .ALIAS and .NAME.
NOTE(review): the dot-prefixed pair looks architecture-specific; the reason
for it is not visible in this excerpt - confirm against the kernel source. */
#define SYSCALL_ALIAS(alias, name) \
asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \
"\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
那么SyS_socketcall里面就调用到了SYSC_socketcall了,就调用到了函数定义里去了。这样子兜了一大圈,通过别名再调用到函数定义,如此麻烦到底是为了什么?难道是大神们在秀技术?显得高大上吗?NO,关键在那几个我们一直没有介绍到的宏,__SC_DECL2,__SC_LONG2,__SC_CAST2。
大家看一下定义:
/* Helper macro families used when expanding a system call definition.
Each family takes n (type, name) pairs; level n consumes the first pair
and passes the remaining arguments to level n-1. */

/* __SC_DECLn: parameter list with the declared types, "type name, ...". */
#define __SC_DECL1(t1, a1) t1 a1
#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__)
#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__)
#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__)
/* __SC_LONGn: same parameter names, but every one is declared as long;
the type of each pair is ignored. */
#define __SC_LONG1(t1, a1) long a1
#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__)
#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__)
#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__)
#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__)
#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__)
/* __SC_CASTn: argument list that casts each name to its declared type,
"(type) name, ...". */
#define __SC_CAST1(t1, a1) (t1) a1
#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__)
#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__)
#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__)
#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__)
#define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__)
/* __SC_TEST: passes "sizeof(type) > sizeof(long)" to BUILD_BUG_ON, which is
defined outside this excerpt. __SC_TESTn applies it to the type of every
pair, as a sequence of statements. */
#define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long))
#define __SC_TEST1(t1, a1) __SC_TEST(t1)
#define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__)
#define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__)
#define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__)
#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
展开过程如下:
__SC_DECL2(int, call, unsigned long __user *, args) -> int call, __SC_DECL1(unsigned long __user *, args) -> int call, unsigned long __user * args
__SC_CAST2(int, call, unsigned long __user *, args) -> (int) call, __SC_CAST1(unsigned long __user *, args) -> (int) call, (unsigned long __user *) args
__SC_LONG2(int, call, unsigned long __user *, args) -> long call, __SC_LONG1(unsigned long __user *, args) -> long call, long args
__SC_TEST2(int, call, unsigned long __user *, args) -> __SC_TEST(int); __SC_TEST1(unsigned long __user *, args) -> __SC_TEST(int); __SC_TEST(unsigned long __user *); --> BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(unsigned long __user *) > sizeof(long));
那么上面的SYSCALL_DEFINEx就变为了下面这段比较清晰的代码了:
asmlinkage long sys_socketcall(int call, unsigned long __user * args); \
static inline long SYSC_socketcall(int call, unsigned long __user * args); \
asmlinkage long SyS_socketcall(long call, long args) \
{ \
BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(unsigned long __user *) > sizeof(long)); \
return (long) SYSC_socketcall((int) call, (unsigned long __user *) args); \
} \
SYSCALL_ALIAS(sys_socketcall, SyS_socketcall); \
static inline long SYSC_socketcall(int call, unsigned long __user * args)
大家这下总算看明白了吧,其实里面做的工作,就是将系统调用的参数统一用long型来接收,再强制转换回各参数原本声明的类型(例如int)。那么这么强转一下究竟是为什么呢?原因就是64位的Linux有一个编号为CVE-2009-0029的漏洞:在部分64位平台上,ABI要求用户态把32位参数符号扩展到64位寄存器后再传入,而内核无法验证这一点;先用long接收再强转,就保证了内核自己完成正确的符号扩展。这个漏洞的具体内容,http://blog.youkuaiyun.com/hxmhyp/article/details/22619729,有详细说明。