epoll是linux 特有的I/O复用函数,它在实现和使用上和select ,poll有很大的差异!首先epoll使用一组函数来完成任务,而不是单个函数。其实epoll吧用户关心的文件描述符的事件放在内核的一个事件表里,而无须像select 和poll那样每次调用都要重复传入文件描述符或事件集。但epoll需要使用一个额外的文件描述符,来唯一标识内核中的一个事件表,这文件描述符使用如下的epoll_create函数来创建:
下面的函数用来操作epoll的内核事件表:
其中event成员描述事件类型!epoll支持的事件类型和poll基本相同,但epoll有两个额外的事件类型-EPOLLET和EPOLLONESHOT。它们对于epoll的高效运作非常关键。data成员用于存储用户数据,其类型epoll_data_t的定义如下:
epoll系列系统调用的主要接口是epoll_wait函数。它在一段时间内等待一组文件描述符上的事件 ,其原型如下:
该函数成功时返回就绪的文件描述符的个数,失败时返回-1并设置errno。maxevents参数指定最多监听多少个事件,它必须大于0。
epoll_wait函数如果检测到事件,就将所有就绪的事件从内核事件表中复制到它的第二个参数events指向的数组中,这数组只用于输出epoll_wait检测到的就绪事件。
epoll对文件描述符的操作有两种模式:LT和ET模式。LT模式是默认的工作模式,这种模式下epoll相当于一个效率较高的poll。当往epoll内核事件表中注册一个文件描述符上的EPOLLET事件时,epoll将以ET模式来操作该文件描述符。ET模式是epoll的搞笑模式!
对于采用LT工作模式的文件描述符,当epoll_wait检测到其上事件发送并将此事件通知应用程序后,应用程序可以不立即处理该事件。这样当应用程序下次调用epoll_wait时,epoll_wait还会再次向应用程序通知此事件,直到该事件被处理。而对于ET模式的文件描述符,当epoll_wait检测到其上有事件发生并将此事件通知应用程序后,应用程序应该立即处理该事件。因为后续的epoll_wait调用将不再向应用程序通知这一事件。可见,ET模式在很大程序上降低了同一个epoll事件被重复触发的次数,因此效率要比LT模式要高。
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <pthread.h>
#define MAX_EVENT_NUMBER 1024
#define BUFFER_SIZE 10
int setnonblocking( int fd )
{
int old_option = fcntl( fd, F_GETFL );
int new_option = old_option | O_NONBLOCK;
fcntl( fd, F_SETFL, new_option );
return old_option;
}
void addfd( int epollfd, int fd, bool enable_et )
{
epoll_event event;
event.data.fd = fd;
event.events = EPOLLIN;
if( enable_et )
{
event.events |= EPOLLET;
}
epoll_ctl( epollfd, EPOLL_CTL_ADD, fd, &event );
setnonblocking( fd );
}
void lt( epoll_event* events, int number, int epollfd, int listenfd )
{
char buf[ BUFFER_SIZE ];
for ( int i = 0; i < number; i++ )
{
int sockfd = events[i].data.fd;
if ( sockfd == listenfd )
{
struct sockaddr_in client_address;
socklen_t client_addrlength = sizeof( client_address );
int connfd = accept( listenfd, ( struct sockaddr* )&client_address, &client_addrlength );
addfd( epollfd, connfd, false );
}
else if ( events[i].events & EPOLLIN )
{
printf( "event trigger once\n" );
memset( buf, '\0', BUFFER_SIZE );
int ret = recv( sockfd, buf, BUFFER_SIZE-1, 0 );
if( ret <= 0 )
{
close( sockfd );
continue;
}
printf( "get %d bytes of content: %s\n", ret, buf );
}
else
{
printf( "something else happened \n" );
}
}
}
void et( epoll_event* events, int number, int epollfd, int listenfd )
{
char buf[ BUFFER_SIZE ];
for ( int i = 0; i < number; i++ )
{
int sockfd = events[i].data.fd;
if ( sockfd == listenfd )
{
struct sockaddr_in client_address;
socklen_t client_addrlength = sizeof( client_address );
int connfd = accept( listenfd, ( struct sockaddr* )&client_address, &client_addrlength );
addfd( epollfd, connfd, true );
}
else if ( events[i].events & EPOLLIN )
{
printf( "event trigger once\n" );
while( 1 )
{
memset( buf, '\0', BUFFER_SIZE );
int ret = recv( sockfd, buf, BUFFER_SIZE-1, 0 );
if( ret < 0 )
{
if( ( errno == EAGAIN ) || ( errno == EWOULDBLOCK ) )
{
printf( "read later\n" );
break;
}
close( sockfd );
break;
}
else if( ret == 0 )
{
close( sockfd );
}
else
{
printf( "get %d bytes of content: %s\n", ret, buf );
}
}
}
else
{
printf( "something else happened \n" );
}
}
}
int main( int argc, char* argv[] )
{
if( argc <= 2 )
{
printf( "usage: %s ip_address port_number\n", basename( argv[0] ) );
return 1;
}
const char* ip = argv[1];
int port = atoi( argv[2] );
int ret = 0;
struct sockaddr_in address;
bzero( &address, sizeof( address ) );
address.sin_family = AF_INET;
inet_pton( AF_INET, ip, &address.sin_addr );
address.sin_port = htons( port );
int listenfd = socket( PF_INET, SOCK_STREAM, 0 );
assert( listenfd >= 0 );
ret = bind( listenfd, ( struct sockaddr* )&address, sizeof( address ) );
assert( ret != -1 );
ret = listen( listenfd, 5 );
assert( ret != -1 );
epoll_event events[ MAX_EVENT_NUMBER ];
int epollfd = epoll_create( 5 );
assert( epollfd != -1 );
addfd( epollfd, listenfd, true );
while( 1 )
{
int ret = epoll_wait( epollfd, events, MAX_EVENT_NUMBER, -1 );
if ( ret < 0 )
{
printf( "epoll failure\n" );
break;
}
lt( events, ret, epollfd, listenfd );
//et( events, ret, epollfd, listenfd );
}
close( listenfd );
return 0;
}
EPOLLONESHOT事件
即使我们使用ET模式,一个socket上的某个事件还是可能被触发多次。这在并发程序中就会引起一个问题,比如一个线程在读取完某个socket上的数据后开始处理这些数据,而在数据的处理过程中该socket上又有新数据可读(EPOLLIN再次被触发),此时另外个线程就被唤醒来读取这些新的数据。于是就出现了两个线程同时操作一个socket的局面。这当然不是我们所期望的,我们期望的是一个socket连接在任一时刻都只被一个线程处理!这一点我们可以使用epoll的 EPOLLONESHOT事件来实现!
对于注册了EPOLLONESHOT事件的文件描述符,操作系统最多触发其上注册的一个可读 可写或异常事件,且只触发一次!除非我们使用epoll_ctl函数重置该文件描述符上注册的EPOLLONESHOT事件,这样,当一个线程在处理某个socket时,其他线程是不可能有机会操作该socket的。但反过来思考该线程应该立即重置这socket上的EPOLLONESHOT事件,以确保这socket下一次可读时,其EPOLLIN事件被触发,进而让其他工作线程有机会继续处理这socket。
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <pthread.h>
#define MAX_EVENT_NUMBER 1024
#define BUFFER_SIZE 1024
struct fds
{
int epollfd;
int sockfd;
};
int setnonblocking( int fd )
{
int old_option = fcntl( fd, F_GETFL );
int new_option = old_option | O_NONBLOCK;
fcntl( fd, F_SETFL, new_option );
return old_option;
}
void addfd( int epollfd, int fd, bool oneshot )
{
epoll_event event;
event.data.fd = fd;
event.events = EPOLLIN | EPOLLET;
if( oneshot )
{
event.events |= EPOLLONESHOT;
}
epoll_ctl( epollfd, EPOLL_CTL_ADD, fd, &event );
setnonblocking( fd );
}
void reset_oneshot( int epollfd, int fd )
{
epoll_event event;
event.data.fd = fd;
event.events = EPOLLIN | EPOLLET | EPOLLONESHOT;
epoll_ctl( epollfd, EPOLL_CTL_MOD, fd, &event );
}
void* worker( void* arg )
{
int sockfd = ( (fds*)arg )->sockfd;
int epollfd = ( (fds*)arg )->epollfd;
printf( "start new thread to receive data on fd: %d\n", sockfd );
char buf[ BUFFER_SIZE ];
memset( buf, '\0', BUFFER_SIZE );
while( 1 )
{
int ret = recv( sockfd, buf, BUFFER_SIZE-1, 0 );
if( ret == 0 )
{
close( sockfd );
printf( "foreiner closed the connection\n" );
break;
}
else if( ret < 0 )
{
if( errno == EAGAIN )
{
reset_oneshot( epollfd, sockfd );
printf( "read later\n" );
break;
}
}
else
{
printf( "get content: %s\n", buf );
sleep( 5 );
}
}
printf( "end thread receiving data on fd: %d\n", sockfd );
}
int main( int argc, char* argv[] )
{
if( argc <= 2 )
{
printf( "usage: %s ip_address port_number\n", basename( argv[0] ) );
return 1;
}
const char* ip = argv[1];
int port = atoi( argv[2] );
int ret = 0;
struct sockaddr_in address;
bzero( &address, sizeof( address ) );
address.sin_family = AF_INET;
inet_pton( AF_INET, ip, &address.sin_addr );
address.sin_port = htons( port );
int listenfd = socket( PF_INET, SOCK_STREAM, 0 );
assert( listenfd >= 0 );
ret = bind( listenfd, ( struct sockaddr* )&address, sizeof( address ) );
assert( ret != -1 );
ret = listen( listenfd, 5 );
assert( ret != -1 );
epoll_event events[ MAX_EVENT_NUMBER ];
int epollfd = epoll_create( 5 );
assert( epollfd != -1 );
addfd( epollfd, listenfd, false );
while( 1 )
{
int ret = epoll_wait( epollfd, events, MAX_EVENT_NUMBER, -1 );
if ( ret < 0 )
{
printf( "epoll failure\n" );
break;
}
for ( int i = 0; i < ret; i++ )
{
int sockfd = events[i].data.fd;
if ( sockfd == listenfd )
{
struct sockaddr_in client_address;
socklen_t client_addrlength = sizeof( client_address );
int connfd = accept( listenfd, ( struct sockaddr* )&client_address, &client_addrlength );
addfd( epollfd, connfd, true );
}
else if ( events[i].events & EPOLLIN )
{
pthread_t thread;
fds fds_for_new_worker;
fds_for_new_worker.epollfd = epollfd;
fds_for_new_worker.sockfd = sockfd;
pthread_create( &thread, NULL, worker, ( void* )&fds_for_new_worker );
}
else
{
printf( "something else happened \n" );
}
}
}
close( listenfd );
return 0;
}