epoll讲解

目录
  1. epoll接口函数
    1. 创建epoll实例
    2. 管理epoll事件
    3. 等待epoll事件
  2. epoll实例
  3. mac下的epoll
  4. epoll源码实现

epoll接口函数

头文件: #include<sys/epoll.h>
可以通过man epoll查看对应的帮助信息
最大描述符限制:/proc/sys/fs/epoll/max_user_watches

创建epoll实例

int epoll_create(int size);
int epoll_create1(int flag);

epoll_create中,size只是给内核的一个容量提示,并不是队列中的最大数;自Linux 2.6.8起该参数被内核忽略,但仍然必须大于0
epoll_create1中flag取值如下:

  • 0:epoll_create1 == epoll_create (size argument is dropped)
  • EPOLL_CLOEXEC:含义同open函数的O_CLOEXEC选项;当执行execve创建新进程时,打开的描述符自动关闭
    p.s: 当使用完毕时,需要调用close关闭epoll实例句柄

    管理epoll事件

int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event);

参数说明:

  • epfd: epoll_create返回的epoll实例
  • op: 对应的操作
  • fd: 监听的fd
  • event: 监听的事件
    其中op取值如下:
  • EPOLL_CTL_ADD:添加监听的事件
  • EPOLL_CTL_DEL:删除监听的事件
  • EPOLL_CTL_MOD:修改监听的事件
    struct epoll_event定义如下:
/* Per-descriptor user data carried inside struct epoll_event.
   This is a union: all members share the same storage, so only the member
   that was last written holds a meaningful value. */
typedef union epoll_data
{
void *ptr; /* arbitrary user pointer */
int fd; /* a file descriptor */
uint32_t u32; /* 32-bit unsigned value */
uint64_t u64; /* 64-bit unsigned value */
} epoll_data_t;
/* Event descriptor: the type of the `event` argument of epoll_ctl and of
   each element of the `events` array passed to epoll_wait. */
struct epoll_event
{
uint32_t events; /* Bit mask of EPOLL* event flags (EPOLLIN, EPOLLOUT, ...) */
epoll_data_t data; /* User data variable (see epoll_data_t) */
};

其中events可以包含以下事件类型:

  • EPOLLIN: 描述符可读
  • EPOLLOUT: 描述符可写
  • EPOLLRDHUP(since Linux 2.6.17): 流套接字对端关闭连接或者关闭写端
  • EPOLLPRI: 紧急数据可读
  • EPOLLERR: 描述符发生错误,该事件由内核一直监听(比如connect套接字失败会返回EPOLLERR)
  • EPOLLHUP: 文件描述符被挂断,该事件由内核一直监听
  • EPOLLET: 开启边缘触发,默认是水平触发
  • EPOLLONESHOT: 一个事件发生并读取之后,fd自动不再监控;若要重新监控需要使用EPOLL_CTL_MOD重新设置
    返回值: 成功返回0,失败返回-1并设置errno

    等待epoll事件

int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);

参数说明:

  • epfd: epoll_create返回的epoll实例
  • events: 存储epoll_event的数组地址
  • maxevents: 最大事件的数量,需>0
  • timeout: 等待的最长时间,单位为毫秒;-1表示一直阻塞直到有事件发生,0表示立即返回
    返回值:
    成功时返回就绪的监听文件描述符数;当超出timeout指定的时间后如果无就绪的文件描述符,返回0;发生错误时返回-1并设置errno
    另外,Linux kernel 2.6.19 引入了epoll_pwait,可以在等待时设置信号掩码,其使用方式类似pselect

some problems:

  1. epoll 怎么判断是connect请求还是有数据可读?
    ans: 判断events[i].data.fd == listen_fd
  2. read 返回值说明:
  • return -1 and errno == EAGAIN: 数据已经读完,没有可读数据
  • return 0: end of file,对端关闭连接

关于水平触发(Level-Triggered)和边缘触发(Edge-Triggered)
当缓冲区有数据可读时,ET会触发一次事件,之后就不会再触发;而LT只要我们没有读完缓冲区的数据,事件就会一直触发。
推荐使用的epoll ET方式如下:

  1. 设置fd为非阻塞
  2. 当调用read或write读写时,在其返回-1,且errno == EAGAIN 后再调用epoll_wait等待
    tips:
    ET模式只能用于设置了O_NONBLOCK的fd,而LT则同时支持阻塞及非阻塞的fd。如果将ET模式应用于阻塞的fd,将出现如下问题:
    当对端send 2 byte数据,而服务端只读取了1 byte后再去调用epoll_wait,这时将不产生读事件。直到对端又有数据发送过来,epoll_wait才会再次返回
    补充:
    Q:当又有事件产生时会怎么样,原来的数据还在吗?
    A:原来的数据还在socket缓冲区

    epoll实例

    epoll使用参考:
  • 服务端代码:
/*
 * Create a stream (TCP) socket bound to PORT on the wildcard address.
 *
 * getaddrinfo() is asked for passive addresses of any family (IPv4 or
 * IPv6); each candidate is tried in turn until socket() and bind() both
 * succeed.
 *
 * port: service name or decimal port number, as accepted by getaddrinfo().
 *
 * Returns the bound (not yet listening) socket descriptor, or -1 on
 * failure after printing a diagnostic to stderr.  The address list is
 * released on every path once getaddrinfo() has succeeded.
 */
static int
create_and_bind (char *port)
{
    struct addrinfo hints;
    struct addrinfo *result, *rp;
    int s;
    int sfd = -1;

    memset (&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;     /* Return IPv4 and IPv6 choices */
    hints.ai_socktype = SOCK_STREAM; /* We want a TCP socket */
    hints.ai_flags = AI_PASSIVE;     /* All interfaces */

    s = getaddrinfo (NULL, port, &hints, &result);
    if (s != 0) {
        fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (s));
        return -1;
    }

    for (rp = result; rp != NULL; rp = rp->ai_next) {
        sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sfd == -1)
            continue;

        if (bind (sfd, rp->ai_addr, rp->ai_addrlen) == 0)
            break; /* We managed to bind successfully! */

        /* This candidate failed: drop it and mark "no socket" so the
           check after the loop does not depend on the freed list. */
        close (sfd);
        sfd = -1;
    }

    /* Free the list before any return; the original leaked it when no
       address could be bound. */
    freeaddrinfo (result);

    if (sfd == -1) {
        fprintf (stderr, "Could not bind\n");
        return -1;
    }
    return sfd;
}
/*
 * Switch descriptor SFD to non-blocking mode by OR-ing O_NONBLOCK into
 * its current file status flags.
 *
 * Returns 0 on success.  If either fcntl() call fails, reports it with
 * perror("fcntl") and returns -1.
 */
static int
make_socket_non_blocking (int sfd)
{
    /* Read the current flags first so the other status bits are kept. */
    const int current = fcntl (sfd, F_GETFL, 0);

    /* Only attempt the update when the read succeeded. */
    const int outcome = (current == -1)
                            ? -1
                            : fcntl (sfd, F_SETFL, current | O_NONBLOCK);

    if (outcome != -1)
        return 0;

    perror ("fcntl");
    return -1;
}
#define MAXEVENTS 64
/* Write all LEN bytes of BUF to FD, retrying after short writes and
   EINTR.  Returns 0 on success, -1 on error with errno set by write(). */
static int
write_all (int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = write (fd, buf, len);
        if (n == -1) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        buf += n;
        len -= (size_t) n;
    }
    return 0;
}

/* Accept every connection currently queued on the non-blocking listening
   socket SFD, make each one non-blocking and register it with the epoll
   instance EFD for edge-triggered read events. */
static void
accept_pending (int efd, int sfd)
{
    for (;;) {
        /* sockaddr_storage is large enough for any address family; a
           plain struct sockaddr is too small for IPv6 peers. */
        struct sockaddr_storage in_addr;
        socklen_t in_len = sizeof in_addr;
        char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
        struct epoll_event event;
        int infd, s;

        infd = accept (sfd, (struct sockaddr *) &in_addr, &in_len);
        if (infd == -1) {
            /* Interrupted, or this one connection died before we got to
               it: keep draining the queue, since in edge-triggered mode
               no new notification arrives for what is already pending. */
            if (errno == EINTR || errno == ECONNABORTED)
                continue;
            /* EAGAIN/EWOULDBLOCK: all incoming connections processed. */
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                perror ("accept");
            return;
        }

        s = getnameinfo ((struct sockaddr *) &in_addr, in_len,
                         hbuf, sizeof hbuf,
                         sbuf, sizeof sbuf,
                         NI_NUMERICHOST | NI_NUMERICSERV);
        if (s == 0) {
            printf ("Accepted connection on descriptor %d "
                    "(host=%s, port=%s)\n", infd, hbuf, sbuf);
        }

        /* Make the incoming socket non-blocking and add it to the list
           of fds to monitor. */
        if (make_socket_non_blocking (infd) == -1)
            abort ();

        memset (&event, 0, sizeof event);
        event.data.fd = infd;
        event.events = EPOLLIN | EPOLLET;
        if (epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event) == -1) {
            perror ("epoll_ctl");
            abort ();
        }
    }
}

/* Read everything currently available on FD and copy it to standard
   output.  Edge-triggered mode gives no second notification for the same
   data, so reading continues until EAGAIN.
   Returns 1 if the connection should be closed (EOF or read error),
   0 if it should stay registered. */
static int
drain_client (int fd)
{
    for (;;) {
        char buf[512];
        ssize_t count = read (fd, buf, sizeof buf);

        if (count == -1) {
            if (errno == EINTR)
                continue;
            /* No more data for now: go back to the main loop. */
            if (errno == EAGAIN || errno == EWOULDBLOCK)
                return 0;
            perror ("read");
            return 1;
        }
        if (count == 0)
            return 1; /* End of file: the remote closed the connection. */

        /* Write the buffer to standard output */
        if (write_all (1, buf, (size_t) count) == -1) {
            perror ("write");
            abort ();
        }
    }
}

/*
 * Edge-triggered epoll server: listens on the port given as argv[1],
 * accepts any number of clients and echoes whatever they send to
 * standard output.
 *
 * Setup failures abort the process.  A failing epoll_wait() (other than
 * EINTR) or an error on the listening socket ends the event loop; the
 * resources are then released and EXIT_FAILURE is returned.
 */
int
main (int argc, char *argv[])
{
    int sfd, efd;
    int status = EXIT_SUCCESS;
    int running = 1;
    struct epoll_event event;
    struct epoll_event *events;

    if (argc != 2) {
        fprintf (stderr, "Usage: %s [port]\n", argv[0]);
        exit (EXIT_FAILURE);
    }

    sfd = create_and_bind (argv[1]);
    if (sfd == -1)
        abort ();

    if (make_socket_non_blocking (sfd) == -1)
        abort ();

    if (listen (sfd, SOMAXCONN) == -1) {
        perror ("listen");
        abort ();
    }

    efd = epoll_create1 (0);
    if (efd == -1) {
        perror ("epoll_create");
        abort ();
    }

    memset (&event, 0, sizeof event);
    event.data.fd = sfd;
    event.events = EPOLLIN | EPOLLET;
    if (epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event) == -1) {
        perror ("epoll_ctl");
        abort ();
    }

    /* Buffer where events are returned */
    events = calloc (MAXEVENTS, sizeof *events);
    if (events == NULL) {
        perror ("calloc");
        abort ();
    }

    /* The event loop */
    while (running) {
        int i;
        int n = epoll_wait (efd, events, MAXEVENTS, -1);

        if (n == -1) {
            if (errno == EINTR)
                continue; /* interrupted by a signal: just wait again */
            perror ("epoll_wait");
            status = EXIT_FAILURE;
            break;
        }

        for (i = 0; i < n; i++) {
            const uint32_t ev = events[i].events;
            const int fd = events[i].data.fd;

            /* EPOLLHUP together with EPOLLIN is deliberately NOT treated
               as an error: the peer may have hung up with unread data
               still queued, and the read path below will consume it and
               then see EOF.  EPOLLHUP alone is caught by the !EPOLLIN
               test. */
            if ((ev & EPOLLERR) || !(ev & EPOLLIN)) {
                /* An error has occurred on this fd, or the socket is not
                   ready for reading (why were we notified then?) */
                fprintf (stderr, "epoll error\n");
                close (fd);
                if (fd == sfd) {
                    /* Without a listening socket the server cannot
                       continue; leave the loop instead of spinning. */
                    sfd = -1;
                    status = EXIT_FAILURE;
                    running = 0;
                    break;
                }
                continue;
            }

            if (fd == sfd) {
                /* We have a notification on the listening socket, which
                   means one or more incoming connections. */
                accept_pending (efd, sfd);
                continue;
            }

            /* We have data on the fd waiting to be read. */
            if (drain_client (fd)) {
                printf ("Closed connection on descriptor %d\n", fd);
                /* Closing the descriptor will make epoll remove it
                   from the set of descriptors which are monitored. */
                close (fd);
            }
        }
    }

    free (events);
    close (efd);
    if (sfd != -1)
        close (sfd);
    return status;
}
  • 客户端代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <unistd.h>
/*
 * Open a TCP connection to PORT on the local host.
 *
 * getaddrinfo() is called with a NULL node and WITHOUT AI_PASSIVE, which
 * per getaddrinfo(3) yields the loopback address -- the right target for
 * connect().  (With AI_PASSIVE it would return the wildcard address,
 * which is meant for bind().)  Each returned address is tried in turn.
 *
 * port: service name or decimal port number; NULL is rejected.
 *
 * Returns the connected socket descriptor, or -1 on failure.  The address
 * list is released on every path once getaddrinfo() has succeeded.
 */
int create_and_connect(char * port)
{
    if (NULL == port)
    {
        return -1;
    }

    struct addrinfo hints;
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    struct addrinfo *result = NULL;
    int ret = getaddrinfo(NULL, port, &hints, &result);
    if (ret != 0)
    {
        fprintf(stderr, "getaddrinfo error: %s\n", gai_strerror(ret));
        return -1;
    }

    int cfd = -1;
    for (struct addrinfo *rp = result; rp != NULL; rp = rp->ai_next)
    {
        cfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (-1 == cfd)
        {
            continue;
        }
        /* client connect */
        if (0 == connect(cfd, rp->ai_addr, rp->ai_addrlen))
        {
            break;
        }
        /* This candidate failed: drop it and mark "no socket" so the
           check after the loop does not depend on the freed list. */
        close(cfd);
        cfd = -1;
    }

    /* Free the list before any return; the original leaked it when no
       address could be connected. */
    freeaddrinfo(result);

    if (-1 == cfd)
    {
        fprintf(stderr, "connect to port failed!\n");
        return -1;
    }
    return cfd;
}
/*
 * Test client: connects to the port given as argv[1] and sends the same
 * greeting twice, one second apart, so that the two messages reach the
 * server as separate sends.
 *
 * Returns 0 on success and -1 if connecting or sending fails; exits with
 * status -1 on a usage error.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [port]\n", argv[0]);
        exit(-1);
    }

    /* client sends something */
    int cfd = create_and_connect(argv[1]);
    if (-1 == cfd)
    {
        fprintf(stderr, "create_and_connect failed\n");
        return -1;
    }

    const char *pData = "Client hello!";
    size_t dataLen = strlen(pData);
    int rc = 0;

    for (int attempt = 0; attempt < 2; attempt++)
    {
        if (attempt > 0)
        {
            sleep(1); /* pause between the two messages */
        }
        if (send(cfd, pData, dataLen, 0) == -1)
        {
            perror("send");
            rc = -1;
            break;
        }
    }

    /* Release the socket explicitly instead of relying on process exit. */
    close(cfd);
    return rc;
}

mac下的epoll

mac os不支持epoll,其使用kqueue实现(类似epoll),头文件 sys/event.h
link: https://zhuanlan.zhihu.com/p/21375144

epoll源码实现

epoll源码实现分析
epoll源码实现分析(整理)

参考博客:Linux epoll 详解

本站总访问量