一 模块的启动 (Part 1: Module initialization)
1557static int __init eventpoll_init(void)
1558{
1559 struct sysinfo si;
1560
1561 si_meminfo(&si);
1562 /*
1563 * Allows top 4% of lomem to be allocated for epoll watches (per user).
1564 */
1565 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
1566 EP_ITEM_COST;
1567 BUG_ON(max_user_watches < 0);
1568
1569 /*
1570 * Initialize the structure used to perform epoll file descriptor
1571 * inclusion loops checks.
1572 */
1573 ep_nested_calls_init(&poll_loop_ncalls);
1574
1575 /* Initialize the structure used to perform safe poll wait head wake ups */
1576 ep_nested_calls_init(&poll_safewake_ncalls);
1577
1578 /* Initialize the structure used to perform file's f_op->poll() calls */
1579 ep_nested_calls_init(&poll_readywalk_ncalls);
1580
1581 /* Allocates slab cache used to allocate "struct epitem" items */
1582 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1583 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1584
1585 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1586 pwq_cache = kmem_cache_create("eventpoll_pwq",
1587 sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
1588
1589 return 0;
1590}
二. epoll_create 的实现 (Part 2: Implementation of epoll_create)
1303/*
1304 * Open an eventpoll file descriptor.
1305 */
1306SYSCALL_DEFINE1(epoll_create1, int, flags)
1307{
1308 int error;
1309 struct eventpoll *ep = NULL;
1310
1311 /* Check the EPOLL_* constant for consistency. */
1312 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1313
1314 if (flags & ~EPOLL_CLOEXEC)
1315 return -EINVAL;
1316 /*
1317 * Create the internal data structure ("struct eventpoll").
1318 */
1319 error = ep_alloc(&ep);
1320 if (error < 0)
1321 return error;
1322 /*
1323 * Creates all the items needed to setup an eventpoll file. That is,
1324 * a file structure and a free file descriptor.
1325 */
1326 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1327 O_RDWR | (flags & O_CLOEXEC));
1328 if (error < 0)
1329 ep_free(ep);
1330
1331 return error;
1332}
1333
1334SYSCALL_DEFINE1(epoll_create, int, size)
1335{
1336 if (size <= 0)
1337 return -EINVAL;
1338
1339 return sys_epoll_create1(0);
1340}
1341
748static int ep_alloc(struct eventpoll **pep)
749{
750 int error;
751 struct user_struct *user;
752 struct eventpoll *ep;
753
754 user = get_current_user();
755 error = -ENOMEM;
756 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
757 if (unlikely(!ep))
758 goto free_uid;
759
760 spin_lock_init(&ep->lock);
761 mutex_init(&ep->mtx);
762 init_waitqueue_head(&ep->wq);
763 init_waitqueue_head(&ep->poll_wait);
764 INIT_LIST_HEAD(&ep->rdllist);
765 ep->rbr = RB_ROOT;
766 ep->ovflist = EP_UNACTIVE_PTR;
767 ep->user = user;
768
769 *pep = ep;
770
771 return 0;
772
773free_uid:
774 free_uid(user);
775 return error;
776}
777
/**
 * anon_inode_getfd - creates a new file instance by hooking it up to an
 *                    anonymous inode, and a dentry that describe the "class"
 *                    of the file
 *
 * @name:  [in] name of the "class" of the new file
 * @fops:  [in] file operations for the new file
 * @priv:  [in] private data for the new file (will be file's private_data)
 * @flags: [in] flags
 *
 * Creates a new file by hooking it on a single inode. This is useful for files
 * that do not need to have a full-fledged inode in order to operate correctly.
 * All the files created with anon_inode_getfd() will share a single inode,
 * hence saving memory and avoiding code duplication for the file/inode/dentry
 * setup. Returns new descriptor or an error code.
 */
int anon_inode_getfd(const char *name, const struct file_operations *fops,
		     void *priv, int flags)
{
	int error, fd;
	struct file *file;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		return error;
	fd = error;

	file = anon_inode_getfile(name, fops, priv, flags);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto err_put_unused_fd;
	}
	/* Publish the file in the fd table; after this the fd is live */
	fd_install(fd, file);

	return fd;

err_put_unused_fd:
	put_unused_fd(fd);
	return error;
}
EXPORT_SYMBOL_GPL(anon_inode_getfd);
696/* File callbacks that implement the eventpoll file behaviour */
697static const struct file_operations eventpoll_fops = {
698 .release = ep_eventpoll_release,
699 .poll = ep_eventpoll_poll,
700 .llseek = noop_llseek,
701};
639static int ep_eventpoll_release(struct inode *inode, struct file *file)
640{
641 struct eventpoll *ep = file->private_data;
642
643 if (ep)
644 ep_free(ep);
645
646 return 0;
647}
648
676static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
677{
678 int pollflags;
679 struct eventpoll *ep = file->private_data;
680
681 /* Insert inside our poll wait queue */
682 poll_wait(file, &ep->poll_wait, wait);
683
684 /*
685 * Proceed to find out if wanted events are really available inside
686 * the ready list. This need to be done under ep_call_nested()
687 * supervision, since the call to f_op->poll() done on listed files
688 * could re-enter here.
689 */
690 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
691 ep_poll_readyevents_proc, ep, ep, current);
692
693 return pollflags != -1 ? pollflags : 0;
694}
695
106/**
107 * noop_llseek - No Operation Performed llseek implementation
108 * @file: file structure to seek on
109 * @offset: file offset to seek to
110 * @origin: type of seek
111 *
112 * This is an implementation of ->llseek useable for the rare special case when
113 * userspace expects the seek to succeed but the (device) file is actually not
114 * able to perform the seek. In this case you use noop_llseek() instead of
115 * falling back to the default implementation of ->llseek.
116 */
117loff_t noop_llseek(struct file *file, loff_t offset, int origin)
118{
119 return file->f_pos;
120}
121EXPORT_SYMBOL(noop_llseek);