Linux netfilter源码分析

版权声明:本文为博主原创文章,未经博主允许不得转载。
参考资料:《Linux netfilter源码分析》http://wenku.baidu.com/view/975d477da26925c52cc5bfe6.html
《netfilter 源码分析》http://blog.chinaunix.net/uid-20498361-id-3078906.html
1、先贴一个重要的数据结构:
/*
 * Describes one netfilter hook: the callback to run plus the keys
 * (protocol family, hook number, priority) that decide where it is
 * placed and when it is called.
 */
struct nf_hook_ops {
	/* Links this entry into a hook list. */
	struct list_head list;

	/* User fills in from here down. */
	nf_hookfn *hook;	/* function executed when this hook is invoked */
	struct module *owner;
	int pf;			/* protocol family */
	int hooknum;		/* hook type (hook point) */

	/* Hooks are ordered in ascending priority. */
	int priority;
};
这样,一个nf_hook_ops就相当于一个钩子。以我的理解,这个钩子其实就是这个结构体中的hook函数指针。其它成员的作用,一是用来区别于其它钩子,二是通过这些成员来找到这个钩子。
2、注册钩子。其实就是把一个nf_hook_ops结构体挂到一条链表上,而这些链表的表头保存在二维数组nf_hooks[pf][hooknum]中。
看看内核注册的代码就好理解了:
/* Call site: the whole table below is registered with a single call. */
ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));

/*
 * Hook table handed to nf_register_hooks() above.  Each entry names the
 * callback (.hook), the protocol family (.pf), the hook point (.hooknum)
 * and the priority used to order it among hooks on the same point.
 * The first six entries attach to PF_BRIDGE hook points; the last two
 * attach ip_sabotage_in to the PF_INET and PF_INET6 pre-routing points.
 */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
	{
		.hook     = br_nf_pre_routing,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook     = br_nf_local_in,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* Same hook point as br_nf_forward_arp, one priority step lower. */
		.hook     = br_nf_forward_ip,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF - 1,
	},
	{
		.hook     = br_nf_forward_arp,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook     = br_nf_local_out,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_LOCAL_OUT,
		.priority = NF_BR_PRI_FIRST,
	},
	{
		.hook     = br_nf_post_routing,
		.owner    = THIS_MODULE,
		.pf       = PF_BRIDGE,
		.hooknum  = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	{
		.hook     = ip_sabotage_in,
		.owner    = THIS_MODULE,
		.pf       = PF_INET,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		.hook     = ip_sabotage_in,
		.owner    = THIS_MODULE,
		.pf       = PF_INET6,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};
因为没有找到比较好看的已初始化的nf_hook_ops结构体,所以找了这个比较长的nf_hook_ops结构体数组。虽然长了点,但我觉得它能帮助我们更好地理解nf_hook_ops结构。
nf_register_hooks是nf_register_hook的循环调用封装,所以只看nf_register_hook即可。
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_register_hook(®[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
nf_unregister_hooks(reg, i);
return err;
}
/*
 * Insert one hook into the list nf_hooks[reg->pf][reg->hooknum], keeping
 * that list sorted by ascending priority.
 *
 * The list is modified while holding nf_hook_mutex.  Returns 0 on
 * success, or the negative value returned by mutex_lock_interruptible()
 * when the mutex could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;

	/* Stop at the first existing entry whose priority is larger than ours. */
	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		if (reg->priority < elem->priority)
			break;
	}

	/*
	 * Link the new nf_hook_ops in right after the predecessor of the
	 * entry we stopped at, i.e. immediately before that entry.
	 */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
	return 0;
}
nf_hooks[NPROTO][NF_MAX_HOOKS],NPROTO表示所支持的协议族大小,NF_MAX_HOOKS(8)表示每种协议族所支持的最大钩子数。
3、把钩子注册上了,就通过NF_HOOK调用注册在钩子上的函数
NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,ip_rcv_finish);
在这里先明白一件事:因为是对netfilter机制的分析,所以不关心数据报是怎么走的。我们假设已经知道数据报的走向,比如ip_rcv接收数据、ip_forward转发数据,也就是说数据必须经过这里。在这样的假设下,我们就可以把精力集中在netfilter的实现分析上了。至于在哪里放钩子,就是在哪里调用NF_HOOK宏。
NF_HOOK执行钩子注册时的函数,是个宏定义,假如内核不支持netfilter(即编译内核里没有CONFIG_NETFILTER选项),或者nf_hooks为空(即没有钩子),那么就不会去执行钩子函数,而是直接执行它的回调函数。否则就执行注册过的钩子函数,下面看实际代码实现后就能很好理解。
以ip_rcv函数中调用的NF_HOOK为例:
ip_rcv()
-> return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
#ifdef CONFIG_NETFILTER	/* kernel built with netfilter support */
/*
 * Run the hooks through nf_hook_thresh().  When it returns 1 (the hook
 * list is empty, or the hooks let the packet through; see
 * nf_hook_thresh), call the okfn callback on skb and yield its result;
 * otherwise yield the value nf_hook_thresh() returned.
 *
 * NOTE(review): this excerpt shows no NF_HOOK definition for this
 * branch; presumably NF_HOOK wraps NF_HOOK_THRESH here. Confirm against
 * include/linux/netfilter.h.
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	\
({int __ret;								\
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
	__ret = (okfn)(skb);						\
__ret;})
#else	/* netfilter not compiled in */
/* Without netfilter, simply run the callback, e.g. ip_rcv_finish(skb). */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#endif
/*
 * Decide whether the hooks registered at nf_hooks[pf][hook] have to be
 * run for skb.
 *
 * Returns 1 without running any hook when cond is zero, or (unless
 * CONFIG_NETFILTER_DEBUG is defined) when the hook list is empty.
 * Otherwise returns whatever nf_hook_slow() returns.
 */
static inline int nf_hook_thresh(int pf, unsigned int hook,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sk_buff *), int thresh,
				 int cond)
{
	if (cond == 0)
		return 1;

#ifndef CONFIG_NETFILTER_DEBUG
	/* Fast path: no hook registered for this family / hook point. */
	if (list_empty(&nf_hooks[pf][hook]))
		return 1;
#endif

	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
/*
 * Walk the hook list nf_hooks[pf][hook] and act on the verdict.
 *
 * Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.
 */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *),
		 int hook_thresh)
{
	struct list_head *cursor;
	unsigned int verdict;
	int ret = 0;
#ifdef CONFIG_NET_NS
	struct net *net;

	/* Packets outside init_net bypass the hooks: tell the caller to run okfn. */
	net = dev_net(indev == NULL ? outdev : indev);
	if (net != &init_net)
		return 1;
#endif

	/* We may already hold this, but read-locks nest anyway. */
	rcu_read_lock();

	cursor = &nf_hooks[pf][hook];
	for (;;) {
		/*
		 * Run the hooks that follow the cursor and collect the
		 * resulting verdict; the cursor is updated through &cursor.
		 */
		verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
				     outdev, &cursor, okfn, hook_thresh);

		if (verdict == NF_ACCEPT || verdict == NF_STOP) {
			ret = 1;
			break;
		}

		if (verdict == NF_DROP) {
			kfree_skb(skb);
			ret = -EPERM;
			break;
		}

		/* Any verdict other than NF_QUEUE ends here with ret == 0. */
		if ((verdict & NF_VERDICT_MASK) != NF_QUEUE)
			break;

		/* A zero return from nf_queue() means: keep walking the hooks. */
		if (nf_queue(skb, cursor, pf, hook, indev, outdev, okfn,
			     verdict >> NF_VERDICT_BITS))
			break;
	}

	rcu_read_unlock();
	return ret;
}
/*
 * Call the hooks on list head, continuing after the position *i, and
 * return the first verdict that is neither NF_ACCEPT nor NF_REPEAT.
 * Hooks whose priority is below hook_thresh are skipped.  Returns
 * NF_ACCEPT when the end of the list is reached.  *i tracks the current
 * position so the caller can resume from it.
 */
unsigned int nf_iterate(struct list_head *head,
			struct sk_buff *skb,
			int hook,
			const struct net_device *indev,
			const struct net_device *outdev,
			struct list_head **i,
			int (*okfn)(struct sk_buff *),
			int hook_thresh)
{
	unsigned int verdict;

	/*
	 * The caller must not block between calls to this
	 * function because of risk of continuing from deleted element.
	 */
	list_for_each_continue_rcu(*i, head) {
		/* The list node is the first member of nf_hook_ops, hence the cast. */
		struct nf_hook_ops *ops = (struct nf_hook_ops *)*i;

		if (ops->priority < hook_thresh)
			continue;

		/*
		 * Optimization: we don't need to hold module
		 * reference here, since function can't sleep. --RR
		 */
		verdict = ops->hook(hook, skb, indev, outdev, okfn);
		if (verdict == NF_ACCEPT)
			continue;

#ifdef CONFIG_NETFILTER_DEBUG
		if (unlikely((verdict & NF_VERDICT_MASK)
						> NF_MAX_VERDICT)) {
			NFDEBUG("Evil return from %p(%u).\n",
				ops->hook, hook);
			continue;
		}
#endif
		if (verdict != NF_REPEAT)
			return verdict;

		/* NF_REPEAT: step back one entry so the loop visits this hook again. */
		*i = (*i)->prev;
	}

	return NF_ACCEPT;
}
总的来说,就是通过判断nf_hook_thresh的返回值来决定是否继续调用okfn处理这个数据包,而nf_hook_thresh的返回值正是各个钩子函数处理该数据包之后得到的结果。
个人学习笔记,比较粗糙,详细可看参考文章。