I. Introduction to Netlink
The Netlink mechanism in Linux is inherited from Unix and is a socket-based inter-process communication (IPC) mechanism between kernel space and user-space processes. Linux offers several IPC mechanisms, such as pipes and shared memory (shmget). Netlink has a number of advantages, the most important being its support for asynchronous communication between kernel-space and user-space processes.
1. Differences between Netlink, sysfs and ioctl
Linux also supports communication between user space and kernel space through sysfs and ioctl.
sysfs is a virtual filesystem in the Linux kernel. It exposes access points to user space by creating file nodes under /sys; in short, it extends the kernel into user space. A driver creates these nodes through kernel interface functions. For example, a USB driver can create a loglevel node so that user space can change the kernel's log level at runtime.
ret = driver_create_file(&stub_driver.drvwrap.driver,
                         &driver_attr_loglevel);
if (ret) {
        print_err("driver_create_file failed\n");
        goto err_create_file;
}
Here driver_attr_loglevel is a driver attribute structure created with the DRIVER_ATTR macro, which takes a show and a store function; these implement reading and writing of the node, respectively.
static ssize_t show_loglevel(struct device_driver *dev, char *buf)
{
        ssize_t length = 0;

        spin_lock(&loglevel_lock);
        if (NONE == log_level) {
                length = sprintf(buf, "%s\n", "none");
        } else if (DEBUG == log_level) {
                length = sprintf(buf, "%s\n", "debug");
        } else if (INFO == log_level) {
                length = sprintf(buf, "%s\n", "info");
        } else if (WARNING == log_level) {
                length = sprintf(buf, "%s\n", "warning");
        } else if (ERROR == log_level) {
                length = sprintf(buf, "%s\n", "error");
        } else {
                length = sprintf(buf, "%s\n", "unknown level");
        }
        spin_unlock(&loglevel_lock);
        return length;
}
static ssize_t store_loglevel(struct device_driver *dev, const char *buf, size_t count)
{
        if (count > 2)
                return -EINVAL;

        if (!strncmp(buf, "0", 1)) {
                log_level = NONE;
        } else if (!strncmp(buf, "1", 1)) {
                log_level = DEBUG;
        } else if (!strncmp(buf, "2", 1)) {
                log_level = INFO;
        } else if (!strncmp(buf, "3", 1)) {
                log_level = WARNING;
        } else if (!strncmp(buf, "4", 1)) {
                log_level = ERROR;
        } else {
                log_level = NONE;
                return -EINVAL;
        }
        return count;
}
static DRIVER_ATTR(loglevel, S_IRUSR | S_IWUSR, show_loglevel, store_loglevel);
With this in place, a loglevel node appears in the corresponding directory under /sys and can be read and written by user-space programs.
Write: echo 1 > loglevel    Read: cat loglevel
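The node can of course also be driven from a program rather than the shell. A minimal user-space sketch in C (the sysfs path is a placeholder; the real path depends on where the driver registers its attribute):

#include <stdio.h>

/* Hypothetical path: substitute the actual driver directory under /sys. */
#define LOGLEVEL_NODE "/sys/bus/usb/drivers/your_driver/loglevel"

int main(void)
{
        char level[16] = {0};
        FILE *fp;

        /* store: write "2" to switch the log level to INFO */
        fp = fopen(LOGLEVEL_NODE, "w");
        if (!fp) { perror("open for write"); return 1; }
        fputs("2", fp);
        fclose(fp);

        /* show: read the current level back */
        fp = fopen(LOGLEVEL_NODE, "r");
        if (!fp) { perror("open for read"); return 1; }
        if (fgets(level, sizeof(level), fp))
                printf("current loglevel: %s", level);
        fclose(fp);
        return 0;
}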
ioctl also exposes a file node as the user-space access point. Unlike sysfs, which is asynchronous, an ioctl call is a synchronous, blocking request into the kernel. On the kernel side you first define and fill in a struct file_operations, including the ioctl handler that the kernel must implement; the handler dispatches on the command number and argument passed in from user space, so different calls reach different pieces of functionality.
// file_operations structure in the kernel
static const struct file_operations test_ux_fops = {
        .owner          = THIS_MODULE,
        .read           = test_ux_read,
        .write          = test_ux_write,
        .unlocked_ioctl = test_ux_ioctl,
        .compat_ioctl   = test_ux_ioctl,
        .open           = test_ux_open,
        .release        = test_ux_release,
};

// concrete implementation of the ioctl handler
static long test_ux_ioctl(struct file *file, unsigned int cmd,
                          unsigned long arg)
{
        struct test_ux *ux = (struct test_ux *)file->private_data;
        int ret;

        if (((_IOC_DIR(cmd) & _IOC_READ) &&
             !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd))) ||
            ((_IOC_DIR(cmd) & _IOC_WRITE) &&
             !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)))) {
                return -EFAULT;
        }

        test_ux_get(ux);
        switch (_IOC_NR(cmd)) {
        case _IOC_NR(_UX_IOCSETSOCKFD):
                ret = test_ux_set_sockfd(ux, (int)arg);
                break;
        case _IOC_NR(_UX_IOCINTR):
                ret = test_ux_interrupt(ux);
                break;
        case _IOC_NR(_UX_IOCINTRPGRP):
                ret = test_ux_interrupt_pgrp();
                break;
        case _IOC_NR(_UX_IOCGETKADDR):
                ret = test_ux_getkaddr(ux, (void __user *)arg);
                break;
        default:
                ret = -EINVAL;
        }
        test_ux_put(ux);
        return ret;
}
The user-space side then interacts with the driver through open() and ioctl().
fd = open(DEVNAME, O_RDWR);

ret = ioctl(fd, _UX_IOCSETSOCKFD, sock->fd);
if (ret) {
        dbg("failed to set sock fd");
        goto err_free;
}
ret = ioctl(fd, _UX_IOCGETKADDR, &ux->kaddr);
if (ret) {
        dbg("failed to get kaddr");
        goto err_free;
}
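The command numbers such as _UX_IOCSETSOCKFD are not shown above. They normally live in a header shared by the driver and the user-space program and are built with the _IO/_IOR/_IOW macros, which is exactly what makes the _IOC_DIR/_IOC_NR/_IOC_SIZE checks in the handler work. The following sketch shows how they might be defined; the magic character 'u', the ordinals and the argument types are assumptions, not taken from the real driver.

#include <linux/ioctl.h>

/* Hypothetical definitions, not from the original driver. */
#define UX_IOC_MAGIC       'u'
#define _UX_IOCSETSOCKFD   _IO(UX_IOC_MAGIC, 0)                 /* socket fd passed by value in 'arg' */
#define _UX_IOCINTR        _IO(UX_IOC_MAGIC, 1)                 /* no argument */
#define _UX_IOCINTRPGRP    _IO(UX_IOC_MAGIC, 2)                 /* no argument */
#define _UX_IOCGETKADDR    _IOR(UX_IOC_MAGIC, 3, unsigned long) /* kernel copies an address out */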
2. How to use Netlink
1. Create the Netlink socket channel
netlinkfd = netlink_kernel_create(&init_net, NETLINK_PORT, &netlink_cfg);
if (!netlinkfd) {
        printk(KERN_ERR "can not create a netlink socket\n");
        return -1;
}
2. Fill in the netlink_kernel_cfg structure
struct netlink_kernel_cfg netlink_cfg = {
        .input = kernel_receive,
};
The input callback (kernel_receive here) is invoked whenever data arrives from user space. The most important piece of information it extracts is the pid (port id) of the user-space process, so its implementation can look like this:
void kernel_receive(struct sk_buff *__skb)    /* receive message from userspace */
{
        struct sk_buff *skb;
        struct nlmsghdr *nlh = NULL;
        char *data = "This is viz's test message from kernel";

        printk(KERN_DEBUG "[kernel space] begin kernel_receive\n");
        skb = skb_get(__skb);
        if (skb->len >= sizeof(struct nlmsghdr)) {
                nlh = (struct nlmsghdr *)skb->data;
                if ((nlh->nlmsg_len >= sizeof(struct nlmsghdr)) &&
                    (__skb->len >= nlh->nlmsg_len)) {
                        user_process.pid = nlh->nlmsg_pid;
                        printk(KERN_DEBUG "[kernel space] data received from user: %s\n",
                               (char *)NLMSG_DATA(nlh));
                        printk(KERN_DEBUG "[kernel space] user_pid: %d\n", user_process.pid);
                        send_to_user(data);
                }
        } else {
                printk(KERN_DEBUG "[kernel space] data received from user: %s\n",
                       (char *)NLMSG_DATA(nlmsg_hdr(__skb)));
                send_to_user(data);
        }
        kfree_skb(skb);
}
skb->data begins with the nlmsghdr, which carries the pid of the user-space process. This matters because the kernel can only address its reply to user space with that pid. send_to_user() sends data back to user space through netlink_unicast(), one of Netlink's key interface functions (its full implementation appears in the demo in section IV):
retval = netlink_unicast(netlinkfd, skb, user_process.pid, MSG_DONTWAIT);
The arguments are, in order, the kernel Netlink socket, the sk_buff holding the message, the port id (pid) of the destination user-space process, and a flag; MSG_DONTWAIT makes the call non-blocking.
3. Writing the user-space side of Netlink
skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_PORT);
if (skfd < 0) {
        printf("can not create a netlink socket\n");
        return -1;
}
Create a socket with the three arguments PF_NETLINK, SOCK_RAW and NETLINK_PORT. The first two are standard; NETLINK_PORT is a Netlink protocol number (not a TCP/UDP port) and must match the number the kernel side passed to netlink_kernel_create. The examples here use 17, a value commonly picked for test modules.
if (bind(skfd, (struct sockaddr *)&local, sizeof(local)) != 0) {
        printf("bind() error\n");
        return -1;
}
Bind the socket. local.nl_pid is set to the process's own pid (its Netlink port id) and nl_groups to 0 (no multicast groups), so the user-space side behaves like a server that can receive data sent down from the kernel.
ret = sendto(skfd, message, message->nlmsg_len, 0,
             (struct sockaddr *)&kpeer, sizeof(kpeer));
if (ret < 0) {
        perror("send pid:");
        exit(-1);
}
Use sendto() to pass data to the kernel; the crucial piece of information carried in the message header is the sender's pid.
// receive the confirm message from kernel
while (1) {
        ret = recvfrom(skfd, &info, sizeof(struct u_packet_info), 0,
                       (struct sockaddr *)&kpeer, &kpeerlen);
        if (ret < 0) {
                perror("recv from kernel:");
                exit(-1);
        }
Use recvfrom() to keep receiving data from the kernel in real time.
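The demo in section IV reads each reply into a fixed struct u_packet_info and takes the payload directly. When the kernel may pack several netlink messages into one datagram, it is more robust to walk the buffer with the NLMSG_* macros from <linux/netlink.h>; the following helper is a sketch of that approach and is not part of the original demo.

#include <stdio.h>
#include <linux/netlink.h>

/* Walk every netlink message contained in a buffer of 'len' received bytes. */
static void handle_netlink_buffer(void *buf, int len)
{
        struct nlmsghdr *nlh;

        for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
             nlh = NLMSG_NEXT(nlh, len)) {
                if (nlh->nlmsg_type == NLMSG_ERROR) {
                        fprintf(stderr, "received netlink error message\n");
                        continue;
                }
                /* NLMSG_DATA points past the aligned header to the payload. */
                printf("payload from kernel: %s\n", (char *)NLMSG_DATA(nlh));
        }
}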
II. Points to note
1. Changes to netlink_kernel_create. The prototype of netlink_kernel_create differs between kernel versions. In early kernels the receive callback was passed as a direct parameter; in newer kernels (the cfg-based interface dates back to the 3.6/3.7 series, so it is also what you see on 4.4 and later) you pass a struct netlink_kernel_cfg, e.g. struct netlink_kernel_cfg netlink_cfg = { .input = kernel_receive, }, whose input member is the receive function we have to implement; see the sketch below.
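As a concrete, hedged illustration, both call styles can be selected with a version check. The pre-3.6 prototype below is reconstructed from memory and should be verified against the headers of the kernel you actually build for; the receive callback is only a stub here.

#include <linux/version.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <net/sock.h>

#define NETLINK_TEST 17

static struct sock *nl_sk;

static void kernel_receive(struct sk_buff *skb)
{
        /* stub; see the full implementation in the demo below */
}

static int start_netlink(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
        /* 3.7+ (3.6 had a slightly different intermediate form):
         * the callback is carried inside struct netlink_kernel_cfg. */
        struct netlink_kernel_cfg cfg = {
                .input = kernel_receive,
        };

        nl_sk = netlink_kernel_create(&init_net, NETLINK_TEST, &cfg);
#else
        /* Roughly 2.6.24 - 3.5: callback and owning module passed directly
         * (reconstructed from memory -- check your kernel's own headers). */
        nl_sk = netlink_kernel_create(&init_net, NETLINK_TEST, 0,
                                      kernel_receive, NULL, THIS_MODULE);
#endif
        return nl_sk ? 0 : -ENOMEM;
}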
2. The sk_buff structure. This is the core structure of kernel networking and the carrier of all network data exchange, including Netlink messages.
struct sk_buff {
union {
struct {
/* These two members must be first. */
struct sk_buff *next;
struct sk_buff *prev;
union {
ktime_t tstamp;
struct skb_mstamp skb_mstamp;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
};
struct sock *sk;
struct net_device *dev;
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
char cb[48] __aligned(8);
unsigned long _skb_refdst;
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info *nf_bridge;
#endif
unsigned int len,
data_len;
__u16 mac_len,
hdr_len;
/* Following fields are _not_ copied in __copy_skb_header()
* Note that queue_mapping is here mostly to fill a hole.
*/
kmemcheck_bitfield_begin(flags1);
__u16 queue_mapping;
/* if you move cloned around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define CLONED_MASK (1 << 7)
#else
#define CLONED_MASK 1
#endif
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)
__u8 __cloned_offset[0];
__u8 cloned:1,
nohdr:1,
fclone:2,
peeked:1,
head_frag:1,
xmit_more:1,
__unused:1; /* one bit hole */
kmemcheck_bitfield_end(flags1);
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
/* private: */
__u32 headers_start[0];
/* public: */
/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define PKT_TYPE_MAX (7 << 5)
#else
#define PKT_TYPE_MAX 7
#endif
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
__u8 __pkt_type_offset[0];
__u8 pkt_type:3;
__u8 pfmemalloc:1;
__u8 ignore_df:1;
__u8 nfctinfo:3;
__u8 nf_trace:1;
__u8 ip_summed:2;
__u8 ooo_okay:1;
__u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
__u8 no_fcs:1;
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
__u8 csum_bad:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
__u8 remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
#endif
/* 2, 4 or 5 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
__u32 priority;
int skb_iif;
__u32 hash;
__be16 vlan_proto;
__u16 vlan_tci;
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
union {
unsigned int napi_id;
unsigned int sender_cpu;
};
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
#endif
union {
__u32 mark;
__u32 reserved_tailroom;
};
union {
__be16 inner_protocol;
__u8 inner_ipproto;
};
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
__be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
/* private: */
__u32 headers_end[0];
/* public: */
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;
sk_buff_data_t end;
unsigned char *head,
*data;
unsigned int truesize;
atomic_t users;
};
The details of how sk_buff is used deserve an article of their own and will be covered later.
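For the Netlink path discussed here, only a few sk_buff fields actually matter: len, data (which points at the nlmsghdr), and the per-skb control block reached through NETLINK_CB(). As a small illustration (the function name is made up; nlmsg_hdr() and nlmsg_data() are standard kernel helpers), a receive callback typically reads them like this:

#include <linux/kernel.h>
#include <linux/netlink.h>
#include <net/netlink.h>
#include <net/sock.h>

static void sketch_input(struct sk_buff *skb)
{
        struct nlmsghdr *nlh;
        u32 sender_portid;

        if (skb->len < NLMSG_HDRLEN)            /* not even a full netlink header */
                return;

        nlh = nlmsg_hdr(skb);                   /* same as (struct nlmsghdr *)skb->data */
        sender_portid = NETLINK_CB(skb).portid; /* port id of the sending socket */

        pr_debug("got %u bytes from portid %u: %s\n",
                 nlh->nlmsg_len, sender_portid, (char *)nlmsg_data(nlh));
}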
III. Using Netlink to redirect kernel logs to a user-space file for real-time monitoring
dmesg can be used to watch kernel logs in real time (for example watch -n 1 dmesg). But if you want to redirect kernel logs into a specific file without hurting kernel performance, Netlink is a good fit: start a Netlink server in user space, have the kernel push log lines up with netlink_unicast, and let the server append them to the target file.
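A minimal sketch of such a user-space log server follows. It assumes the protocol number 17 and the header-plus-payload message layout of the demo in section IV; the log file path and MSG_LEN are arbitrary examples.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#define NETLINK_TEST 17                            /* must match the kernel module */
#define MSG_LEN      256
#define LOG_FILE     "/tmp/kernel_netlink.log"     /* example path */

int main(void)
{
        struct sockaddr_nl local = { .nl_family = AF_NETLINK };
        struct sockaddr_nl kpeer = { .nl_family = AF_NETLINK, .nl_pid = 0 };
        struct { struct nlmsghdr hdr; char msg[MSG_LEN]; } pkt;
        FILE *logfp;
        int skfd;

        skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_TEST);
        if (skfd < 0) { perror("socket"); return 1; }

        local.nl_pid = getpid();                   /* our netlink port id */
        if (bind(skfd, (struct sockaddr *)&local, sizeof(local)) != 0) {
                perror("bind");
                return 1;
        }

        logfp = fopen(LOG_FILE, "a");
        if (!logfp) { perror("fopen"); return 1; }

        /* Send one message first so the kernel module learns our pid
         * (the same handshake the demo in section IV uses). */
        memset(&pkt, 0, sizeof(pkt));
        pkt.hdr.nlmsg_len = NLMSG_SPACE(MSG_LEN);
        pkt.hdr.nlmsg_pid = getpid();
        strcpy(pkt.msg, "log server ready");
        if (sendto(skfd, &pkt, pkt.hdr.nlmsg_len, 0,
                   (struct sockaddr *)&kpeer, sizeof(kpeer)) < 0)
                perror("sendto");

        /* Append every string the kernel pushes up to the log file. */
        while (1) {
                memset(&pkt, 0, sizeof(pkt));
                if (recv(skfd, &pkt, sizeof(pkt), 0) < 0) {
                        perror("recv");
                        break;
                }
                fprintf(logfp, "%s\n", pkt.msg);
                fflush(logfp);                     /* make it visible immediately */
        }
        fclose(logfp);
        close(skfd);
        return 0;
}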
IV. A demo
1. Kernel space
Makefile:
obj-m := netlink_module.o

Build script (build.sh):
#!/bin/sh
ver=`uname -r`
make -C /lib/modules/$ver/build M=$PWD "$@"

netlink_module.c:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <net/sock.h>
#include <linux/netlink.h>
#define NETLINK_TEST 17
void kernel_receive(struct sk_buff*);
struct {
        __u32 pid;
} user_process;

static struct sock *netlinkfd = NULL;

struct netlink_kernel_cfg netlink_cfg = {
        .input = kernel_receive,
};
int send_to_user(char *info)    /* send a string to userspace */
{
        int size;
        struct sk_buff *skb;
        unsigned char *old_tail;
        struct nlmsghdr *nlh;
        int retval;

        /* room for header + payload (incl. trailing NUL), with alignment */
        size = NLMSG_SPACE(strlen(info) + 1);
        /* GFP_ATOMIC: we may be called from a context that must not sleep */
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;

        old_tail = skb_tail_pointer(skb);
        /* reserve and initialise the netlink message header */
        nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(struct nlmsghdr), 0);
        memcpy(NLMSG_DATA(nlh), info, strlen(info) + 1);        /* payload */
        nlh->nlmsg_len = skb_tail_pointer(skb) - old_tail;      /* actual length */

        /* control block: message comes from the kernel (port id 0), no group */
        NETLINK_CB(skb).portid = 0;
        NETLINK_CB(skb).dst_group = 0;

        printk(KERN_DEBUG "[kernel space] skb->data:%s\n",
               (char *)NLMSG_DATA((struct nlmsghdr *)skb->data));
        /* unicast to the user process whose pid we recorded in kernel_receive */
        retval = netlink_unicast(netlinkfd, skb, user_process.pid, MSG_DONTWAIT);
        printk(KERN_DEBUG "[kernel space] netlink_unicast returned %d\n", retval);
        return 0;
}
void kernel_receive(struct sk_buff *__skb)    /* receive message from userspace */
{
        struct sk_buff *skb;
        struct nlmsghdr *nlh = NULL;
        char *data = "This is viz's test message from kernel";

        printk(KERN_DEBUG "[kernel space] begin kernel_receive\n");
        skb = skb_get(__skb);
        if (skb->len >= sizeof(struct nlmsghdr)) {
                nlh = (struct nlmsghdr *)skb->data;
                if ((nlh->nlmsg_len >= sizeof(struct nlmsghdr)) &&
                    (__skb->len >= nlh->nlmsg_len)) {
                        /* remember the sender's pid so we can answer it later */
                        user_process.pid = nlh->nlmsg_pid;
                        printk(KERN_DEBUG "[kernel space] data received from user: %s\n",
                               (char *)NLMSG_DATA(nlh));
                        printk(KERN_DEBUG "[kernel space] user_pid: %d\n", user_process.pid);
                        send_to_user(data);
                }
        } else {
                printk(KERN_DEBUG "[kernel space] data received from user: %s\n",
                       (char *)NLMSG_DATA(nlmsg_hdr(__skb)));
                send_to_user(data);
        }
        send_to_user("xujiwei----test\n");
        kfree_skb(skb);
}
static int __init test_netlink_init(void)
{
        netlinkfd = netlink_kernel_create(&init_net, NETLINK_TEST, &netlink_cfg);
        if (!netlinkfd) {
                printk(KERN_ERR "can not create a netlink socket\n");
                return -1;
        }
        return 0;
}

static void __exit test_netlink_exit(void)
{
        netlink_kernel_release(netlinkfd);
        printk(KERN_DEBUG "test_netlink_exit!!\n");
}

module_init(test_netlink_init);
module_exit(test_netlink_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("xujiwei");
2. User space
user.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#define NETLINK_TEST 17
#define MSG_LEN 100

struct msg_to_kernel
{
        struct nlmsghdr hdr;
        char data[MSG_LEN];
};

struct u_packet_info
{
        struct nlmsghdr hdr;
        char msg[MSG_LEN];
};

int main(int argc, char *argv[])
{
        char *data = "This message is from viz's space";
        struct sockaddr_nl local;
        struct sockaddr_nl kpeer;
        socklen_t kpeerlen = sizeof(struct sockaddr_nl);
        int skfd, ret;
        struct nlmsghdr *message;
        struct u_packet_info info;

        /* allocate room for the netlink header plus the payload */
        message = (struct nlmsghdr *)malloc(NLMSG_SPACE(MSG_LEN));
        if (!message) {
                perror("malloc");
                return -1;
        }

        skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_TEST);
        if (skfd < 0) {
                printf("can not create a netlink socket\n");
                return -1;
        }

        /* local address: our own pid acts as the netlink port id */
        memset(&local, 0, sizeof(local));
        local.nl_family = AF_NETLINK;
        local.nl_pid = getpid();
        local.nl_groups = 0;
        if (bind(skfd, (struct sockaddr *)&local, sizeof(local)) != 0) {
                printf("bind() error\n");
                return -1;
        }

        /* peer address: pid 0 means the kernel */
        memset(&kpeer, 0, sizeof(kpeer));
        kpeer.nl_family = AF_NETLINK;
        kpeer.nl_pid = 0;
        kpeer.nl_groups = 0;

        /* build the message: header first, then the payload */
        memset(message, '\0', NLMSG_SPACE(MSG_LEN));
        message->nlmsg_len = NLMSG_SPACE(strlen(data));
        message->nlmsg_flags = 0;
        message->nlmsg_type = 0;
        message->nlmsg_seq = 0;
        message->nlmsg_pid = local.nl_pid;
        memcpy(NLMSG_DATA(message), data, strlen(data));

        printf("message sendto kernel is:%s, len:%u\n",
               (char *)NLMSG_DATA(message), message->nlmsg_len);
        ret = sendto(skfd, message, message->nlmsg_len, 0,
                     (struct sockaddr *)&kpeer, sizeof(kpeer));
        if (ret < 0) {
                perror("send pid:");
                exit(-1);
        }

        /* receive the replies from the kernel */
        while (1) {
                ret = recvfrom(skfd, &info, sizeof(struct u_packet_info), 0,
                               (struct sockaddr *)&kpeer, &kpeerlen);
                if (ret < 0) {
                        perror("recv from kernel:");
                        exit(-1);
                }
                printf("message received from kernel:%s\n", (char *)info.msg);
        }

        close(skfd);
        return 0;
}
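To try the demo, a suggested workflow (adjust names to your environment): save the kernel code as netlink_module.c next to the Makefile and build script, build it with the script, load it with insmod netlink_module.ko, then compile user.c with gcc and run the resulting program; the kernel's replies show up on the program's stdout, and the kernel-side printk output can be checked with dmesg.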
This article introduced Netlink as a mechanism for communication between the Linux kernel and user-space processes, compared it with sysfs and ioctl, and walked through how to use it, from creating the Netlink channel to the points that need attention. The demo showed how Netlink can be used to redirect kernel logs to a user-space file for efficient, real-time monitoring of the kernel.