eBPF 之 ProgramType、AttachType和InputContext

本文介绍了Linux5.9 版本的 ProgramType、AttachType 和 InputContext 的定义。由于这些定义会随着 Linux 内核版本变更,因此也在文章中说明了每种定义的出处,不同的内核版本也可在相同的地方找到定义。

1. ProgramType 定义

定义在 include/uapi/linux/bpf.h 文件中,不同 Linux 版本会有变化,以下是 Linux 5.19 版本定义:

enum bpf_prog_type {
    BPF_PROG_TYPE_UNSPEC,
    BPF_PROG_TYPE_SOCKET_FILTER,
    BPF_PROG_TYPE_KPROBE,
    BPF_PROG_TYPE_SCHED_CLS,
    BPF_PROG_TYPE_SCHED_ACT,
    BPF_PROG_TYPE_TRACEPOINT,
    BPF_PROG_TYPE_XDP,
    BPF_PROG_TYPE_PERF_EVENT,
    BPF_PROG_TYPE_CGROUP_SKB,
    BPF_PROG_TYPE_CGROUP_SOCK,
    BPF_PROG_TYPE_LWT_IN,
    BPF_PROG_TYPE_LWT_OUT,
    BPF_PROG_TYPE_LWT_XMIT,
    BPF_PROG_TYPE_SOCK_OPS,
    BPF_PROG_TYPE_SK_SKB,
    BPF_PROG_TYPE_CGROUP_DEVICE,
    BPF_PROG_TYPE_SK_MSG,
    BPF_PROG_TYPE_RAW_TRACEPOINT,
    BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
    BPF_PROG_TYPE_LWT_SEG6LOCAL,
    BPF_PROG_TYPE_LIRC_MODE2,
    BPF_PROG_TYPE_SK_REUSEPORT,
    BPF_PROG_TYPE_FLOW_DISSECTOR,
    BPF_PROG_TYPE_CGROUP_SYSCTL,
    BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
    BPF_PROG_TYPE_CGROUP_SOCKOPT,
    BPF_PROG_TYPE_TRACING,
    BPF_PROG_TYPE_STRUCT_OPS,
    BPF_PROG_TYPE_EXT,
    BPF_PROG_TYPE_LSM,
    BPF_PROG_TYPE_SK_LOOKUP,
    BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
};

2. AttachType 定义

定义在 include/uapi/linux/bpf.h 文件中,不同 Linux 版本会有变化,以下是 Linux 5.19 版本定义:

enum bpf_attach_type {
    BPF_CGROUP_INET_INGRESS,
    BPF_CGROUP_INET_EGRESS,
    BPF_CGROUP_INET_SOCK_CREATE,
    BPF_CGROUP_SOCK_OPS,
    BPF_SK_SKB_STREAM_PARSER,
    BPF_SK_SKB_STREAM_VERDICT,
    BPF_CGROUP_DEVICE,
    BPF_SK_MSG_VERDICT,
    BPF_CGROUP_INET4_BIND,
    BPF_CGROUP_INET6_BIND,
    BPF_CGROUP_INET4_CONNECT,
    BPF_CGROUP_INET6_CONNECT,
    BPF_CGROUP_INET4_POST_BIND,
    BPF_CGROUP_INET6_POST_BIND,
    BPF_CGROUP_UDP4_SENDMSG,
    BPF_CGROUP_UDP6_SENDMSG,
    BPF_LIRC_MODE2,
    BPF_FLOW_DISSECTOR,
    BPF_CGROUP_SYSCTL,
    BPF_CGROUP_UDP4_RECVMSG,
    BPF_CGROUP_UDP6_RECVMSG,
    BPF_CGROUP_GETSOCKOPT,
    BPF_CGROUP_SETSOCKOPT,
    BPF_TRACE_RAW_TP,
    BPF_TRACE_FENTRY,
    BPF_TRACE_FEXIT,
    BPF_MODIFY_RETURN,
    BPF_LSM_MAC,
    BPF_TRACE_ITER,
    BPF_CGROUP_INET4_GETPEERNAME,
    BPF_CGROUP_INET6_GETPEERNAME,
    BPF_CGROUP_INET4_GETSOCKNAME,
    BPF_CGROUP_INET6_GETSOCKNAME,
    BPF_XDP_DEVMAP,
    BPF_CGROUP_INET_SOCK_RELEASE,
    BPF_XDP_CPUMAP,
    BPF_SK_LOOKUP,
    BPF_XDP,
    BPF_SK_SKB_VERDICT,
    BPF_SK_REUSEPORT_SELECT,
    BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
    BPF_PERF_EVENT,
    __MAX_BPF_ATTACH_TYPE
};

3. ProgramType、AttachType和 InputContext关系

在 Linux 源码 kernel/bpf/syscall.c 文件的 attach_type_to_prog_type 函数中有 ProgramType 与 AttachType 的映射关系,同时在 Linux 源码 include/linux/bpf_types.h 中定义了 ProgramType 与 InputContext 的映射关系。

整理后的映射关系如下:

ProgramType

AttachType

InputContext

BPF_PROG_TYPE_SOCKET_FILTER

None

struct __sk_buff

BPF_PROG_TYPE_KPROBE

AttachTraceKprobeMulti

struct pt_regs

BPF_PROG_TYPE_SCHED_CLS

None

struct __sk_buff

BPF_PROG_TYPE_SCHED_ACT

None

struct __sk_buff

BPF_PROG_TYPE_TRACEPOINT

None

__u64

BPF_PROG_TYPE_XDP

BPF_XDP_DEVMAP

BPF_XDP_CPUMAP

BPF_XDP

struct xdp_md

BPF_PROG_TYPE_PERF_EVENT

None

struct bpf_perf_event_data

BPF_PROG_TYPE_CGROUP_SKB

BPF_CGROUP_INET_INGRESS

BPF_CGROUP_INET_EGRESS

struct __sk_buff

BPF_PROG_TYPE_CGROUP_SOCK

BPF_CGROUP_INET_SOCK_CREATE

BPF_CGROUP_INET_SOCK_RELEASE

BPF_CGROUP_INET4_POST_BIND

BPF_CGROUP_INET6_POST_BIND

struct bpf_sock

BPF_PROG_TYPE_LWT_IN

None

struct __sk_buff

BPF_PROG_TYPE_LWT_OUT

None

struct __sk_buff

BPF_PROG_TYPE_LWT_XMIT

None

struct __sk_buff

BPF_PROG_TYPE_SOCK_OPS

BPF_CGROUP_SOCK_OPS

struct bpf_sock_ops

BPF_PROG_TYPE_SK_SKB

BPF_SK_SKB_STREAM_PARSER

BPF_SK_SKB_STREAM_VERDICT

BPF_SK_SKB_VERDICT

struct __sk_buff

BPF_PROG_TYPE_CGROUP_DEVICE

BPF_CGROUP_DEVICE

struct bpf_cgroup_dev_ctx

BPF_PROG_TYPE_SK_MSG

BPF_SK_MSG_VERDICT

struct sk_msg_md

BPF_PROG_TYPE_RAW_TRACEPOINT

None

struct bpf_raw_tracepoint_args

BPF_PROG_TYPE_CGROUP_SOCK_ADDR

BPF_CGROUP_INET4_BIND

BPF_CGROUP_INET6_BIND

BPF_CGROUP_INET4_CONNECT

BPF_CGROUP_INET6_CONNECT

BPF_CGROUP_UDP4_SENDMSG

BPF_CGROUP_UDP6_SENDMSG

BPF_CGROUP_UDP4_RECVMSG

BPF_CGROUP_UDP6_RECVMSG

BPF_CGROUP_INET4_GETPEERNAME

BPF_CGROUP_INET6_GETPEERNAME

BPF_CGROUP_INET4_GETSOCKNAME

BPF_CGROUP_INET6_GETSOCKNAME

struct bpf_sock_addr

BPF_PROG_TYPE_LWT_SEG6LOCAL

None

struct __sk_buff

BPF_PROG_TYPE_LIRC_MODE2

BPF_LIRC_MODE2

__u32

BPF_PROG_TYPE_SK_REUSEPORT

BPF_SK_REUSEPORT_SELECT

BPF_SK_REUSEPORT_SELECT_OR_MIGRATE

struct sk_reuseport_md

BPF_PROG_TYPE_FLOW_DISSECTOR

BPF_FLOW_DISSECTOR

struct __sk_buff

BPF_PROG_TYPE_CGROUP_SYSCTL

BPF_CGROUP_SYSCTL

struct bpf_sysctl

BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE

None

struct bpf_raw_tracepoint_args

BPF_PROG_TYPE_CGROUP_SOCKOPT

BPF_CGROUP_GETSOCKOPT BPF_CGROUP_SETSOCKOPT

struct bpf_sockopt

BPF_PROG_TYPE_TRACING

BPF_TRACE_RAW_TP

BPF_TRACE_FENTRY

BPF_TRACE_FEXIT

BPF_MODIFY_RETURN

BPF_TRACE_ITER

void *

BPF_PROG_TYPE_STRUCT_OPS

None

void *

BPF_PROG_TYPE_EXT

None

void *

BPF_PROG_TYPE_LSM

BPF_LSM_MAC

void *

BPF_PROG_TYPE_SK_LOOKUP

BPF_SK_LOOKUP

struct bpf_sk_lookup

BPF_PROG_TYPE_SYSCALL

None

void *

注:也参考了 github.com/cilium/ebpf/elf_reader.go 文件定义的映射关系。

4. InputContext 详细定义

4.1. struct xdp_md

在 include/uapi/linux/bpf.h 文件下定义:

struct xdp_md {                                                                                                               	  __u32 data;
    __u32 data_end;
    __u32 data_meta;
    /* Below access go through struct xdp_rxq_info */
    __u32 ingress_ifindex; /* rxq->dev->ifindex */
    __u32 rx_queue_index;  /* rxq->queue_index  */
    __u32 egress_ifindex;  /* txq->dev->ifindex */
};

4.2. struct pt_regs

struct pt_regs 的定义与系统架构相关,以 x86 系统为例,可以在 /usr/src/linux-headers-${uname -r}/arch/x86/include/uapi/asm/ptrace.h 文件中找到,下面是 x86_64 系统架构的定义:

struct pt_regs {
/*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
 * unless syscall needs a complete, fully filled "struct pt_regs".
 */
	unsigned long r15;
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;
	unsigned long rbp;
	unsigned long rbx;
/* These regs are callee-clobbered. Always saved on kernel entry. */
	unsigned long r11;
	unsigned long r10;
	unsigned long r9;
	unsigned long r8;
	unsigned long rax;
	unsigned long rcx;
	unsigned long rdx;
	unsigned long rsi;
	unsigned long rdi;
/*
 * On syscall entry, this is syscall#. On CPU exception, this is error code.
 * On hw interrupt, it's IRQ number:
 */
	unsigned long orig_rax;
/* Return frame for iretq */
	unsigned long rip;
	unsigned long cs;
	unsigned long eflags;
	unsigned long rsp;
	unsigned long ss;
/* top of stack page */
};

4.3. struct __sk_buff

在 include/uapi/linux/bpf.h 文件下定义:

/* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */
struct __sk_buff {
    __u32 len; 
    __u32 pkt_type;
    __u32 mark;
    __u32 queue_mapping;
    __u32 protocol;
    __u32 vlan_present;
    __u32 vlan_tci;
    __u32 vlan_proto;
    __u32 priority;
    __u32 ingress_ifindex;
    __u32 ifindex;
    __u32 tc_index;
    __u32 cb[5];
    __u32 hash;
    __u32 tc_classid;
    __u32 data;
    __u32 data_end;
    __u32 napi_id;

    /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
    __u32 family;
    __u32 remote_ip4;   /* Stored in network byte order */
    __u32 local_ip4;    /* Stored in network byte order */
    __u32 remote_ip6[4];    /* Stored in network byte order */
    __u32 local_ip6[4]; /* Stored in network byte order */
    __u32 remote_port;  /* Stored in network byte order */
    __u32 local_port;   /* stored in host byte order */
    /* ... here. */

    __u32 data_meta;
    __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
    __u64 tstamp;
    __u32 wire_len;
    __u32 gso_segs;
    __bpf_md_ptr(struct bpf_sock *, sk); 
    __u32 gso_size;
};

4.4. struct bpf_perf_event_data

在 include/uapi/linux/bpf_perf_event.h 文件下定义:

typedef struct pt_regs bpf_user_pt_regs_t;

struct bpf_perf_event_data {
	bpf_user_pt_regs_t regs;
	__u64 sample_period;
	__u64 addr;
};

4.5. struct bpf_sock

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sock {
	__u32 bound_dev_if;
	__u32 family;
	__u32 type;
	__u32 protocol;
	__u32 mark;
	__u32 priority;
	/* IP address also allows 1 and 2 bytes access */
	__u32 src_ip4;
	__u32 src_ip6[4];
	__u32 src_port;		/* host byte order */
	__be16 dst_port;	/* network byte order */
	__u16 :16;		/* zero padding */
	__u32 dst_ip4;
	__u32 dst_ip6[4];
	__u32 state;
	__s32 rx_queue_mapping;
};

4.6. struct bpf_sock_ops

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sock_ops {
	__u32 op;
	union {
		__u32 args[4];		/* Optionally passed to bpf program */
		__u32 reply;		/* Returned by bpf program	    */
		__u32 replylong[4];	/* Optionally returned by bpf prog  */
	};
	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
	__u32 is_fullsock;	/* Some TCP fields are only valid if
				 * there is a full socket. If not, the
				 * fields read as zero.
				 */
	__u32 snd_cwnd;
	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
	__u32 state;
	__u32 rtt_min;
	__u32 snd_ssthresh;
	__u32 rcv_nxt;
	__u32 snd_nxt;
	__u32 snd_una;
	__u32 mss_cache;
	__u32 ecn_flags;
	__u32 rate_delivered;
	__u32 rate_interval_us;
	__u32 packets_out;
	__u32 retrans_out;
	__u32 total_retrans;
	__u32 segs_in;
	__u32 data_segs_in;
	__u32 segs_out;
	__u32 data_segs_out;
	__u32 lost_out;
	__u32 sacked_out;
	__u32 sk_txhash;
	__u64 bytes_received;
	__u64 bytes_acked;
	__bpf_md_ptr(struct bpf_sock *, sk);
	/* [skb_data, skb_data_end) covers the whole TCP header.
	 *
	 * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
	 * BPF_SOCK_OPS_HDR_OPT_LEN_CB:   Not useful because the
	 *                                header has not been written.
	 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
	 *				  been written so far.
	 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:  The SYNACK that concludes
	 *					the 3WHS.
	 * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
	 *					the 3WHS.
	 *
	 * bpf_load_hdr_opt() can also be used to read a particular option.
	 */
	__bpf_md_ptr(void *, skb_data);
	__bpf_md_ptr(void *, skb_data_end);
	__u32 skb_len;		/* The total length of a packet.
				 * It includes the header, options,
				 * and payload.
				 */
	__u32 skb_tcp_flags;	/* tcp_flags of the header.  It provides
				 * an easy way to check for tcp_flags
				 * without parsing skb_data.
				 *
				 * In particular, the skb_tcp_flags
				 * will still be available in
				 * BPF_SOCK_OPS_HDR_OPT_LEN even though
				 * the outgoing header has not
				 * been written yet.
				 */
};

4.7. struct bpf_cgroup_dev_ctx

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_cgroup_dev_ctx {
	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
	__u32 access_type;
	__u32 major;
	__u32 minor;
};

4.8. struct sk_msg_md

在 include/uapi/linux/bpf.h 文件下定义:

struct sk_msg_md {
	__bpf_md_ptr(void *, data);
	__bpf_md_ptr(void *, data_end);

	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
	__u32 size;		/* Total size of sk_msg */

	__bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
};

4.9. struct sk_reuseport_md

在 include/uapi/linux/bpf.h 文件下定义:

struct sk_reuseport_md {
	/*
	 * Start of directly accessible data. It begins from
	 * the tcp/udp header.
	 */
	__bpf_md_ptr(void *, data);
	/* End of directly accessible data */
	__bpf_md_ptr(void *, data_end);
	/*
	 * Total length of packet (starting from the tcp/udp header).
	 * Note that the directly accessible bytes (data_end - data)
	 * could be less than this "len".  Those bytes could be
	 * indirectly read by a helper "bpf_skb_load_bytes()".
	 */
	__u32 len;
	/*
	 * Eth protocol in the mac header (network byte order). e.g.
	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
	 */
	__u32 eth_protocol;
	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
	__u32 bind_inany;	/* Is sock bound to an INANY address? */
	__u32 hash;		/* A hash of the packet 4 tuples */
	/* When reuse->migrating_sk is NULL, it is selecting a sk for the
	 * new incoming connection request (e.g. selecting a listen sk for
	 * the received SYN in the TCP case).  reuse->sk is one of the sk
	 * in the reuseport group. The bpf prog can use reuse->sk to learn
	 * the local listening ip/port without looking into the skb.
	 *
	 * When reuse->migrating_sk is not NULL, reuse->sk is closed and
	 * reuse->migrating_sk is the socket that needs to be migrated
	 * to another listening socket.  migrating_sk could be a fullsock
	 * sk that is fully established or a reqsk that is in-the-middle
	 * of 3-way handshake.
	 */
	__bpf_md_ptr(struct bpf_sock *, sk);
	__bpf_md_ptr(struct bpf_sock *, migrating_sk);
};

4.10. struct bpf_raw_tracepoint_args

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_raw_tracepoint_args {
	__u64 args[0];
};

4.11. struct bpf_sock_addr

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sock_addr {
	__u32 user_family;	/* Allows 4-byte read, but no write. */
	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
				 * Stored in network byte order.
				 */
	__u32 user_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
				 * Stored in network byte order.
				 */
	__u32 user_port;	/* Allows 1,2,4-byte read and 4-byte write.
				 * Stored in network byte order
				 */
	__u32 family;		/* Allows 4-byte read, but no write */
	__u32 type;		/* Allows 4-byte read, but no write */
	__u32 protocol;		/* Allows 4-byte read, but no write */
	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
				 * Stored in network byte order.
				 */
	__u32 msg_src_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
				 * Stored in network byte order.
				 */
	__bpf_md_ptr(struct bpf_sock *, sk);
};

4.12. struct bpf_sysctl

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sysctl {
	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
				 * Allows 1,2,4-byte read, but no write.
				 */
	__u32	file_pos;	/* Sysctl file position to read from, write to.
				 * Allows 1,2,4-byte read an 4-byte write.
				 */
};

4.13. struct bpf_sockopt

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sockopt {
	__bpf_md_ptr(struct bpf_sock *, sk);
	__bpf_md_ptr(void *, optval);
	__bpf_md_ptr(void *, optval_end);

	__s32	level;
	__s32	optname;
	__s32	optlen;
	__s32	retval;
};

4.14. struct bpf_sk_lookup

在 include/uapi/linux/bpf.h 文件下定义:

struct bpf_sk_lookup {
	union {
		__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
		__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
	};

	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
	__u32 remote_ip4;	/* Network byte order */
	__u32 remote_ip6[4];	/* Network byte order */
	__be16 remote_port;	/* Network byte order */
	__u16 :16;		/* Zero padding */
	__u32 local_ip4;	/* Network byte order */
	__u32 local_ip6[4];	/* Network byte order */
	__u32 local_port;	/* Host byte order */
	__u32 ingress_ifindex;		/* The arriving interface. Determined by inet_iif. */
};
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值