本文介绍了 Linux 5.19 版本的 ProgramType、AttachType 和 InputContext 的定义。由于这些定义会随着 Linux 内核版本变更,因此也在文章中说明了每种定义的出处,不同的内核版本也可在相同的地方找到定义。
1. ProgramType 定义
定义在 include/uapi/linux/bpf.h 文件中,不同 Linux 版本会有变化,以下是 Linux 5.19 版本定义:
/*
 * eBPF program types, listed in declaration order.
 *
 * No enumerator has an explicit initializer, so the values count up
 * from 0; the number in each trailing comment is that implicit value.
 */
enum bpf_prog_type {
	BPF_PROG_TYPE_UNSPEC,			/*  0 */
	BPF_PROG_TYPE_SOCKET_FILTER,		/*  1 */
	BPF_PROG_TYPE_KPROBE,			/*  2 */
	BPF_PROG_TYPE_SCHED_CLS,		/*  3 */
	BPF_PROG_TYPE_SCHED_ACT,		/*  4 */
	BPF_PROG_TYPE_TRACEPOINT,		/*  5 */
	BPF_PROG_TYPE_XDP,			/*  6 */
	BPF_PROG_TYPE_PERF_EVENT,		/*  7 */
	BPF_PROG_TYPE_CGROUP_SKB,		/*  8 */
	BPF_PROG_TYPE_CGROUP_SOCK,		/*  9 */
	BPF_PROG_TYPE_LWT_IN,			/* 10 */
	BPF_PROG_TYPE_LWT_OUT,			/* 11 */
	BPF_PROG_TYPE_LWT_XMIT,			/* 12 */
	BPF_PROG_TYPE_SOCK_OPS,			/* 13 */
	BPF_PROG_TYPE_SK_SKB,			/* 14 */
	BPF_PROG_TYPE_CGROUP_DEVICE,		/* 15 */
	BPF_PROG_TYPE_SK_MSG,			/* 16 */
	BPF_PROG_TYPE_RAW_TRACEPOINT,		/* 17 */
	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,		/* 18 */
	BPF_PROG_TYPE_LWT_SEG6LOCAL,		/* 19 */
	BPF_PROG_TYPE_LIRC_MODE2,		/* 20 */
	BPF_PROG_TYPE_SK_REUSEPORT,		/* 21 */
	BPF_PROG_TYPE_FLOW_DISSECTOR,		/* 22 */
	BPF_PROG_TYPE_CGROUP_SYSCTL,		/* 23 */
	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,	/* 24 */
	BPF_PROG_TYPE_CGROUP_SOCKOPT,		/* 25 */
	BPF_PROG_TYPE_TRACING,			/* 26 */
	BPF_PROG_TYPE_STRUCT_OPS,		/* 27 */
	BPF_PROG_TYPE_EXT,			/* 28 */
	BPF_PROG_TYPE_LSM,			/* 29 */
	BPF_PROG_TYPE_SK_LOOKUP,		/* 30 */
	BPF_PROG_TYPE_SYSCALL,			/* 31: a program that can execute syscalls */
};
2. AttachType 定义
定义在 include/uapi/linux/bpf.h 文件中,不同 Linux 版本会有变化,以下是 Linux 5.19 版本定义:
/*
 * eBPF attach types, listed in declaration order.
 *
 * No enumerator has an explicit initializer, so the values count up
 * from 0; the number in each trailing comment is that implicit value.
 * New attach types are appended immediately before the
 * __MAX_BPF_ATTACH_TYPE sentinel so that existing values never change.
 */
enum bpf_attach_type {
	BPF_CGROUP_INET_INGRESS,		/*  0 */
	BPF_CGROUP_INET_EGRESS,			/*  1 */
	BPF_CGROUP_INET_SOCK_CREATE,		/*  2 */
	BPF_CGROUP_SOCK_OPS,			/*  3 */
	BPF_SK_SKB_STREAM_PARSER,		/*  4 */
	BPF_SK_SKB_STREAM_VERDICT,		/*  5 */
	BPF_CGROUP_DEVICE,			/*  6 */
	BPF_SK_MSG_VERDICT,			/*  7 */
	BPF_CGROUP_INET4_BIND,			/*  8 */
	BPF_CGROUP_INET6_BIND,			/*  9 */
	BPF_CGROUP_INET4_CONNECT,		/* 10 */
	BPF_CGROUP_INET6_CONNECT,		/* 11 */
	BPF_CGROUP_INET4_POST_BIND,		/* 12 */
	BPF_CGROUP_INET6_POST_BIND,		/* 13 */
	BPF_CGROUP_UDP4_SENDMSG,		/* 14 */
	BPF_CGROUP_UDP6_SENDMSG,		/* 15 */
	BPF_LIRC_MODE2,				/* 16 */
	BPF_FLOW_DISSECTOR,			/* 17 */
	BPF_CGROUP_SYSCTL,			/* 18 */
	BPF_CGROUP_UDP4_RECVMSG,		/* 19 */
	BPF_CGROUP_UDP6_RECVMSG,		/* 20 */
	BPF_CGROUP_GETSOCKOPT,			/* 21 */
	BPF_CGROUP_SETSOCKOPT,			/* 22 */
	BPF_TRACE_RAW_TP,			/* 23 */
	BPF_TRACE_FENTRY,			/* 24 */
	BPF_TRACE_FEXIT,			/* 25 */
	BPF_MODIFY_RETURN,			/* 26 */
	BPF_LSM_MAC,				/* 27 */
	BPF_TRACE_ITER,				/* 28 */
	BPF_CGROUP_INET4_GETPEERNAME,		/* 29 */
	BPF_CGROUP_INET6_GETPEERNAME,		/* 30 */
	BPF_CGROUP_INET4_GETSOCKNAME,		/* 31 */
	BPF_CGROUP_INET6_GETSOCKNAME,		/* 32 */
	BPF_XDP_DEVMAP,				/* 33 */
	BPF_CGROUP_INET_SOCK_RELEASE,		/* 34 */
	BPF_XDP_CPUMAP,				/* 35 */
	BPF_SK_LOOKUP,				/* 36 */
	BPF_XDP,				/* 37 */
	BPF_SK_SKB_VERDICT,			/* 38 */
	BPF_SK_REUSEPORT_SELECT,		/* 39 */
	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,	/* 40 */
	BPF_PERF_EVENT,				/* 41 */
	/*
	 * Present in the Linux 5.19 header; this is the attach type that
	 * cilium/ebpf exposes as AttachTraceKprobeMulti.
	 */
	BPF_TRACE_KPROBE_MULTI,			/* 42 */
	__MAX_BPF_ATTACH_TYPE			/* 43: number of attach types above; keep last */
};
3. ProgramType、AttachType 和 InputContext 的关系
在 Linux 源码 kernel/bpf/syscall.c 文件的 attach_type_to_prog_type 函数中有 ProgramType 与 AttachType 的映射关系,同时在 Linux 源码 include/linux/bpf_types.h 中定义了 ProgramType 与 InputContext 的映射关系。
整理后的映射关系如下:
ProgramType | AttachType | InputContext |
BPF_PROG_TYPE_SOCKET_FILTER | None | struct __sk_buff |
BPF_PROG_TYPE_KPROBE | BPF_TRACE_KPROBE_MULTI | struct pt_regs |
BPF_PROG_TYPE_SCHED_CLS | None | struct __sk_buff |
BPF_PROG_TYPE_SCHED_ACT | None | struct __sk_buff |
BPF_PROG_TYPE_TRACEPOINT | None | __u64 |
BPF_PROG_TYPE_XDP | BPF_XDP_DEVMAP BPF_XDP_CPUMAP BPF_XDP | struct xdp_md |
BPF_PROG_TYPE_PERF_EVENT | None | struct bpf_perf_event_data |
BPF_PROG_TYPE_CGROUP_SKB | BPF_CGROUP_INET_INGRESS BPF_CGROUP_INET_EGRESS | struct __sk_buff |
BPF_PROG_TYPE_CGROUP_SOCK | BPF_CGROUP_INET_SOCK_CREATE BPF_CGROUP_INET_SOCK_RELEASE BPF_CGROUP_INET4_POST_BIND BPF_CGROUP_INET6_POST_BIND | struct bpf_sock |
BPF_PROG_TYPE_LWT_IN | None | struct __sk_buff |
BPF_PROG_TYPE_LWT_OUT | None | struct __sk_buff |
BPF_PROG_TYPE_LWT_XMIT | None | struct __sk_buff |
BPF_PROG_TYPE_SOCK_OPS | BPF_CGROUP_SOCK_OPS | struct bpf_sock_ops |
BPF_PROG_TYPE_SK_SKB | BPF_SK_SKB_STREAM_PARSER BPF_SK_SKB_STREAM_VERDICT BPF_SK_SKB_VERDICT | struct __sk_buff |
BPF_PROG_TYPE_CGROUP_DEVICE | BPF_CGROUP_DEVICE | struct bpf_cgroup_dev_ctx |
BPF_PROG_TYPE_SK_MSG | BPF_SK_MSG_VERDICT | struct sk_msg_md |
BPF_PROG_TYPE_RAW_TRACEPOINT | None | struct bpf_raw_tracepoint_args |
BPF_PROG_TYPE_CGROUP_SOCK_ADDR | BPF_CGROUP_INET4_BIND BPF_CGROUP_INET6_BIND BPF_CGROUP_INET4_CONNECT BPF_CGROUP_INET6_CONNECT BPF_CGROUP_UDP4_SENDMSG BPF_CGROUP_UDP6_SENDMSG BPF_CGROUP_UDP4_RECVMSG BPF_CGROUP_UDP6_RECVMSG BPF_CGROUP_INET4_GETPEERNAME BPF_CGROUP_INET6_GETPEERNAME BPF_CGROUP_INET4_GETSOCKNAME BPF_CGROUP_INET6_GETSOCKNAME | struct bpf_sock_addr |
BPF_PROG_TYPE_LWT_SEG6LOCAL | None | struct __sk_buff |
BPF_PROG_TYPE_LIRC_MODE2 | BPF_LIRC_MODE2 | __u32 |
BPF_PROG_TYPE_SK_REUSEPORT | BPF_SK_REUSEPORT_SELECT BPF_SK_REUSEPORT_SELECT_OR_MIGRATE | struct sk_reuseport_md |
BPF_PROG_TYPE_FLOW_DISSECTOR | BPF_FLOW_DISSECTOR | struct __sk_buff |
BPF_PROG_TYPE_CGROUP_SYSCTL | BPF_CGROUP_SYSCTL | struct bpf_sysctl |
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE | None | struct bpf_raw_tracepoint_args |
BPF_PROG_TYPE_CGROUP_SOCKOPT | BPF_CGROUP_GETSOCKOPT BPF_CGROUP_SETSOCKOPT | struct bpf_sockopt |
BPF_PROG_TYPE_TRACING | BPF_TRACE_RAW_TP BPF_TRACE_FENTRY BPF_TRACE_FEXIT BPF_MODIFY_RETURN BPF_TRACE_ITER | void * |
BPF_PROG_TYPE_STRUCT_OPS | None | void * |
BPF_PROG_TYPE_EXT | None | void * |
BPF_PROG_TYPE_LSM | BPF_LSM_MAC | void * |
BPF_PROG_TYPE_SK_LOOKUP | BPF_SK_LOOKUP | struct bpf_sk_lookup |
BPF_PROG_TYPE_SYSCALL | None | void * |
注:也参考了 github.com/cilium/ebpf/elf_reader.go 文件定义的映射关系。
4. InputContext 详细定义
4.1. struct xdp_md
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_XDP programs. */
struct xdp_md {
	__u32 data;
	__u32 data_end;
	__u32 data_meta;
	/* Accesses to the fields below go through struct xdp_rxq_info. */
	__u32 ingress_ifindex;	/* rxq->dev->ifindex */
	__u32 rx_queue_index;	/* rxq->queue_index */
	__u32 egress_ifindex;	/* txq->dev->ifindex */
};
4.2. struct pt_regs
struct pt_regs 的定义与系统架构相关,以 x86 系统为例,可以在 /usr/src/linux-headers-$(uname -r)/arch/x86/include/uapi/asm/ptrace.h 文件中找到,下面是 x86_64 系统架构的定义:
/*
 * Saved register frame, x86_64 layout.
 *
 * Every member is an unsigned long and the members are laid out in the
 * order shown, so the structure is 21 machine words long.
 */
struct pt_regs {
	/*
	 * Callee-preserved registers according to the C ABI. They are not
	 * saved on kernel entry unless the syscall needs a complete, fully
	 * filled "struct pt_regs".
	 */
	unsigned long r15;
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;
	unsigned long rbp;
	unsigned long rbx;

	/* Callee-clobbered registers. Always saved on kernel entry. */
	unsigned long r11;
	unsigned long r10;
	unsigned long r9;
	unsigned long r8;
	unsigned long rax;
	unsigned long rcx;
	unsigned long rdx;
	unsigned long rsi;
	unsigned long rdi;

	/*
	 * Syscall number on syscall entry, error code on a CPU exception,
	 * IRQ number on a hardware interrupt.
	 */
	unsigned long orig_rax;

	/* Return frame for iretq. */
	unsigned long rip;
	unsigned long cs;
	unsigned long eflags;
	unsigned long rsp;
	unsigned long ss;
	/* Top of stack page. */
};
4.3. struct __sk_buff
在 include/uapi/linux/bpf.h 文件下定义:
/*
 * User-accessible mirror of the in-kernel sk_buff.
 * New fields can only be added to the end of this structure.
 */
struct __sk_buff {
	__u32 len;
	__u32 pkt_type;
	__u32 mark;
	__u32 queue_mapping;
	__u32 protocol;
	__u32 vlan_present;
	__u32 vlan_tci;
	__u32 vlan_proto;
	__u32 priority;
	__u32 ingress_ifindex;
	__u32 ifindex;
	__u32 tc_index;
	__u32 cb[5];
	__u32 hash;
	__u32 tc_classid;
	__u32 data;
	__u32 data_end;
	__u32 napi_id;

	/* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
	/* ... here. */

	__u32 data_meta;
	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
	__u64 tstamp;
	__u32 wire_len;
	__u32 gso_segs;
	__bpf_md_ptr(struct bpf_sock *, sk);
	__u32 gso_size;
	/*
	 * Tail fields of the Linux 5.19 header. They are appended after
	 * gso_size, so the offsets of all earlier members are unchanged.
	 */
	__u8  tstamp_type;
	__u32 :24;		/* Padding, future use. */
	__u64 hwtstamp;
};
4.4. struct bpf_perf_event_data
在 include/uapi/linux/bpf_perf_event.h 文件下定义:
/* Register-set type embedded in struct bpf_perf_event_data. */
typedef struct pt_regs bpf_user_pt_regs_t;

/* Input context handed to BPF_PROG_TYPE_PERF_EVENT programs. */
struct bpf_perf_event_data {
	bpf_user_pt_regs_t regs;	/* embedded by value, not a pointer */
	__u64 sample_period;
	__u64 addr;
};
4.5. struct bpf_sock
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_CGROUP_SOCK programs. */
struct bpf_sock {
	__u32 bound_dev_if;
	__u32 family;
	__u32 type;
	__u32 protocol;
	__u32 mark;
	__u32 priority;

	/* IP address also allows 1 and 2 bytes access */
	__u32 src_ip4;
	__u32 src_ip6[4];
	__u32 src_port;		/* host byte order */

	__be16 dst_port;	/* network byte order */
	__u16 :16;		/* zero padding */
	__u32 dst_ip4;
	__u32 dst_ip6[4];

	__u32 state;
	__s32 rx_queue_mapping;
};
4.6. struct bpf_sock_ops
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_SOCK_OPS programs. */
struct bpf_sock_ops {
	__u32 op;
	union {
		__u32 args[4];		/* Optionally passed to bpf program */
		__u32 reply;		/* Returned by bpf program */
		__u32 replylong[4];	/* Optionally returned by bpf prog */
	};
	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
	/*
	 * Some TCP fields are only valid if there is a full socket.
	 * If not, the fields read as zero.
	 */
	__u32 is_fullsock;
	__u32 snd_cwnd;
	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
	__u32 bpf_sock_ops_cb_flags;	/* flags defined in uapi/linux/tcp.h */
	__u32 state;
	__u32 rtt_min;
	__u32 snd_ssthresh;
	__u32 rcv_nxt;
	__u32 snd_nxt;
	__u32 snd_una;
	__u32 mss_cache;
	__u32 ecn_flags;
	__u32 rate_delivered;
	__u32 rate_interval_us;
	__u32 packets_out;
	__u32 retrans_out;
	__u32 total_retrans;
	__u32 segs_in;
	__u32 data_segs_in;
	__u32 segs_out;
	__u32 data_segs_out;
	__u32 lost_out;
	__u32 sacked_out;
	__u32 sk_txhash;
	__u64 bytes_received;
	__u64 bytes_acked;
	__bpf_md_ptr(struct bpf_sock *, sk);

	/*
	 * [skb_data, skb_data_end) covers the whole TCP header.
	 *
	 * BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
	 *	The packet received.
	 * BPF_SOCK_OPS_HDR_OPT_LEN_CB:
	 *	Not useful because the header has not been written.
	 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
	 *	The header and options have been written so far.
	 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	 *	The SYNACK that concludes the 3WHS.
	 * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
	 *	The ACK that concludes the 3WHS.
	 *
	 * bpf_load_hdr_opt() can also be used to read a particular option.
	 */
	__bpf_md_ptr(void *, skb_data);
	__bpf_md_ptr(void *, skb_data_end);

	/*
	 * The total length of a packet. It includes the header, options,
	 * and payload.
	 */
	__u32 skb_len;

	/*
	 * tcp_flags of the header. It provides an easy way to check for
	 * tcp_flags without parsing skb_data.
	 *
	 * In particular, the skb_tcp_flags will still be available in
	 * BPF_SOCK_OPS_HDR_OPT_LEN even though the outgoing header has
	 * not been written yet.
	 */
	__u32 skb_tcp_flags;
};
4.7. struct bpf_cgroup_dev_ctx
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_CGROUP_DEVICE programs. */
struct bpf_cgroup_dev_ctx {
	/* Encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
	__u32 access_type;
	__u32 major;
	__u32 minor;
};
4.8. struct sk_msg_md
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_SK_MSG programs. */
struct sk_msg_md {
	__bpf_md_ptr(void *, data);
	__bpf_md_ptr(void *, data_end);

	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
	__u32 size;		/* Total size of sk_msg */

	__bpf_md_ptr(struct bpf_sock *, sk);	/* current socket */
};
4.9. struct sk_reuseport_md
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_SK_REUSEPORT programs. */
struct sk_reuseport_md {
	/*
	 * Start of directly accessible data. It begins from
	 * the tcp/udp header.
	 */
	__bpf_md_ptr(void *, data);
	/* End of directly accessible data */
	__bpf_md_ptr(void *, data_end);

	/*
	 * Total length of packet (starting from the tcp/udp header).
	 * Note that the directly accessible bytes (data_end - data)
	 * could be less than this "len". Those bytes could be
	 * indirectly read by a helper "bpf_skb_load_bytes()".
	 */
	__u32 len;

	/*
	 * Eth protocol in the mac header (network byte order). e.g.
	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
	 */
	__u32 eth_protocol;
	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
	__u32 bind_inany;	/* Is sock bound to an INANY address? */
	__u32 hash;		/* A hash of the packet 4 tuples */

	/*
	 * When reuse->migrating_sk is NULL, it is selecting a sk for the
	 * new incoming connection request (e.g. selecting a listen sk for
	 * the received SYN in the TCP case). reuse->sk is one of the sk
	 * in the reuseport group. The bpf prog can use reuse->sk to learn
	 * the local listening ip/port without looking into the skb.
	 *
	 * When reuse->migrating_sk is not NULL, reuse->sk is closed and
	 * reuse->migrating_sk is the socket that needs to be migrated
	 * to another listening socket. migrating_sk could be a fullsock
	 * sk that is fully established or a reqsk that is in-the-middle
	 * of 3-way handshake.
	 */
	__bpf_md_ptr(struct bpf_sock *, sk);
	__bpf_md_ptr(struct bpf_sock *, migrating_sk);
};
4.10. struct bpf_raw_tracepoint_args
在 include/uapi/linux/bpf.h 文件下定义:
/*
 * Input context handed to BPF_PROG_TYPE_RAW_TRACEPOINT and
 * BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs.
 */
struct bpf_raw_tracepoint_args {
	/*
	 * Zero-length array (compiler extension). It is the only member,
	 * so the C99 "args[]" flexible-array spelling is not available.
	 */
	__u64 args[0];
};
4.11. struct bpf_sock_addr
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_CGROUP_SOCK_ADDR programs. */
struct bpf_sock_addr {
	/* Allows 4-byte read, but no write. */
	__u32 user_family;

	/*
	 * Allows 1,2,4-byte read and 4-byte write.
	 * Stored in network byte order.
	 */
	__u32 user_ip4;

	/*
	 * Allows 1,2,4,8-byte read and 4,8-byte write.
	 * Stored in network byte order.
	 */
	__u32 user_ip6[4];

	/*
	 * Allows 1,2,4-byte read and 4-byte write.
	 * Stored in network byte order.
	 */
	__u32 user_port;

	__u32 family;		/* Allows 4-byte read, but no write */
	__u32 type;		/* Allows 4-byte read, but no write */
	__u32 protocol;		/* Allows 4-byte read, but no write */

	/*
	 * Allows 1,2,4-byte read and 4-byte write.
	 * Stored in network byte order.
	 */
	__u32 msg_src_ip4;

	/*
	 * Allows 1,2,4,8-byte read and 4,8-byte write.
	 * Stored in network byte order.
	 */
	__u32 msg_src_ip6[4];

	__bpf_md_ptr(struct bpf_sock *, sk);
};
4.12. struct bpf_sysctl
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_CGROUP_SYSCTL programs. */
struct bpf_sysctl {
	/*
	 * Sysctl is being read (= 0) or written (= 1).
	 * Allows 1,2,4-byte read, but no write.
	 */
	__u32 write;

	/*
	 * Sysctl file position to read from, write to.
	 * Allows 1,2,4-byte read and 4-byte write.
	 */
	__u32 file_pos;
};
4.13. struct bpf_sockopt
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_CGROUP_SOCKOPT programs. */
struct bpf_sockopt {
	__bpf_md_ptr(struct bpf_sock *, sk);
	__bpf_md_ptr(void *, optval);
	__bpf_md_ptr(void *, optval_end);

	__s32 level;
	__s32 optname;
	__s32 optlen;
	__s32 retval;
};
4.14. struct bpf_sk_lookup
在 include/uapi/linux/bpf.h 文件下定义:
/* Input context handed to BPF_PROG_TYPE_SK_LOOKUP programs. */
struct bpf_sk_lookup {
	union {
		/* Selected socket */
		__bpf_md_ptr(struct bpf_sock *, sk);
		/* Non-zero if socket was selected in PROG_TEST_RUN */
		__u64 cookie;
	};

	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
	__u32 remote_ip4;	/* Network byte order */
	__u32 remote_ip6[4];	/* Network byte order */
	__be16 remote_port;	/* Network byte order */
	__u16 :16;		/* Zero padding */
	__u32 local_ip4;	/* Network byte order */
	__u32 local_ip6[4];	/* Network byte order */
	__u32 local_port;	/* Host byte order */
	__u32 ingress_ifindex;	/* The arriving interface. Determined by inet_iif. */
};