INTRODUCTION À EBPF OU COMMENT PISTER UN PAQUET LAMBDA SANS SON CONSENTEMENT Jean-Tiare Le Bigot ‒ Easymile
QUI SUIS-JE ? Jean-Tiare Le Bigot Torture de paquets // containers @oyadutaf // blog.yadutaf.fr
DE QUOI PARLE-T-ON ? $> sudo python tracepkt.py 172.17.0.2 NETWORK NS PID INTERFACE TYPE SEQ ADDRESSES [ 4026531993] 2570 docker0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026531993] 2570 veth1a054e5 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 veth1a054e5 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 docker0 reply 1 172.17.0.2 -> 172.17.0.1
POURQUOI ?
UN PROBLÈME SIMPLE... ...EN L3 (IP)
UNE AUTRE PAIRE DE MANCHES... ...EN L2 (ETHERNET)
ZOOM SUR LINUX (RÉCENT) Containers ( netns ) Interfaces virtuelles (Veth, Bridge, MacVlan, ...)
QUEL CHEMIN ? POUR UN CONTAINER LOCAL mtr -r -c 1 172.17.0.2 Start: Mon Nov 13 22:53:53 2017 HOST: jt-laptop Loss% Snt Last Avg Best Wrst StDev 1. | -- 172.17.0.2 0.0% 1 0.1 0.1 0.1 0.1 0.0
COMMENT FAIRE ? L'APPROCHE TRADITIONNELLE
COMMENT FAIRE ? L'APPROCHE GEEK
VOUS AVEZ DIT " EBPF " ? « Extended BPF » Safe
GNÉ ? Plugins Linux, sans modules, hautes performances Réseau hautes performances Firewall hautes performances Traffic Control hautes performances Tracing/Profiling hautes performances ... hautes performances
SOUS LE CAPOT CHEZ Cilium Netflix Cisco Facebook BCC ...
L'IDÉE: PISTER UN PING IPV4 INNOCENT DANS SES MOINDRES DÉTOURS
PING UN PETIT BIJOU DE TECHNOLOGIE
PING /* See? ... someone runs another ping on this host. */ if (not_ours && sock->socktype == SOCK_RAW) fset->install_filter(sock); https://github.com/iputils/iputils/blob/665782e1d3c77df2c90f144b586da
PING void install_filter(socket_st *sock) { struct sock_filter insns[] = { BPF_STMT(BPF_LDX|BPF_B|BPF_MSH, 0), /* Skip IP header. F..g [...] */ BPF_STMT(BPF_LD|BPF_H|BPF_IND, 4), /* Load icmp echo ident [...] */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(ident), 0, 1), /* Ours? */ BPF_STMT(BPF_RET|BPF_K, ~0U), /* Yes, it passes. */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* Load icmp type */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ICMP_ECHOREPLY, 1, 0), /* Echo? */ BPF_STMT(BPF_RET|BPF_K, 0xFFFFFFF), /* No. It passes. */ BPF_STMT(BPF_RET|BPF_K, 0) /* Echo with wrong ident [...] */ }; struct sock_fprog filter = { sizeof insns / sizeof (insns[0]), insns}; setsockopt(sock->fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof (filter)) }
BCC À LA RESCOUSSE Surcouche eBPF C-Like Python
HELLO BPF (C) #include <bcc/proto.h> #include <linux/sched.h> struct route_evt_t { char comm[TASK_COMM_LEN]; }; BPF_PERF_OUTPUT(route_evt); TRACEPOINT_PROBE(net, netif_rx) { struct route_evt_t evt = {}; bpf_get_current_comm(evt.comm, TASK_COMM_LEN); route_evt.perf_submit(args, &evt, sizeof (evt)); return 0; }
HELLO BPF (PYTHON 1/2) from bcc import BPF import ctypes as ct bpf_text = ''PREV SLIDE'' TASK_COMM_LEN = 16 # linux/sched.h class RouteEvt(ct.Structure): _fields_ = [ ("comm", ct.c_char * TASK_COMM_LEN), ] def event_printer(cpu, data, size): event = ct.cast(data, ct.POINTER(RouteEvt)).contents print "Just got a packet from %s" % (event.comm)
HELLO BPF (PYTHON 2/2) if __name__ == "__main__": b = BPF(text=bpf_text) b["route_evt"].open_perf_buffer(event_printer) while True: b.kprobe_poll()
HELLO BPF: SHOW TIME $> sudo python ./tracepkt.py ... Just got a packet from ping6 Just got a packet from ping6 Just got a packet from ping Just got a packet from irq/46-iwlwifi ...
LES TRACEPOINTS Points de surveillance Positionnés par les devs
LES TRACEPOINTS ⇒ Comment savoir lesquels utiliser ‽
PROTOTYPAGE: PERF perf \ \ \ ping 172.17.0.2 -c1
PROTOTYPAGE: PERF perf trace \ \ \ ping 172.17.0.2 -c1
PROTOTYPAGE: PERF perf trace \ --event 'net:*' \ \ ping 172.17.0.2 -c1
PROTOTYPAGE: PERF perf trace \ --event 'net:*' \ --no-syscalls \ ping 172.17.0.2 -c1
PROTOTYPAGE: PERF net_dev_queue dev=docker0 skbaddr=0xffff96d481988700 net_dev_start_xmit dev=docker0 skbaddr=0xffff96d481988700 net_dev_queue dev=veth79215ff skbaddr=0xffff96d481988700 net_dev_start_xmit dev=veth79215ff skbaddr=0xffff96d481988700 netif_rx dev=eth0 skbaddr=0xffff96d481988700 net_dev_xmit dev=veth79215ff skbaddr=0xffff96d481988700 net_dev_xmit dev=docker0 skbaddr=0xffff96d481988700 netif_receive_skb dev=eth0 skbaddr=0xffff96d481988700 net_dev_queue dev=eth0 skbaddr=0xffff96d481988b00 net_dev_start_xmit dev=eth0 skbaddr=0xffff96d481988b00 netif_rx dev=veth79215ff skbaddr=0xffff96d481988b00 net_dev_xmit dev=eth0 skbaddr=0xffff96d481988b00 netif_receive_skb dev=veth79215ff skbaddr=0xffff96d481988b00 netif_receive_skb_entry dev=docker0 skbaddr=0xffff96d481988b00 netif_receive_skb dev=docker0 skbaddr=0xffff96d481988b00
TRACEPOINTS: LA SHORT LIST net_dev_queue netif_receive_skb_entry netif_rx napi_gro_receive_entry
LES TRACEPOINTS: L'API // /sys/kernel/debug/tracing/events/net/netif_rx/format name: netif_rx ID: 1183 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:void * skbaddr; offset:8; size:8; signed:0; field:unsigned int len; offset:16; size:4; signed:0; field:__data_loc char[] name; offset:20; size:4; signed:1; print fmt: "dev=%s skbaddr=%p len=%u", __get_str(name), REC->skbaddr, REC->len
CODE: LES 4 TRACEPOINTS TRACEPOINT_PROBE(net, netif_rx) { return do_trace(args, ( struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, net_dev_queue) { return do_trace(args, ( struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, napi_gro_receive_entry) { return do_trace(args, ( struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, netif_receive_skb_entry) { return do_trace(args, ( struct sk_buff*)args->skbaddr); }
CODE: DO_TRACE static inline int do_trace(void* ctx, struct sk_buff* skb) { struct route_evt_t evt = {}; bpf_get_current_comm(evt.comm, TASK_COMM_LEN); route_evt.perf_submit(ctx, &evt, sizeof (evt)); return 0; }
CODE: NOM DE L'INTERFACE VERSION C CLASSIQUE strncpy(&evt.ifname, skb->dev->name, IFNAMSIZ); VERSION C EBPF struct net_device *dev; bpf_probe_read(&dev, sizeof (skb->dev), &skb->dev); bpf_probe_read(&evt.ifname, IFNAMSIZ, dev->name);
CODE: IDENTIFIANT DU NETNS VERSION C CLASSIQUE evt.netns = dev->nd_net.net->ns.inum VERSION C EBPF struct net* net; possible_net_t *skc_net = &dev->nd_net; bpf_probe_read(&net, sizeof (skc_net->net), &skc_net->net); struct ns_common* ns = &net->ns; bpf_probe_read(&evt.netns, sizeof (ns->inum), &ns->inum);
INSPECTION SURPRISE ! $> sudo python ./tracepkt.py [ 4026531957] docker0 [ 4026531957] vetha373ab6 [ 4026532258] eth0 [ 4026532258] eth0 [ 4026531957] vetha373ab6 [ 4026531957] docker0
ANATOMIE D'UN PAQUET RÉSEAU ┌───────────┐ │ ICMP │ ├───────────┤ │ IP │↕ iphdr.ihl * 4 ├───────────┤ │ MAC │↕ MAC_HEADER_SIZE (14) ├───────────┤
CODE: L'ENTÊTE ETHERNET char* head; u16 mac_header; bpf_probe_read(&head, sizeof (skb->head), &skb->head); bpf_probe_read(&mac_header, sizeof (skb->mac_header), &skb->mac_header);
CODE: L'ENTÊTE IP #define MAC_HEADER_SIZE 14; char* ip_header_address = head + mac_header + MAC_HEADER_SIZE; struct iphdr iphdr; bpf_probe_read(&iphdr, sizeof (iphdr), ip_header_address);
CODE: FILTRER IPV4 if (iphdr.version != 4) { return 0; }
CODE: CHARGER LES IPS evt.saddr = iphdr.saddr; evt.daddr = iphdr.daddr;
CODE: FILTRER ICMP if (iphdr.protocol != IPPROTO_ICMP) { return 0; }
CODE: L'ENTÊTE ICMP struct icmphdr icmphdr; u8 icmp_offset_from_ip_header = iphdr.ihl * 4; char* icmp_header_address = ip_header_address + icmp_offset_from_ip_header; bpf_probe_read(&icmphdr, sizeof (icmphdr), icmp_header_address);
CODE: FILTRER PING/PONG if (icmphdr.type != ICMP_ECHO && icmphdr.type != ICMP_ECHOREPLY) { return 0; }
CODE: CHARGER TYPE, ID, SEQ evt.icmptype = icmphdr.type; evt.icmpid = icmphdr.un.echo.id; evt.icmpseq = icmphdr.un.echo.sequence; evt.icmpid = be16_to_cpu(evt.icmpid); evt.icmpseq = be16_to_cpu(evt.icmpseq);
Recommend
More recommend