/* SPDX-License-Identifier: GPL-2.0 */

#define XDP_STATS_MAP_PINNING LIBBPF_PIN_NONE
#include "xdp-trafficgen.h"

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <xdp/parsing_helpers.h>
#include <xdp/xdp_sample.bpf.h>
#include <xdp/xdp_sample_shared.h>

#if defined(HAVE_LIBBPF_BPF_PROGRAM__FLAGS) && defined(DEBUG)
/* We use the many-argument version of bpf_printk() for debugging, so only
 * enable it if we have the libbpf helper that selects the vprintf version.
 * This was introduced in libbpf 0.6.0, which is the same version as the
 * bpf_program__flags() method, so use that as an indicator since we don't
 * feature detect on the BPF helpers themselves.
 */
#define TCP_DEBUG
#endif

char _license[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct trafficgen_state);
} state_map SEC(".maps");

const volatile struct trafficgen_config config;

static void update_checksum(__u16 *sum, __u32 diff)
{
	/* We use the RFC 1071 method for incremental checksum updates
	 * because that can be used directly with the 32-bit sequence
	 * number difference (relying on folding for large differences)
	 */
	__u32 cksum = diff + (__u16)~bpf_ntohs(*sum);

	while (cksum > 0xffff)
		cksum = (cksum & 0xffff) + (cksum >> 16);
	*sum = bpf_htons(~cksum);
}

/* Fold a 32-bit partial checksum into the final 16-bit ones'-complement
 * form */
static __u16 csum_fold_helper(__u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return ~((csum & 0xffff) + (csum >> 16));
}

/* Count the packet and redirect it to the output interface unmodified */
SEC("xdp")
int xdp_redirect_notouch(struct xdp_md *ctx)
{
	__u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	NO_TEAR_INC(rec->xdp_redirect);

	return bpf_redirect(config.ifindex_out, 0);
}

/* Sweep the UDP destination port across [port_start, port_start + port_range)
 * on successive packets, patching the checksum incrementally */
SEC("xdp")
int xdp_redirect_update_port(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct trafficgen_state *state;
	__u16 cur_port, port_diff;
	int action = XDP_ABORTED;
	struct datarec *rec;
	struct udphdr *hdr;
	__u32 key = 0;

	hdr = data + (sizeof(struct ethhdr) + sizeof(struct ipv6hdr));
	if (hdr + 1 > data_end)
		goto out;

	state = bpf_map_lookup_elem(&state_map, &key);
	if (!state)
		goto out;

	key = bpf_get_smp_processor_id();
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		goto out;

	cur_port = bpf_ntohs(hdr->dest);
	port_diff = state->next_port - cur_port;
	if (port_diff) {
		update_checksum(&hdr->check, port_diff);
		hdr->dest = bpf_htons(state->next_port);
	}
	if (state->next_port++ >= config.port_start + config.port_range - 1)
		state->next_port = config.port_start;

	action = bpf_redirect(config.ifindex_out, 0);
	NO_TEAR_INC(rec->processed);
out:
	return action;
}

SEC("xdp")
int xdp_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, struct tcp_flowkey);
	__type(value, struct tcp_flowstate);
} flow_state_map SEC(".maps");

/* memcmp()-style byte-wise comparison of two IPv6 addresses */
static int cmp_ipaddr(struct in6_addr *a_, struct in6_addr *b_)
{
	__u8 *a = (void *)a_, *b = (void *)b_;
	int i;

	for (i = 0; i < sizeof(struct in6_addr); i++) {
		if (*a > *b)
			return -1;
		if (*a < *b)
			return 1;
		a++;
		b++;
	}
	return 0;
}

static inline __u8 before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}

/* Fixed 2 second timeout */
#define TCP_RTO 2000000000UL
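/* The TCP mode is split across two programs that share per-flow state in
 * flow_state_map, serialised by the bpf_spin_lock embedded in struct
 * tcp_flowstate: xdp_handle_tcp_recv (below) runs on the receive path and
 * records the ACKs coming back from the peer, while xdp_redirect_send_tcp
 * rewrites and redirects the outgoing data packets.
 *
 * The before() helper above uses standard TCP serial number arithmetic so
 * that comparisons survive 32-bit sequence number wraparound. For example
 * (values chosen to straddle the wrap point):
 *
 *	before(0xfffffff0, 0x10) == 1
 *
 * because 0xfffffff0 - 0x10 = 0xffffffe0, which is negative when read as
 * an __s32, so 0xfffffff0 counts as "before" 0x10.
 */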
SEC("xdp")
int xdp_handle_tcp_recv(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	struct tcp_flowstate *fstate, new_fstate = {};
	void *data = (void *)(long)ctx->data;
	struct hdr_cursor nh = { .pos = data };
	struct trafficgen_state *state;
	struct tcp_flowkey key = {};
	int eth_type, ip_type, err;
	struct ipv6hdr *ipv6hdr;
	struct tcphdr *tcphdr;
	int action = XDP_PASS;
	struct ethhdr *eth;
	__u8 new_match;
	__u32 ack_seq;
	int i;

	eth_type = parse_ethhdr(&nh, data_end, &eth);
	if (eth_type != bpf_htons(ETH_P_IPV6))
		goto out;

	ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr);
	if (ip_type != IPPROTO_TCP)
		goto out;

	if (parse_tcphdr(&nh, data_end, &tcphdr) < 0)
		goto out;

	/* The zero-initialised flow key doubles as the all-zeroes __u32 key
	 * for the single-entry state_map */
	state = bpf_map_lookup_elem(&state_map, &key);
	if (!state)
		goto out;

	/* swap dst and src for received packet */
	key.dst_ip = ipv6hdr->saddr;
	key.dst_port = tcphdr->source;

	new_match = !cmp_ipaddr(&key.dst_ip, &state->flow_key.dst_ip) &&
		key.dst_port == state->flow_key.dst_port;

	key.src_ip = ipv6hdr->daddr;
	key.src_port = tcphdr->dest;

	fstate = bpf_map_lookup_elem(&flow_state_map, &key);
	if (!fstate) {
		if (!new_match)
			goto out;

		new_fstate.flow_state = FLOW_STATE_NEW;
		new_fstate.seq = bpf_ntohl(tcphdr->ack_seq);
		for (i = 0; i < ETH_ALEN; i++) {
			new_fstate.dst_mac[i] = eth->h_source[i];
			new_fstate.src_mac[i] = eth->h_dest[i];
		}

		err = bpf_map_update_elem(&flow_state_map, &key, &new_fstate,
					  BPF_NOEXIST);
		if (err)
			goto out;

		fstate = bpf_map_lookup_elem(&flow_state_map, &key);
		if (!fstate)
			goto out;
	}

	ack_seq = bpf_ntohl(tcphdr->ack_seq);
#ifdef TCP_DEBUG
	bpf_printk("Got state seq %u ack_seq %u new %u seq %u new %u window %u\n",
		   fstate->seq, fstate->ack_seq, ack_seq,
		   fstate->rcv_seq, bpf_ntohl(tcphdr->seq),
		   bpf_ntohs(tcphdr->window));
#endif

	bpf_spin_lock(&fstate->lock);
	if (fstate->ack_seq == ack_seq)
		fstate->dupack++;
	fstate->window = bpf_ntohs(tcphdr->window);
	fstate->ack_seq = ack_seq;
	fstate->rcv_seq = bpf_ntohl(tcphdr->seq);
	if (tcphdr->syn)
		fstate->rcv_seq++;
	if (tcphdr->fin || tcphdr->rst)
		fstate->flow_state = FLOW_STATE_DONE;

	/* If we've taken over the flow management (after the handshake), drop
	 * the packet */
	if (fstate->flow_state >= FLOW_STATE_RUNNING)
		action = XDP_DROP;
	bpf_spin_unlock(&fstate->lock);
out:
	return action;
}
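/* The send path below patches the TCP header in place: only the sequence
 * number changes between packets, so instead of recomputing the checksum
 * over the whole packet, bpf_csum_diff() is seeded with the bitwise-negated
 * old checksum and given the old and new 4-byte sequence values, and
 * csum_fold_helper() folds the 32-bit result back into the 16-bit
 * ones'-complement header field. Loss recovery is a simple go-back-N: on a
 * duplicate ACK, or when no progress has been made for TCP_RTO, the send
 * sequence is rewound to the last ACKed byte and everything from there is
 * retransmitted.
 */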
SEC("xdp")
int xdp_redirect_send_tcp(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	__u32 new_seq, ack_seq, window;
	struct trafficgen_state *state;
	struct tcp_flowstate *fstate;
	int action = XDP_ABORTED;
	struct ipv6hdr *ipv6hdr;
	struct tcphdr *tcphdr;
	struct datarec *rec;
	__u8 resend = 0;
#ifdef TCP_DEBUG
	__u8 print = 0;
#endif
	__u16 pkt_len;
	__u32 key = 0;
	__u64 now;

	ipv6hdr = data + sizeof(struct ethhdr);
	tcphdr = data + (sizeof(struct ethhdr) + sizeof(struct ipv6hdr));
	if (tcphdr + 1 > data_end || ipv6hdr + 1 > data_end)
		goto ret;

	pkt_len = bpf_ntohs(ipv6hdr->payload_len) - sizeof(*tcphdr);

	state = bpf_map_lookup_elem(&state_map, &key);
	if (!state)
		goto ret;

	key = bpf_get_smp_processor_id();
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		goto ret;

	fstate = bpf_map_lookup_elem(&flow_state_map,
				     (const void *)&state->flow_key);
	if (!fstate)
		goto out;

	now = bpf_ktime_get_coarse_ns();
	bpf_spin_lock(&fstate->lock);
	if (fstate->flow_state != FLOW_STATE_RUNNING) {
		action = XDP_DROP;
		bpf_spin_unlock(&fstate->lock);
		goto out;
	}

	/* reset sequence on packet loss */
	if (fstate->dupack || (fstate->last_progress &&
			       now - fstate->last_progress > TCP_RTO)) {
		fstate->seq = fstate->ack_seq;
		fstate->dupack = 0;
	}

	new_seq = fstate->seq;
	ack_seq = fstate->ack_seq;
	window = fstate->window << fstate->wscale;

#ifdef TCP_DEBUG
	if (fstate->last_print != fstate->seq) {
		fstate->last_print = fstate->seq;
		print = 1;
	}
#endif

	if (!before(new_seq + pkt_len, ack_seq + window)) {
		/* We caught up to the end of the RWIN, spin until ACKs come
		 * back opening up the window */
		action = XDP_DROP;
		bpf_spin_unlock(&fstate->lock);
#ifdef TCP_DEBUG
		if (print)
			bpf_printk("Dropping because %u isn't before %u (ack_seq %u wnd %u)",
				   new_seq + pkt_len, ack_seq + window,
				   ack_seq, window);
#endif
		goto out;
	}

	if (!before(new_seq, fstate->highest_seq)) {
		fstate->highest_seq = new_seq;
	} else {
		resend = 1;
		fstate->retransmits++;
	}

	fstate->seq = new_seq + pkt_len;
	fstate->last_progress = now;
	bpf_spin_unlock(&fstate->lock);

	new_seq = bpf_htonl(new_seq);
	if (new_seq != tcphdr->seq) {
		__u32 csum;

		csum = bpf_csum_diff(&tcphdr->seq, sizeof(__u32), &new_seq,
				     sizeof(new_seq), ~tcphdr->check);
		tcphdr->seq = new_seq;
		tcphdr->check = csum_fold_helper(csum);
	}

	action = bpf_redirect(config.ifindex_out, 0);
out:
	/* record retransmissions as XDP_TX return codes until we get better
	 * stats */
	if (resend)
		NO_TEAR_INC(rec->issue);
	if (action == XDP_REDIRECT)
		NO_TEAR_INC(rec->xdp_redirect);
	else
		NO_TEAR_INC(rec->dropped);
ret:
	return action;
}
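/* Note on how these programs are driven: the BPF side only assumes that the
 * same template packet is passed through xdp_redirect_send_tcp (or the UDP
 * variants above) over and over again. A minimal userspace sketch, assuming
 * a kernel with live-frame BPF_PROG_RUN support and with prog_fd, pkt and
 * pkt_len supplied by the caller, might look like:
 *
 *	LIBBPF_OPTS(bpf_test_run_opts, opts,
 *		    .data_in = pkt,
 *		    .data_size_in = pkt_len,
 *		    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
 *		    .repeat = 1 << 20);
 *	err = bpf_prog_test_run_opts(prog_fd, &opts);
 *
 * Each repetition runs the program on a copy of the packet and, when it
 * returns XDP_REDIRECT, transmits the resulting frame out of
 * config.ifindex_out.
 */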