/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)

#ifndef SOL_TCP
#define SOL_TCP 6
#endif

#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif

#define tcp_jiffies32 ((__u32)bpf_jiffies64())

struct sock_common {
	unsigned char	skc_state;
	__u16		skc_num;
} __attribute__((preserve_access_index));

enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};

struct sock {
	struct sock_common	__sk_common;
#define sk_state		__sk_common.skc_state
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));

struct inet_sock {
	struct sock		sk;
} __attribute__((preserve_access_index));

struct inet_connection_sock {
	struct inet_sock	  icsk_inet;
	__u8			  icsk_ca_state:6,
				  icsk_ca_setsockopt:1,
				  icsk_ca_dst_locked:1;
	struct {
		__u8		  pending;
	} icsk_ack;
	__u64			  icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));

struct request_sock {
	struct sock_common	__req_common;
} __attribute__((preserve_access_index));

struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u32	window_clamp;
	__u8	ecn_flags;
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
	bool	is_mptcp;
} __attribute__((preserve_access_index));

static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}

static __always_inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}

static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}
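/*
 * Usage sketch (illustrative only, not part of the original header and kept
 * out of compilation): a congestion-control BPF program typically keeps its
 * per-connection private state in icsk_ca_priv and reaches it through
 * inet_csk_ca().  The struct and function names below are hypothetical; the
 * state must fit in the 104 bytes of icsk_ca_priv.
 */
#if 0
struct example_ca_state {		/* hypothetical private CA state */
	__u32 prior_rcv_nxt;
	__u32 loss_cwnd;
};

/* Typically called from the .init callback of a struct_ops CC module. */
static void example_ca_state_init(struct sock *sk)
{
	struct example_ca_state *ca = inet_csk_ca(sk);	/* points into icsk_ca_priv */
	const struct tcp_sock *tp = tcp_sk(sk);

	ca->prior_rcv_nxt = tp->rcv_nxt;
	ca->loss_cwnd = 0;
}
#endif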
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

#define	TCP_ECN_OK		1
#define	TCP_ECN_QUEUE_CWR	2
#define	TCP_ECN_DEMAND_CWR	4
#define	TCP_ECN_SEEN		8

enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER	= 2,
	ICSK_ACK_PUSHED	= 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};

enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};

struct ack_sample {
	__u32 pkts_acked;
	__s32 rtt_us;
	__u32 in_flight;
} __attribute__((preserve_access_index));

struct rate_sample {
	__u64  prior_mstamp;	/* starting timestamp for interval */
	__u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32  delivered;	/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int  losses;		/* number of packets marked lost upon ACK */
	__u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32  prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));

#define TCP_CA_NAME_MAX		16
#define TCP_CONG_NEEDS_ECN	0x2

struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
	void *owner;
};
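/*
 * Usage sketch (illustrative only, not part of the original header and kept
 * out of compilation): a BPF congestion-control module built on this header
 * defines its callbacks with BPF_STRUCT_OPS() and wires them into a
 * tcp_congestion_ops instance placed in the ".struct_ops" section, which
 * libbpf registers as a struct_ops map.  All "example_*" names and the
 * "bpf_example" CC name below are hypothetical.
 */
#if 0
char _license[] SEC("license") = "GPL";

void BPF_STRUCT_OPS(example_init, struct sock *sk)
{
	/* expands to SEC("struct_ops/example_init") BPF_PROG(example_init, ...) */
}

__u32 BPF_STRUCT_OPS(example_ssthresh, struct sock *sk)
{
	/* placeholder: leave ssthresh unchanged */
	return tcp_sk(sk)->snd_ssthresh;
}

__u32 BPF_STRUCT_OPS(example_undo_cwnd, struct sock *sk)
{
	return tcp_sk(sk)->snd_cwnd;
}

SEC(".struct_ops")
struct tcp_congestion_ops example_ca = {
	.init		= (void *)example_init,
	.ssthresh	= (void *)example_ssthresh,
	.undo_cwnd	= (void *)example_undo_cwnd,
	.cong_avoid	= (void *)example_cong_avoid,	/* sketched after the #endif below */
	.name		= "bpf_example",
};
#endif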
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })

static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	return tp->snd_cwnd < tp->snd_ssthresh;
}

static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}

static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
	int i;

	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
		if (a[i] != b[i])
			return false;
		if (!a[i])
			break;
	}

	return true;
}

extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

struct mptcp_sock {
	struct inet_connection_sock	sk;

	__u32		token;
	struct sock	*first;
	char		ca_name[TCP_CA_NAME_MAX];
} __attribute__((preserve_access_index));

#endif
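/*
 * Usage sketch (illustrative only, not part of the original header and kept
 * out of compilation): a Reno-style cong_avoid callback written against the
 * helpers above, calling the tcp_slow_start() and tcp_cong_avoid_ai() kfuncs
 * declared with __ksym.  The function name is hypothetical.
 */
#if 0
void BPF_STRUCT_OPS(example_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Only grow cwnd while the connection is actually cwnd-limited. */
	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp)) {
		/* Exponential growth; returns leftover ACKs once ssthresh is reached. */
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			return;
	}
	/* Additive increase: roughly one packet per RTT. */
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}
#endif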