xref: /xnu-10002.61.3/bsd/net/pktsched/pktsched_fq_codel.h (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef _NET_PKTSCHED_FQ_CODEL_H_
30 #define _NET_PKTSCHED_FQ_CODEL_H_
31 
32 #ifdef PRIVATE
33 #include <sys/types.h>
34 #include <sys/param.h>
35 
36 #ifdef BSD_KERNEL_PRIVATE
37 #include <net/flowadv.h>
38 #include <net/pktsched/pktsched.h>
39 #endif /* BSD_KERNEL_PRIVATE */
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 
45 #ifdef BSD_KERNEL_PRIVATE
/*
 * Per-class (fq_if_classq) statistics counters.
 * NOTE(review): field meanings below are inferred from the names; confirm
 * exact accounting semantics against pktsched_fq_codel.c.
 */
struct fcl_stat {
	u_int32_t fcl_flow_control;        /* flows placed under flow control */
	u_int32_t fcl_flow_feedback;       /* flow-control feedback (resume) events */
	u_int32_t fcl_dequeue_stall;       /* dequeue stalls detected */
	u_int32_t fcl_flow_control_fail;   /* failed attempts to assert flow control */
	u_int64_t fcl_drop_overflow;       /* drops due to queue overflow */
	u_int64_t fcl_drop_early;          /* drops by the AQM early-drop logic */
	u_int32_t fcl_drop_memfailure;     /* drops due to allocation failure */
	u_int32_t fcl_flows_cnt;           /* number of flows */
	u_int32_t fcl_newflows_cnt;        /* flows on the new-flows list */
	u_int32_t fcl_oldflows_cnt;        /* flows on the old-flows list */
	u_int64_t fcl_pkt_cnt;             /* packet count */
	u_int64_t fcl_dequeue;             /* packets dequeued */
	u_int64_t fcl_dequeue_bytes;       /* bytes dequeued */
	u_int64_t fcl_byte_cnt;            /* byte count */
	u_int32_t fcl_throttle_on;         /* throttle-on transitions */
	u_int32_t fcl_throttle_off;        /* throttle-off transitions */
	u_int32_t fcl_throttle_drops;      /* drops while throttled */
	u_int32_t fcl_dup_rexmts;          /* duplicate retransmissions seen */
	u_int32_t fcl_pkts_compressible;   /* packets eligible for compression */
	u_int32_t fcl_pkts_compressed;     /* packets actually compressed */
	uint64_t fcl_min_qdelay;           /* minimum queue delay observed */
	uint64_t fcl_max_qdelay;           /* maximum queue delay observed */
	uint64_t fcl_avg_qdelay;           /* average queue delay */
	uint32_t fcl_overwhelming;         /* "overwhelming" condition detections */
	uint64_t fcl_ce_marked;            /* packets ECN CE-marked */
	uint64_t fcl_ce_reported;          /* CE marks reported */
	uint64_t fcl_ce_mark_failures;     /* failed CE-marking attempts */
	uint64_t fcl_l4s_pkts;             /* L4S packets seen */
	uint64_t fcl_ignore_tx_time;       /* packets whose tx time was ignored */
	uint64_t fcl_paced_pkts;           /* packets held back for pacing */
	/*
	 * NOTE(review): doubled "fcl_" prefix looks like a typo, but the
	 * name is part of this kernel-private ABI, so it is left as-is.
	 */
	uint64_t fcl_fcl_pacemaker_needed; /* times a pacemaker wakeup was needed */
};
79 
80 /*
81  * Use 8 bits from the flow id as the tag for set associative
82  * hashing
83  * NOTE: The first 2 bits of the flow id is being used to encode the flow
84  * domain information, so don't use the top 8 bits as it won't have a uniform
85  * distribution.
86  */
87 
#define FQ_IF_HASH_TAG_SIZE     8   /* tag width in bits */
#define FQ_IF_HASH_TAG_SHIFT    16  /* tag taken from bits 16..23 of the flow id */
#define FQ_IF_HASH_TAG_MASK     0xFF
#define FQ_IF_HASH_TABLE_SIZE   (1 << FQ_IF_HASH_TAG_SIZE) /* 256 buckets */

/* Set the quantum to be one MTU */
#define FQ_IF_DEFAULT_QUANTUM   1500

/* Max number of service classes currently supported */
#define FQ_IF_MAX_CLASSES       10
_Static_assert(FQ_IF_MAX_CLASSES < 127,
    "maximum number of classes needs to fit in a single byte");

/* Byte threshold beyond which a flow is considered "large" */
#define FQ_IF_LARGE_FLOW_BYTE_LIMIT     15000

/* Max number of classq groups currently supported */
#define FQ_IF_MAX_GROUPS                16
105 
/*
 * Traffic classes: used to index the per-group target-delay and
 * update-interval arrays (see fq_codel_classq_group below).
 */
typedef enum : uint8_t {
	FQ_TFC_C            = 0, /* classic traffic */
	FQ_TFC_L4S          = 1, /* L4S traffic */
	FQ_TFC_CNT          = 2, /* count; used for array sizing */
} fq_tfc_type_t;

struct flowq;                        /* per-flow queue, declared elsewhere */
typedef u_int32_t pktsched_bitmap_t; /* one bit per class priority index */
struct if_ifclassq_stats;
115 
/*
 * Scheduler states tracked per class in the group bitmaps.
 * NOTE(review): the eligible/ready terminology follows the QFQ
 * scheduling algorithm -- confirm against the implementation.
 */
typedef enum : uint8_t {
	FQ_IF_ER = 0,           /* eligible, ready */
	FQ_IF_IR = 1,           /* ineligible, ready */
	FQ_IF_EB = 2,           /* eligible blocked */
	FQ_IF_IB = 3,           /* ineligible, blocked */
	FQ_IF_MAX_STATE         /* count; used for array sizing */
} fq_if_state;
123 
124 /*
125  * This priority index is used for QFQ state bitmaps, lower index gets
126  * higher priority
127  */
#define FQ_IF_BK_SYS_INDEX      9   /* background, system-initiated */
#define FQ_IF_BK_INDEX  8           /* background */
#define FQ_IF_BE_INDEX  7           /* best effort */
#define FQ_IF_RD_INDEX  6           /* responsive data */
#define FQ_IF_OAM_INDEX 5           /* operations/administration/management */
#define FQ_IF_AV_INDEX  4           /* audio/video streaming */
#define FQ_IF_RV_INDEX  3           /* responsive video */
#define FQ_IF_VI_INDEX  2           /* interactive video */
#define FQ_IF_SIG_INDEX 2           /* signaling; shares priority with VI */
#define FQ_IF_VO_INDEX  1           /* voice */
#define FQ_IF_CTL_INDEX 0           /* network control (highest priority) */
139 
typedef SLIST_HEAD(, flowq) flowq_list_t;    /* hash-bucket chain of flows */
typedef STAILQ_HEAD(, flowq) flowq_stailq_t; /* FIFO list of flows */
/*
 * Per-service-class queue state: DRR accounting plus the fq_codel
 * new/old flow lists for this class.
 */
typedef struct fq_if_classq {
	uint32_t fcl_pri;      /* class priority, lower the better */
	uint32_t fcl_service_class;    /* service class */
	uint32_t fcl_quantum;          /* quantum in bytes */
	uint32_t fcl_drr_max;          /* max flows per class for DRR */
	int64_t  fcl_budget;             /* budget for this classq */
	uint64_t fcl_next_tx_time;      /* next time a packet is ready */
	flowq_stailq_t fcl_new_flows;   /* List of new flows */
	flowq_stailq_t fcl_old_flows;   /* List of old flows */
	struct fcl_stat fcl_stat;       /* per-class statistics */
#define FCL_PACED               0x1
	uint8_t fcl_flags;
} fq_if_classq_t;
/*
 * A classq group: a full set of per-service-class queues plus the AQM
 * parameters and scheduler-state bitmaps that apply to them.
 */
typedef struct fq_codel_classq_group {
	/* Target queue delays (ns), indexed by fq_tfc_type_t */
	uint64_t                fqg_target_qdelays[FQ_TFC_CNT];
	/* update intervals (ns), indexed by fq_tfc_type_t */
	uint64_t                fqg_update_intervals[FQ_TFC_CNT];
	/* classq bitmaps, one per fq_if_state */
	pktsched_bitmap_t       fqg_bitmaps[FQ_IF_MAX_STATE];
	TAILQ_ENTRY(fq_codel_classq_group) fqg_grp_link; /* combined-mode linkage */
	uint32_t                fqg_bytes;     /* bytes count */
	uint32_t                fqg_len;       /* pkts count */
	uint8_t                 fqg_flags;     /* flags */
#define FQ_IF_DEFAULT_GRP                   0x1
	uint8_t                 fqg_index;     /* group index */
	fq_if_classq_t          fqg_classq[FQ_IF_MAX_CLASSES]; /* class queues */
	struct flowq            *fqg_large_flow; /* flow has highest number of bytes */
} fq_if_group_t;
171 
/* Group-level packet/byte accounting helpers. */
#define FQG_LEN(_fqg)           ((_fqg)->fqg_len)
#define FQG_IS_EMPTY(_fqg)      (FQG_LEN(_fqg) == 0)
#define FQG_INC_LEN(_fqg)       (FQG_LEN(_fqg)++)
#define FQG_DEC_LEN(_fqg)       (FQG_LEN(_fqg)--)
#define FQG_ADD_LEN(_fqg, _len) (FQG_LEN(_fqg) += (_len))
#define FQG_SUB_LEN(_fqg, _len) (FQG_LEN(_fqg) -= (_len))
#define FQG_BYTES(_fqg)         ((_fqg)->fqg_bytes)

#define FQG_INC_BYTES(_fqg, _len)     \
    ((_fqg)->fqg_bytes = (_fqg)->fqg_bytes + (_len))
#define FQG_DEC_BYTES(_fqg, _len)     \
    ((_fqg)->fqg_bytes = (_fqg)->fqg_bytes - (_len))
184 
typedef TAILQ_HEAD(, fq_codel_classq_group) fq_grp_tailq_t;

/* Find-first-set across the groups' bitmaps; returns the chosen group. */
typedef int (* fq_if_bitmaps_ffs)(fq_grp_tailq_t *, int, fq_if_state, fq_if_group_t **);
/* Whether a given priority bit is zero across all groups' bitmaps. */
typedef boolean_t (* fq_if_bitmaps_zeros)(fq_grp_tailq_t *, int, fq_if_state);
/* Copy a priority bit from one state bitmap to another. */
typedef void (* fq_if_bitmaps_cpy)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state);
/* Clear a priority bit in a state bitmap. */
typedef void (* fq_if_bitmaps_clr)(fq_grp_tailq_t *, int, fq_if_state);
/* Move (copy then clear) a priority bit between state bitmaps. */
typedef void (* fq_if_bitmaps_move)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state);

/*
 * Functions that are used to look at groups'
 * bitmaps and decide which pri and group are the
 * next one to dequeue from.
 */
typedef struct fq_if_bitmap_ops {
	fq_if_bitmaps_ffs       ffs;
	fq_if_bitmaps_zeros     zeros;
	fq_if_bitmaps_cpy       cpy;
	fq_if_bitmaps_clr       clr;
	fq_if_bitmaps_move      move;
} bitmap_ops_t;
205 
/*
 * Per-interface fq_codel scheduler instance.
 */
typedef struct fq_codel_sched_data {
	struct ifclassq         *fqs_ifq;       /* back pointer to ifclassq */
	flowq_list_t            fqs_flows[FQ_IF_HASH_TABLE_SIZE]; /* flows table */
	uint32_t                fqs_pkt_droplimit;  /* drop limit */
	uint8_t                 fqs_throttle;   /* throttle on or off */
	uint8_t                 fqs_flags;      /* flags */
#define FQS_DRIVER_MANAGED      0x1
	struct flowadv_fclist   fqs_fclist; /* flow control state */
	struct flowq            *fqs_large_flow; /* flow has highest number of bytes */
	TAILQ_HEAD(, flowq)     fqs_empty_list; /* list of empty flows */
	/* list of groups in combined mode */
	fq_grp_tailq_t          fqs_combined_grp_list;
	uint32_t                fqs_empty_list_cnt; /* length of fqs_empty_list */
	/* bitmap indicating which grp is in combined mode */
	pktsched_bitmap_t       fqs_combined_grp_bitmap;
	classq_pkt_type_t       fqs_ptype;      /* packet type handled by this classq */
	bitmap_ops_t            *fqs_bm_ops;    /* installed bitmap strategy */
/* Shorthands that dispatch through the installed bitmap ops. */
#define grp_bitmaps_ffs     fqs_bm_ops->ffs
#define grp_bitmaps_zeros   fqs_bm_ops->zeros
#define grp_bitmaps_cpy     fqs_bm_ops->cpy
#define grp_bitmaps_clr     fqs_bm_ops->clr
#define grp_bitmaps_move    fqs_bm_ops->move
	fq_if_group_t           *fqs_classq_groups[FQ_IF_MAX_GROUPS]; /* by index */
} fq_if_t;
230 
/*
 * Accessor macros for scheduler/group/flow-queue accounting.
 * FQS_* take an explicit group index and resolve it via fq_if_find_grp();
 * FQ_* operate on the flow's cached group pointer (fq_group).
 */
#define FQS_GROUP(_fqs, _group_idx)                                      \
	(fq_if_find_grp((_fqs), (_group_idx)))

#define FQS_CLASSQ(_fqs, _group_idx, _sc_idx)                            \
    (FQS_GROUP((_fqs), (_group_idx))->fqg_classq[(_sc_idx)])

#define FQ_GROUP(_fq)                                      \
	((_fq)->fq_group)

#define FQ_GRP_LEN(_fq)                                    \
	(FQ_GROUP((_fq))->fqg_len)
#define FQ_GRP_IS_EMPTY(_fq)                               \
	(FQ_GRP_LEN((_fq)) == 0)
#define FQ_GRP_INC_LEN(_fq)                                \
    (FQ_GRP_LEN((_fq))++)
#define FQ_GRP_DEC_LEN(_fq)                                \
    (FQ_GRP_LEN((_fq))--)
#define FQ_GRP_ADD_LEN(_fq, _len)                          \
	(FQ_GRP_LEN((_fq)) += (_len))
#define FQ_GRP_SUB_LEN(_fq, _len)                          \
	(FQ_GRP_LEN((_fq)) -= (_len))

/*
 * Fixed: the body previously expanded "grp_idx" instead of the macro
 * parameter "_grp_idx", so it only compiled when the call site happened
 * to have a variable of that exact name in scope (and silently used it).
 */
#define FQS_GRP_ADD_LEN(_fqs, _grp_idx, _len)              \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_len += (_len))


#define FQ_GRP_BYTES(_fq)                     \
	(FQ_GROUP((_fq))->fqg_bytes)
#define FQ_GRP_INC_BYTES(_fq, _len)           \
    (FQ_GRP_BYTES((_fq)) += (_len))
#define FQ_GRP_DEC_BYTES(_fq, _len)           \
    (FQ_GRP_BYTES((_fq)) -= (_len))

/* Parameter renamed to "_grp_idx" and parenthesized for macro hygiene. */
#define FQS_GRP_INC_BYTES(_fqs, _grp_idx, _len)           \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_bytes += (_len))

#define FQ_CLASSQ(_fq)                                   \
	(FQ_GROUP((_fq))->fqg_classq[(_fq)->fq_sc_index])

/* Per-traffic-class AQM parameters from the flow's group. */
#define FQ_TARGET_DELAY(_fq)              \
	(FQ_GROUP((_fq))->fqg_target_qdelays[(_fq)->fq_tfc_type])
#define FQ_UPDATE_INTERVAL(_fq)           \
	(FQ_GROUP((_fq))->fqg_update_intervals[(_fq)->fq_tfc_type])
274 
275 #endif /* BSD_KERNEL_PRIVATE */
276 
/* Per-flow statistics exported to userland with the class stats. */
struct fq_codel_flowstats {
	u_int32_t       fqst_min_qdelay; /* minimum queue delay observed */
#define FQ_FLOWSTATS_OLD_FLOW   0x1
#define FQ_FLOWSTATS_NEW_FLOW   0x2
#define FQ_FLOWSTATS_LARGE_FLOW 0x4
#define FQ_FLOWSTATS_DELAY_HIGH 0x8
#define FQ_FLOWSTATS_FLOWCTL_ON 0x10
	u_int32_t       fqst_flags;      /* FQ_FLOWSTATS_* */
	u_int32_t       fqst_bytes;      /* bytes queued for the flow */
	u_int32_t       fqst_flowhash;   /* flow identifier */
};

/* Max number of per-flow stat entries reported per class */
#define FQ_IF_MAX_FLOWSTATS     20
/* Userland's view of the group count; asserted equal to FQ_IF_MAX_GROUPS */
#define FQ_IF_STATS_MAX_GROUPS  16
291 
/*
 * Per-class statistics snapshot exported to userland: mirrors the
 * kernel-internal fcl_stat counters plus per-flow entries.
 */
struct fq_codel_classstats {
	u_int32_t       fcls_pri;               /* class priority */
	u_int32_t       fcls_service_class;     /* service class */
	u_int32_t       fcls_quantum;           /* DRR quantum in bytes */
	u_int32_t       fcls_drr_max;           /* max flows per class for DRR */
	int64_t         fcls_budget;            /* current class budget */
	u_int64_t       fcls_target_qdelay;     /* classic target delay (ns) */
	u_int64_t       fcls_l4s_target_qdelay; /* L4S target delay (ns) */
	u_int64_t       fcls_update_interval;   /* AQM update interval (ns) */
	u_int32_t       fcls_flow_control;
	u_int32_t       fcls_flow_feedback;
	u_int32_t       fcls_dequeue_stall;
	u_int32_t       fcls_flow_control_fail;
	u_int64_t       fcls_drop_overflow;
	u_int64_t       fcls_drop_early;
	u_int32_t       fcls_drop_memfailure;
	u_int32_t       fcls_flows_cnt;
	u_int32_t       fcls_newflows_cnt;
	u_int32_t       fcls_oldflows_cnt;
	u_int64_t       fcls_pkt_cnt;
	u_int64_t       fcls_dequeue;
	u_int64_t       fcls_dequeue_bytes;
	u_int64_t       fcls_byte_cnt;
	u_int32_t       fcls_throttle_on;
	u_int32_t       fcls_throttle_off;
	u_int32_t       fcls_throttle_drops;
	u_int32_t       fcls_dup_rexmts;
	u_int32_t       fcls_flowstats_cnt;     /* valid entries in fcls_flowstats */
	struct fq_codel_flowstats fcls_flowstats[FQ_IF_MAX_FLOWSTATS];
	u_int32_t       fcls_pkts_compressible;
	u_int32_t       fcls_pkts_compressed;
	uint64_t        fcls_min_qdelay;
	uint64_t        fcls_max_qdelay;
	uint64_t        fcls_avg_qdelay;
	uint32_t        fcls_overwhelming;
	uint64_t        fcls_ce_marked;
	uint64_t        fcls_ce_reported;
	uint64_t        fcls_ce_mark_failures;
	uint64_t        fcls_l4s_pkts;
	uint64_t        fcls_ignore_tx_time;
	uint64_t        fcls_paced_pkts;
	uint64_t        fcls_fcl_pacing_needed; /* mirrors fcl_fcl_pacemaker_needed */
};
335 
336 #ifdef BSD_KERNEL_PRIVATE
337 
_Static_assert(FQ_IF_STATS_MAX_GROUPS == FQ_IF_MAX_GROUPS,
    "max group counts do not match");

/* One-time initialization. */
extern void pktsched_fq_init(void);
extern void fq_codel_scheduler_init(void);

/* ifclassq enqueue/dequeue entry points. */
extern int fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *h,
    classq_pkt_t *t, uint32_t cnt, uint32_t bytes, boolean_t *pdrop);
extern void fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt,
    uint8_t grp_idx);
extern void fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx);
extern int fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, classq_pkt_t *first_packet, classq_pkt_t *last_packet,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, uint8_t grp_idx);
extern int fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq,
    mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
    classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt, uint8_t grp_idx);
extern int fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg);

/* Flow lookup/creation and drop management. */
extern struct flowq *fq_if_hash_pkt(fq_if_t *, fq_if_group_t *,
    u_int32_t, mbuf_svc_class_t, u_int64_t, bool, fq_tfc_type_t);
extern boolean_t fq_if_at_drop_limit(fq_if_t *);
extern boolean_t fq_if_almost_at_drop_limit(fq_if_t *fqs);
extern void fq_if_drop_packet(fq_if_t *, uint64_t);
extern void fq_if_is_flow_heavy(fq_if_t *, struct flowq *);

/* Flow control and ECN feedback. */
extern boolean_t fq_if_add_fcentry(fq_if_t *, pktsched_pkt_t *, uint8_t,
    struct flowq *, fq_if_classq_t *);
extern void fq_if_flow_feedback(fq_if_t *, struct flowq *, fq_if_classq_t *);
extern boolean_t fq_if_report_ce(fq_if_t *, pktsched_pkt_t *, uint32_t, uint32_t);

/* Setup, teardown and statistics. */
extern int fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype);
extern void fq_if_teardown_ifclassq(struct ifclassq *ifq);
extern int fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid,
    u_int32_t qid, struct if_ifclassq_stats *ifqs);
extern void fq_if_destroy_flow(fq_if_t *, fq_if_classq_t *, struct flowq *);
extern void fq_if_move_to_empty_flow(fq_if_t *, fq_if_classq_t *,
    struct flowq *, uint64_t);

/* Group management (combined vs. separated scheduling modes). */
extern int fq_if_create_grp(struct ifclassq *ifcq, uint8_t qset_idx, uint8_t flags);
extern void fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t qset_idx);
extern void fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t qset_idx);
extern fq_if_group_t *fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx);
extern boolean_t fq_if_is_all_paced(struct ifclassq *ifq);
380 #endif /* BSD_KERNEL_PRIVATE */
381 
382 #ifdef __cplusplus
383 }
384 #endif
385 
386 #endif /* PRIVATE */
#endif /* _NET_PKTSCHED_FQ_CODEL_H_ */
388