/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <mach/thread_act.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>
#include <kern/uipc_domain.h>

extern kern_return_t thread_terminate(thread_t);

static void nx_netif_mit_reset_interval(struct nx_netif_mit *);
static void nx_netif_mit_set_start_interval(struct nx_netif_mit *);
static uint32_t nx_netif_mit_update_interval(struct nx_netif_mit *, boolean_t);
static void nx_netif_mit_thread_func(void *, wait_result_t);
static void nx_netif_mit_thread_cont(void *, wait_result_t);
static void nx_netif_mit_s_thread_cont(void *, wait_result_t);
static void nx_netif_mit_stats(struct __kern_channel_ring *, uint64_t,
    uint64_t);

/* mitigation intervals in microseconds */
#define NETIF_BUSY_MIT_DELAY (100)

static uint32_t netif_busy_mit_delay = NETIF_BUSY_MIT_DELAY;

#define MIT_EWMA(old, new, gdecay, sdecay) do {                         \
        uint32_t _avg;                                                  \
        if ((_avg = (old)) > 0) {                                       \
                uint32_t _d = ((new) > _avg) ? gdecay : sdecay;         \
                _avg = (((_avg << (_d)) - _avg) + (new)) >> (_d);       \
        } else {                                                        \
                _avg = (new);                                           \
        }                                                               \
        (old) = _avg;                                                   \
} while (0)

/*
 * A larger decay factor results in a slower reaction. Each value is
 * the ilog2 of an EWMA decay rate; one for growth and another for
 * shrink. The two decay factors are chosen such that we shrink
 * quickly and grow slowly, where growth and shrink refer to the
 * mitigation delay interval.
 */
#define NETIF_AD_MIT_GDECAY 3 /* ilog2(8) */
static uint32_t netif_ad_mit_gdecay = NETIF_AD_MIT_GDECAY;

#define NETIF_AD_MIT_SDECAY 2 /* ilog2(4) */
static uint32_t netif_ad_mit_sdecay = NETIF_AD_MIT_SDECAY;
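
/*
 * Worked example (illustration only): MIT_EWMA computes roughly
 * avg += (new - avg) / 2^decay.  With avg = 100 and gdecay = 3, a
 * growing sample of 200 yields ((100 << 3) - 100 + 200) >> 3 = 112,
 * i.e. 1/8th of the way up; with sdecay = 2, a shrinking sample of
 * 20 yields ((100 << 2) - 100 + 20) >> 2 = 80, i.e. 1/4th of the
 * way down.
 */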

#define NETIF_MIT_MODE_HOLDTIME_MIN (1ULL * 1000 * 1000)  /* 1 ms */
#define NETIF_MIT_MODE_HOLDTIME (1000ULL * 1000 * 1000)   /* 1 sec */
static uint64_t netif_mit_mode_holdtime = NETIF_MIT_MODE_HOLDTIME;

#define NETIF_MIT_SAMPLETIME_MIN (1ULL * 1000 * 1000)     /* 1 ms */
#define NETIF_MIT_SAMPLETIME (10ULL * 1000 * 1000)        /* 10 ms */
static uint64_t netif_mit_sample_holdtime = NETIF_MIT_SAMPLETIME;
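
/*
 * If no stats sample has been taken for longer than
 * netif_mit_mode_holdtime, nx_netif_mit_update_interval() resets
 * the mode, config index and averages back to their initial values.
 */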

/*
 * These numbers are based on the 10 ms netif_mit_sample_holdtime;
 * changing that hold time requires recomputing them.
 */
#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#endif /* !DEVELOPMENT && !DEBUG */
        { .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
          .cfg_bhiwat = 6000, .cfg_ival = 100 },
        { .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
          .cfg_bhiwat = 300000, .cfg_ival = 300 },
        { .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
          .cfg_bhiwat = 300000, .cfg_ival = 500 },
        { .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
          .cfg_bhiwat = 375000, .cfg_ival = 1000 },
        { .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
          .cfg_bhiwat = 30000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#endif /* !DEVELOPMENT && !DEBUG */
        { .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
          .cfg_bhiwat = 6000, .cfg_ival = 100 },
        { .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
          .cfg_bhiwat = 300000, .cfg_ival = 300 },
        { .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
          .cfg_bhiwat = 300000, .cfg_ival = 500 },
        { .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
          .cfg_bhiwat = 375000, .cfg_ival = 1000 },
        { .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
          .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#endif /* !DEVELOPMENT && !DEBUG */
        { .cfg_plowat = 10, .cfg_phiwat = 40, .cfg_blowat = 4000,
          .cfg_bhiwat = 6000, .cfg_ival = 300 },
        { .cfg_plowat = 20, .cfg_phiwat = 60, .cfg_blowat = 5000,
          .cfg_bhiwat = 150000, .cfg_ival = 500 },
        { .cfg_plowat = 40, .cfg_phiwat = 80, .cfg_blowat = 80000,
          .cfg_bhiwat = 200000, .cfg_ival = 700 },
        { .cfg_plowat = 60, .cfg_phiwat = 250, .cfg_blowat = 150000,
          .cfg_bhiwat = 375000, .cfg_ival = 1500 },
        { .cfg_plowat = 260, .cfg_phiwat = 2000, .cfg_blowat = 450000,
          .cfg_bhiwat = 3000000, .cfg_ival = 400 },
};
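
/*
 * Each row above describes one mitigation level: cfg_plowat and
 * cfg_phiwat are the packets-per-sample low/high watermarks,
 * cfg_blowat and cfg_bhiwat the bytes-per-sample equivalents, and
 * cfg_ival the delay interval in microseconds.  When both the packet
 * and byte averages reach the high watermarks, nx_netif_mit_stats()
 * below moves to the next row; when both drop to the low watermarks,
 * it moves back down a row.
 */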

#if (DEVELOPMENT || DEBUG)
static int sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, busy_mit_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_busy_mit_delay,
    NETIF_BUSY_MIT_DELAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_gdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_gdecay, NETIF_AD_MIT_GDECAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_sdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_sdecay, NETIF_AD_MIT_SDECAY, "");
SYSCTL_PROC(_kern_skywalk_netif, OID_AUTO, ad_mit_freeze,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &netif_mit_mode_holdtime,
    NETIF_MIT_MODE_HOLDTIME, sysctl_mit_mode_holdtime, "Q", "");
#endif /* DEVELOPMENT || DEBUG */

void
nx_netif_mit_init(struct nx_netif *nif, const struct ifnet *ifp,
    struct nx_netif_mit *mit, struct __kern_channel_ring *kr,
    boolean_t simple)
{
#pragma unused(nif)
        thread_precedence_policy_data_t info;
        __unused kern_return_t kret;
        char oid_name_buf[24];
        const char *__null_terminated oid_name = NULL;

        static_assert(sizeof(mit_cfg_tbl_native_cellular) <= sizeof(((struct nx_netif_mit *)0)->mit_tbl));

        lck_spin_init(&mit->mit_lock, kr->ckr_qlock_group, &channel_lock_attr);

        if (kr->ckr_tx == NR_TX) {
                if (simple) {
                        (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                            "skywalk_%s_tx_%u", ifp->if_xname, kr->ckr_ring_id);
                } else {
                        (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                            "skywalk_mit_%s_tx_%u", ifp->if_xname,
                            kr->ckr_ring_id);
                }
                oid_name = tsnprintf(oid_name_buf, sizeof(oid_name_buf),
                    "tx_%u", kr->ckr_ring_id);
        } else {
                if (simple) {
                        (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                            "skywalk_%s_rx_%u", ifp->if_xname, kr->ckr_ring_id);
                } else {
                        (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                            "skywalk_mit_%s_rx_%u", ifp->if_xname,
                            kr->ckr_ring_id);
                }
                oid_name = tsnprintf(oid_name_buf, sizeof(oid_name_buf),
                    "rx_%u", kr->ckr_ring_id);
        }

        mit->mit_ckr = kr;
        mit->mit_ckr->ckr_mit = mit;
        mit->mit_interval = 0;
        mit->mit_netif_ifp = ifp;

        if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) && (ifp->if_family ==
            IFNET_FAMILY_CELLULAR)) {
                bcopy(mit_cfg_tbl_native_cellular,
                    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
                    sizeof(mit_cfg_tbl_native_cellular));
                mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native_cellular) /
                    sizeof(*mit->mit_cfg);
        } else if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
                bcopy(mit_cfg_tbl_native,
                    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
                    sizeof(mit->mit_tbl));
                mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native) /
                    sizeof(*mit->mit_cfg);
        } else {
                bcopy(mit_cfg_tbl_compat,
                    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
                    sizeof(mit->mit_tbl));
                mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_compat) /
                    sizeof(*mit->mit_cfg);
        }
        VERIFY(mit->mit_cfg_idx_max > 0);
        VERIFY(mit->mit_cfg_idx_max <= NETIF_MIT_CFG_TBL_MAX_CFG);

        if (ifp->if_rx_mit_ival != 0) {
                mit->mit_tbl[0].cfg_ival = ifp->if_rx_mit_ival;
                SK_D("mit interval updated: %s cfg %u ival %u",
                    mit->mit_name, 0, mit->mit_tbl[0].cfg_ival);
        }

        net_timerclear(&mit->mit_mode_holdtime);
        net_timerclear(&mit->mit_mode_lasttime);
        net_timerclear(&mit->mit_sample_time);
        net_timerclear(&mit->mit_sample_lasttime);
        net_timerclear(&mit->mit_start_time);

        net_nsectimer(&netif_mit_mode_holdtime, &mit->mit_mode_holdtime);
        net_nsectimer(&netif_mit_sample_holdtime, &mit->mit_sample_time);

        /* initialize mode and params */
        nx_netif_mit_reset_interval(mit);
        VERIFY(mit->mit_cfg != NULL && mit->mit_cfg_idx < mit->mit_cfg_idx_max);
        mit->mit_flags = NETIF_MITF_INITIALIZED;
        if (simple) {
                /*
                 * Simple mitigation where we don't collect any statistics
                 * at all, and therefore don't want to register the ring's
                 * ckr_netif_mit_stats() callback.
                 */
                mit->mit_flags |= NETIF_MITF_SIMPLE;
                ASSERT(kr->ckr_netif_mit_stats == NULL);
        } else {
                /*
                 * Regular mitigation where we collect stats and use them
                 * for determining the delay between wakeups; initialize
                 * the ring's ckr_netif_mit_stats() callback.
                 */
                kr->ckr_netif_mit_stats = nx_netif_mit_stats;
        }

        if (kernel_thread_start(nx_netif_mit_thread_func, mit,
            &mit->mit_thread) != KERN_SUCCESS) {
                panic_plain("%s: can't create thread", mit->mit_name);
                /* NOTREACHED */
                __builtin_unreachable();
        }
        /* this must not fail */
        VERIFY(mit->mit_thread != NULL);

        /* wait until nx_netif_mit_thread_func() is ready */
        MIT_SPIN_LOCK(mit);
        while (!(mit->mit_flags & NETIF_MITF_READY)) {
                (void) assert_wait(&mit->mit_thread, THREAD_UNINT);
                MIT_SPIN_UNLOCK(mit);
                (void) thread_block(THREAD_CONTINUE_NULL);
                MIT_SPIN_LOCK(mit);
        }
        MIT_SPIN_UNLOCK(mit);

        bzero(&info, sizeof(info));
        info.importance = 0;
        kret = thread_policy_set(mit->mit_thread, THREAD_PRECEDENCE_POLICY,
            (thread_policy_t)&info, THREAD_PRECEDENCE_POLICY_COUNT);
        ASSERT(kret == KERN_SUCCESS);

#if (DEVELOPMENT || DEBUG)
        /* register mit sysctl skoid */
        skoid_create(&mit->mit_skoid, SKOID_DNODE(nif->nif_skoid), oid_name, 0);
        skoid_add_uint(&mit->mit_skoid, "interval", CTLFLAG_RW,
            &mit->mit_interval);
        struct skoid *skoid = &mit->mit_skoid;
        struct mit_cfg_tbl *t;
#define MIT_ADD_SKOID(_i)                                                \
        t = &mit->mit_tbl[_i];                                           \
        skoid_add_uint(skoid, #_i"_plowat", CTLFLAG_RW, &t->cfg_plowat); \
        skoid_add_uint(skoid, #_i"_phiwat", CTLFLAG_RW, &t->cfg_phiwat); \
        skoid_add_uint(skoid, #_i"_blowat", CTLFLAG_RW, &t->cfg_blowat); \
        skoid_add_uint(skoid, #_i"_bhiwat", CTLFLAG_RW, &t->cfg_bhiwat); \
        skoid_add_uint(skoid, #_i"_ival", CTLFLAG_RW, &t->cfg_ival);
        MIT_ADD_SKOID(0);
        MIT_ADD_SKOID(1);
        MIT_ADD_SKOID(2);
        MIT_ADD_SKOID(3);
        MIT_ADD_SKOID(4);
        static_assert(NETIF_MIT_CFG_TBL_MAX_CFG == 5);
#endif /* DEVELOPMENT || DEBUG */
}

__attribute__((always_inline))
static inline void
nx_netif_mit_reset_interval(struct nx_netif_mit *mit)
{
        (void) nx_netif_mit_update_interval(mit, TRUE);
}

__attribute__((always_inline))
static inline void
nx_netif_mit_set_start_interval(struct nx_netif_mit *mit)
{
        nanouptime(&mit->mit_start_time);
}

__attribute__((always_inline))
static inline uint32_t
nx_netif_mit_update_interval(struct nx_netif_mit *mit, boolean_t reset)
{
        struct timespec now, delta;
        uint64_t r;
        uint32_t i;

        nanouptime(&now);
        net_timersub(&now, &mit->mit_sample_lasttime, &delta);

        /* CSTYLED */
        if ((net_timercmp(&delta, &mit->mit_mode_holdtime, >)) || reset) {
                mit_mode_t mode = (mit->mit_flags & NETIF_MITF_SIMPLE) ?
                    MIT_MODE_SIMPLE : MIT_MODE_ADVANCED_STATIC;

                /* if we haven't updated stats in a while, reset it back */
                SK_DF(SK_VERB_NETIF_MIT, "%s: resetting [mode %u->%u]",
                    mit->mit_name, mit->mit_mode, mode);

                mit->mit_mode = mode;
                mit->mit_cfg_idx = 0;
                mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
                mit->mit_packets_avg = 0;
                mit->mit_bytes_avg = 0;
        }

        /* calculate work duration (since last start work time) */
        if (net_timerisset(&mit->mit_start_time)) {
                net_timersub(&now, &mit->mit_start_time, &delta);
                net_timerusec(&delta, &r);
        } else {
                r = 0;
        }

        switch (mit->mit_mode) {
        case MIT_MODE_SIMPLE:
                i = 0;
                break;

        case MIT_MODE_ADVANCED_STATIC:
                i = mit->mit_interval;
                break;

        case MIT_MODE_ADVANCED_DYNAMIC:
                i = mit->mit_cfg->cfg_ival;
                break;
        }

        /*
         * The idea here is to return the effective delay interval that
         * causes each work phase to begin at the desired cadence, at
         * the minimum.
         */
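        /*
         * Illustration only: with a desired 500 usec cadence and a work
         * phase that took r = 120 usec (measured from the timestamp set
         * by nx_netif_mit_set_start_interval()), we only need to delay
         * for the remaining 380 usec.  If the work phase took longer
         * than the cadence, we return 0 and apply no extra delay.
         */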
        if (__probable(r != 0)) {
                if (__probable(i > r)) {
                        i -= r;
                } else {
                        /* bump up cfg_idx perhaps? */
                        i = 0;
                }
        }

        return i;
}

void
nx_netif_mit_cleanup(struct nx_netif_mit *mit)
{
        if (mit->mit_thread != THREAD_NULL) {
                ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);

                /* signal thread to begin self-termination */
                MIT_SPIN_LOCK(mit);
                mit->mit_flags |= NETIF_MITF_TERMINATING;
                (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
                    mit->mit_thread);
                MIT_SPIN_UNLOCK(mit);

                /* and wait for thread to terminate */
                MIT_SPIN_LOCK(mit);
                while (!(mit->mit_flags & NETIF_MITF_TERMINATED)) {
                        (void) assert_wait(&mit->mit_flags, THREAD_UNINT);
                        MIT_SPIN_UNLOCK(mit);
                        (void) thread_block(THREAD_CONTINUE_NULL);
                        MIT_SPIN_LOCK(mit);
                }
                ASSERT(mit->mit_flags & NETIF_MITF_TERMINATED);
                MIT_SPIN_UNLOCK(mit);
                mit->mit_thread = THREAD_NULL;
        }
        ASSERT(mit->mit_thread == THREAD_NULL);
        lck_spin_destroy(&mit->mit_lock, mit->mit_ckr->ckr_qlock_group);

        mit->mit_ckr->ckr_mit = NULL;
        mit->mit_ckr = NULL;
        mit->mit_netif_ifp = NULL;
        mit->mit_flags &= ~NETIF_MITF_INITIALIZED;

        net_timerclear(&mit->mit_mode_holdtime);
        net_timerclear(&mit->mit_mode_lasttime);
        net_timerclear(&mit->mit_sample_time);
        net_timerclear(&mit->mit_sample_lasttime);
        net_timerclear(&mit->mit_start_time);

#if (DEVELOPMENT || DEBUG)
        skoid_destroy(&mit->mit_skoid);
#endif /* DEVELOPMENT || DEBUG */
}

int
nx_netif_mit_tx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
        struct nexus_netif_adapter *nifna =
            (struct nexus_netif_adapter *)KRNA(kr);
        struct netif_stats *nifs =
            &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

        ASSERT(kr->ckr_tx == NR_TX);
        STATS_INC(nifs, NETIF_STATS_TX_IRQ);

        /*
         * If mitigation is not enabled for this kring, we're done; otherwise,
         * signal the thread that there is work to do, unless it's terminating.
         */
        if (__probable(nifna->nifna_tx_mit == NULL)) {
                (void) nx_netif_common_intr(kr, p, flags, work_done);
        } else {
                struct nx_netif_mit *mit =
                    &nifna->nifna_tx_mit[kr->ckr_ring_id];
                ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
                MIT_SPIN_LOCK(mit);
                mit->mit_requests++;
                if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
                    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
                        (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
                            mit->mit_thread);
                }
                MIT_SPIN_UNLOCK(mit);
        }

        return 0;
}

int
nx_netif_mit_rx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
        struct nexus_netif_adapter *nifna =
            (struct nexus_netif_adapter *)KRNA(kr);
        struct netif_stats *nifs =
            &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

        KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_START), SK_KVA(kr));

        ASSERT(kr->ckr_tx == NR_RX);
        STATS_INC(nifs, NETIF_STATS_RX_IRQ);

        /*
         * If mitigation is enabled for this kring, signal the thread that there
         * is work to do, unless it's terminating. Otherwise, we're done.
         */
        if (__improbable(nifna->nifna_rx_mit != NULL)) {
                struct nx_netif_mit *mit =
                    &nifna->nifna_rx_mit[kr->ckr_ring_id];
                ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
                MIT_SPIN_LOCK(mit);
                mit->mit_requests++;
                if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
                    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
                        (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
                            mit->mit_thread);
                }
                MIT_SPIN_UNLOCK(mit);
        } else {
                (void) nx_netif_common_intr(kr, p, flags, work_done);
        }

        KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_END), SK_KVA(kr));

        return 0;
}

__attribute__((noreturn))
static void
nx_netif_mit_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
        struct nx_netif_mit *__single mit = v;

        ASSERT(mit->mit_thread == current_thread());
        thread_set_thread_name(current_thread(),
            __unsafe_null_terminated_from_indexable(mit->mit_name));

        MIT_SPIN_LOCK(mit);
        VERIFY(!(mit->mit_flags & (NETIF_MITF_READY | NETIF_MITF_RUNNING)));
        /* tell nx_netif_mit_init() to proceed */
        mit->mit_flags |= NETIF_MITF_READY;
        wakeup((caddr_t)&mit->mit_thread);
        (void) assert_wait(&mit->mit_flags, THREAD_UNINT);
        MIT_SPIN_UNLOCK(mit);
        if (mit->mit_flags & NETIF_MITF_SIMPLE) {
                (void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
        } else {
                (void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
        }
        /* NOTREACHED */
        __builtin_unreachable();
}

/*
 * Simple variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_s_thread_cont(void *v, wait_result_t wres)
{
        struct __kern_channel_ring *kr;
        struct nx_netif_mit *__single mit = v;
        struct netif_stats *nifs;
        int irq_stat, error;

        ASSERT(mit->mit_flags & NETIF_MITF_SIMPLE);
        kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
        nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
        irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
            NETIF_STATS_RX_IRQ_MIT;

        MIT_SPIN_LOCK(mit);
        if (__improbable(wres == THREAD_INTERRUPTED ||
            (mit->mit_flags & NETIF_MITF_TERMINATING))) {
                goto terminate;
        }

        ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
        mit->mit_flags |= NETIF_MITF_RUNNING;

        /*
         * Keep on servicing the ring until there are no more requests.
         */
        for (;;) {
                uint32_t requests = mit->mit_requests;

                STATS_INC(nifs, irq_stat);
                MIT_SPIN_UNLOCK(mit);

                error = nx_netif_common_intr(kr, kernproc, 0, NULL);

                /*
                 * We could get EBUSY here due to netif_inject_rx() holding
                 * the kring lock.  EBUSY means the RX notify callback (which
                 * performs the RX syncs, etc.) wasn't called.  If we don't
                 * retry nx_netif_common_intr(), the driver will eventually
                 * stop notifying us because its queues are full.
                 */
                if (error == EBUSY) {
                        uint32_t ival =
                            MAX(netif_busy_mit_delay, NETIF_BUSY_MIT_DELAY);

                        MIT_SPIN_LOCK(mit);
                        mit->mit_requests++;
                        MIT_SPIN_UNLOCK(mit);
                        delay(ival);
                }

                MIT_SPIN_LOCK(mit);

                if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
                    requests == mit->mit_requests) {
                        mit->mit_requests = 0;
                        break;
                }
        }

        if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
                uint64_t deadline = TIMEOUT_WAIT_FOREVER;

                MIT_SPIN_LOCK_ASSERT_HELD(mit);

                if (kr->ckr_rate_limited) {
                        SK_DF(SK_VERB_NETIF_MIT,
                            "%s: posting wait deadline for MIT",
                            mit->mit_name);
                        clock_interval_to_deadline(1, NSEC_PER_MSEC,
                            &deadline);
                }
                mit->mit_flags &= ~NETIF_MITF_RUNNING;
                (void) assert_wait_deadline(&mit->mit_flags,
                    THREAD_UNINT, deadline);
                MIT_SPIN_UNLOCK(mit);
                (void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
                /* NOTREACHED */
        } else {
terminate:
                MIT_SPIN_LOCK_ASSERT_HELD(mit);

                VERIFY(mit->mit_thread == current_thread());
                VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
                mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
                    NETIF_MITF_TERMINATING);
                mit->mit_flags |= NETIF_MITF_TERMINATED;
                wakeup((caddr_t)&mit->mit_flags);
                MIT_SPIN_UNLOCK(mit);

                /* for the extra refcnt from kernel_thread_start() */
                thread_deallocate(current_thread());
                /* this is the end */
                thread_terminate(current_thread());
                /* NOTREACHED */
        }

        /* must never get here */
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
}

/*
 * Advanced variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_thread_cont(void *v, wait_result_t wres)
{
        struct __kern_channel_ring *kr;
        struct nx_netif_mit *__single mit = v;
        struct netif_stats *nifs;
        int irq_stat;

        ASSERT(!(mit->mit_flags & NETIF_MITF_SIMPLE));
        kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
        nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
        irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
            NETIF_STATS_RX_IRQ_MIT;

        MIT_SPIN_LOCK(mit);
        if (__improbable(wres == THREAD_INTERRUPTED ||
            (mit->mit_flags & NETIF_MITF_TERMINATING))) {
                goto terminate;
        }

        ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
        mit->mit_flags |= NETIF_MITF_RUNNING;

        /*
         * Keep on servicing the ring until there are no more requests.
         */
        for (;;) {
                uint32_t requests = mit->mit_requests;
                uint32_t ival;
                int error = 0;

                STATS_INC(nifs, irq_stat);
                MIT_SPIN_UNLOCK(mit);

                /*
                 * Notify the ring and trigger packet fan-out;
                 * bracket the call with timestamps to compute
                 * our effective mitigation/delay interval below.
                 */
                nx_netif_mit_set_start_interval(mit);
                error = nx_netif_common_intr(kr, kernproc, 0, NULL);
                ival = nx_netif_mit_update_interval(mit, FALSE);

                /*
                 * If the mitigation interval is non-zero (for TX/RX),
                 * always introduce an artificial delay for that amount
                 * of time.  Otherwise, if we get EBUSY, another thread
                 * is inside kr_enter() working on the ring, so wait a
                 * bit and retry.
                 */
                if (ival != 0 || error == EBUSY) {
                        if (error == EBUSY) {
                                ival = MAX(netif_busy_mit_delay,
                                    NETIF_BUSY_MIT_DELAY);
                                MIT_SPIN_LOCK(mit);
                                mit->mit_requests++;
                                MIT_SPIN_UNLOCK(mit);
                        }
                        delay(ival);
                }

                MIT_SPIN_LOCK(mit);

                if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
                    requests == mit->mit_requests) {
                        mit->mit_requests = 0;
                        break;
                }
        }

        if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
                uint64_t deadline = TIMEOUT_WAIT_FOREVER;

                MIT_SPIN_LOCK_ASSERT_HELD(mit);

                if (kr->ckr_rate_limited) {
                        SK_DF(SK_VERB_NETIF_MIT,
                            "%s: posting wait deadline for MIT",
                            mit->mit_name);
                        clock_interval_to_deadline(1, NSEC_PER_MSEC,
                            &deadline);
                }
                mit->mit_flags &= ~NETIF_MITF_RUNNING;
                (void) assert_wait_deadline(&mit->mit_flags,
                    THREAD_UNINT, deadline);
                MIT_SPIN_UNLOCK(mit);
                (void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
                /* NOTREACHED */
        } else {
terminate:
                MIT_SPIN_LOCK_ASSERT_HELD(mit);

                VERIFY(mit->mit_thread == current_thread());
                VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
                mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
                    NETIF_MITF_TERMINATING);
                mit->mit_flags |= NETIF_MITF_TERMINATED;
                wakeup((caddr_t)&mit->mit_flags);
                MIT_SPIN_UNLOCK(mit);

                /* for the extra refcnt from kernel_thread_start() */
                thread_deallocate(current_thread());
                /* this is the end */
                thread_terminate(current_thread());
                /* NOTREACHED */
        }

        /* must never get here */
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
}

static void
nx_netif_mit_stats(struct __kern_channel_ring *kr, uint64_t pkts,
    uint64_t bytes)
{
        struct nx_netif_mit *mit = kr->ckr_mit;
        struct timespec now, delta;
        mit_mode_t mode;
        uint32_t cfg_idx;

        ASSERT(mit != NULL && !(mit->mit_flags & NETIF_MITF_SIMPLE));

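        /*
         * Single-sampler guard: atomically set NETIF_MITF_SAMPLING,
         * and bail if it was already set (another thread is sampling);
         * the sample being reported here is simply dropped.
         */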
        if ((os_atomic_or_orig(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed) &
            NETIF_MITF_SAMPLING) != 0) {
                return;
        }

        mode = mit->mit_mode;
        cfg_idx = mit->mit_cfg_idx;

        nanouptime(&now);
        if (!net_timerisset(&mit->mit_sample_lasttime)) {
                *(&mit->mit_sample_lasttime) = *(&now);
        }

        net_timersub(&now, &mit->mit_sample_lasttime, &delta);
        if (net_timerisset(&mit->mit_sample_time)) {
                uint32_t ptot, btot;

                /* accumulate statistics for current sampling */
                PKTCNTR_ADD(&mit->mit_sstats, pkts, bytes);

                /* CSTYLED */
                if (net_timercmp(&delta, &mit->mit_sample_time, <)) {
                        goto done;
                }

                *(&mit->mit_sample_lasttime) = *(&now);

                /* calculate min/max of bytes */
                btot = (uint32_t)mit->mit_sstats.bytes;
                if (mit->mit_bytes_min == 0 || mit->mit_bytes_min > btot) {
                        mit->mit_bytes_min = btot;
                }
                if (btot > mit->mit_bytes_max) {
                        mit->mit_bytes_max = btot;
                }

                /* calculate EWMA of bytes */
                MIT_EWMA(mit->mit_bytes_avg, btot,
                    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

                /* calculate min/max of packets */
                ptot = (uint32_t)mit->mit_sstats.packets;
                if (mit->mit_packets_min == 0 || mit->mit_packets_min > ptot) {
                        mit->mit_packets_min = ptot;
                }
                if (ptot > mit->mit_packets_max) {
                        mit->mit_packets_max = ptot;
                }

                /* calculate EWMA of packets */
                MIT_EWMA(mit->mit_packets_avg, ptot,
                    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

                /* reset sampling statistics */
                PKTCNTR_CLEAR(&mit->mit_sstats);

                /* Perform mode transition, if necessary */
                if (!net_timerisset(&mit->mit_mode_lasttime)) {
                        *(&mit->mit_mode_lasttime) = *(&now);
                }

                net_timersub(&now, &mit->mit_mode_lasttime, &delta);
                /* CSTYLED */
                if (net_timercmp(&delta, &mit->mit_mode_holdtime, <)) {
                        goto done;
                }

                SK_RDF(SK_VERB_NETIF_MIT, 2, "%s [%u]: pavg %u bavg %u "
                    "delay %u usec", mit->mit_name, mit->mit_cfg_idx,
                    mit->mit_packets_avg, mit->mit_bytes_avg,
                    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
                    (mit->mit_tbl[mit->mit_cfg_idx].cfg_ival)));

                if (mit->mit_packets_avg <= mit->mit_cfg->cfg_plowat &&
                    mit->mit_bytes_avg <= mit->mit_cfg->cfg_blowat) {
                        if (cfg_idx == 0) {
                                mode = MIT_MODE_ADVANCED_STATIC;
                        } else {
                                ASSERT(mode == MIT_MODE_ADVANCED_DYNAMIC);
                                --cfg_idx;
                        }
                } else if (mit->mit_packets_avg >= mit->mit_cfg->cfg_phiwat &&
                    mit->mit_bytes_avg >= mit->mit_cfg->cfg_bhiwat) {
                        mode = MIT_MODE_ADVANCED_DYNAMIC;
                        if (cfg_idx < (mit->mit_cfg_idx_max - 1)) {
                                ++cfg_idx;
                        }
                }

                if (mode != mit->mit_mode || cfg_idx != mit->mit_cfg_idx) {
                        ASSERT(cfg_idx < mit->mit_cfg_idx_max);

                        SK_DF(SK_VERB_NETIF_MIT, "%s [%u->%u]: pavg %u "
                            "bavg %u [mode %u->%u, delay %u->%u usec]",
                            mit->mit_name, mit->mit_cfg_idx, cfg_idx,
                            mit->mit_packets_avg, mit->mit_bytes_avg,
                            mit->mit_mode, mode,
                            (mit->mit_mode == MIT_MODE_ADVANCED_STATIC ? 0 :
                            (mit->mit_cfg->cfg_ival)),
                            (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
                            (mit->mit_tbl[cfg_idx].cfg_ival)));

                        mit->mit_mode = mode;
                        mit->mit_cfg_idx = cfg_idx;
                        mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
                        *(&mit->mit_mode_lasttime) = *(&now);
                }
        }

done:
        os_atomic_andnot(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed);
}

#if (DEVELOPMENT || DEBUG)
static int
sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
        uint64_t q;
        int err;

        q = netif_mit_mode_holdtime;

        err = sysctl_handle_quad(oidp, &q, 0, req);
        if (err != 0 || req->newptr == USER_ADDR_NULL) {
                return err;
        }

        if (q < NETIF_MIT_MODE_HOLDTIME_MIN) {
                q = NETIF_MIT_MODE_HOLDTIME_MIN;
        }

        netif_mit_mode_holdtime = q;

        return err;
}
#endif /* DEVELOPMENT || DEBUG */