/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <mach/thread_act.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

extern kern_return_t thread_terminate(thread_t);

static void nx_netif_mit_reset_interval(struct nx_netif_mit *);
static void nx_netif_mit_set_start_interval(struct nx_netif_mit *);
static uint32_t nx_netif_mit_update_interval(struct nx_netif_mit *, boolean_t);
static void nx_netif_mit_thread_func(void *, wait_result_t);
static void nx_netif_mit_thread_cont(void *, wait_result_t);
static void nx_netif_mit_s_thread_cont(void *, wait_result_t);
static void nx_netif_mit_stats(struct __kern_channel_ring *, uint64_t,
    uint64_t);

/* mitigation intervals in microseconds */
#define NETIF_BUSY_MIT_DELAY    (100)

static uint32_t netif_busy_mit_delay = NETIF_BUSY_MIT_DELAY;

#define MIT_EWMA(old, new, gdecay, sdecay) do {                         \
	uint32_t _avg;                                                  \
	if ((_avg = (old)) > 0) {                                       \
	        uint32_t _d = ((new) > _avg) ? gdecay : sdecay;         \
	        _avg = (((_avg << (_d)) - _avg) + (new)) >> (_d);       \
	} else {                                                        \
	        _avg = (new);                                           \
	}                                                               \
	(old) = _avg;                                                   \
} while (0)

/*
 * A larger decay factor results in a slower reaction.  Each value is
 * the ilog2 of an EWMA decay rate; one for growth and another for
 * shrink.  The two decay factors are chosen such that we react quickly
 * to shrink and slowly to grow.  Growth and shrink refer to the
 * mitigation delay interval.
 */
#define NETIF_AD_MIT_GDECAY     3       /* ilog2(8) */
static uint32_t netif_ad_mit_gdecay = NETIF_AD_MIT_GDECAY;

#define NETIF_AD_MIT_SDECAY     2       /* ilog2(4) */
static uint32_t netif_ad_mit_sdecay = NETIF_AD_MIT_SDECAY;
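
/*
 * To illustrate MIT_EWMA() with the default decay factors above: with
 * decay d, the update is avg = ((avg << d) - avg + new) >> d, i.e.
 * avg = ((2^d - 1) * avg + new) / 2^d.  A growing sample (new > avg)
 * uses gdecay = 3, so avg = (7 * avg + new) / 8 and the new sample
 * carries 1/8th of the weight; a shrinking sample uses sdecay = 2, so
 * avg = (3 * avg + new) / 4 and the sample carries 1/4th of the weight.
 * For example, avg = 100 becomes (700 + 200) / 8 = 112 on new = 200,
 * but (300 + 0) / 4 = 75 on new = 0.
 */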

#define NETIF_MIT_MODE_HOLDTIME_MIN     (1ULL * 1000 * 1000)    /* 1 ms */
#define NETIF_MIT_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static uint64_t netif_mit_mode_holdtime = NETIF_MIT_MODE_HOLDTIME;

#define NETIF_MIT_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define NETIF_MIT_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static uint64_t netif_mit_sample_holdtime = NETIF_MIT_SAMPLETIME;
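
/*
 * Both holdtimes are in nanoseconds and are converted with
 * net_nsectimer() in nx_netif_mit_init().  nx_netif_mit_stats() folds
 * the accumulated packet/byte counts into the EWMAs at most once per
 * sample holdtime (10 ms by default), and after a mode or config-level
 * transition it won't perform another one within the mode holdtime
 * (1 second by default).
 */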

/*
 * These numbers are based on a 10 ms netif_mit_sample_holdtime;
 * changing the hold time will require recomputing them.
 */
#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};
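
/*
 * To illustrate how nx_netif_mit_stats() below walks these tables,
 * using the first row above: once the per-sample EWMAs reach both
 * high watermarks (>= 60 packets and >= 6000 bytes per 10 ms sample),
 * the mode becomes MIT_MODE_ADVANCED_DYNAMIC and the config level
 * advances to row 1, i.e. a 300 usec delay interval.  Once the EWMAs
 * fall to both low watermarks of the current row, the level steps
 * back down; at row 0 it instead drops to MIT_MODE_ADVANCED_STATIC,
 * where no table-driven delay applies.
 */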

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 40, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 300 },
	{ .cfg_plowat = 20, .cfg_phiwat = 60, .cfg_blowat = 5000,
	  .cfg_bhiwat = 150000, .cfg_ival = 500 },
	{ .cfg_plowat = 40, .cfg_phiwat = 80, .cfg_blowat = 80000,
	  .cfg_bhiwat = 200000, .cfg_ival = 700 },
	{ .cfg_plowat = 60, .cfg_phiwat = 250, .cfg_blowat = 150000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1500 },
	{ .cfg_plowat = 260, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 400 },
};

#if (DEVELOPMENT || DEBUG)
static int sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, busy_mit_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_busy_mit_delay,
    NETIF_BUSY_MIT_DELAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_gdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_gdecay, NETIF_AD_MIT_GDECAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_sdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_sdecay, NETIF_AD_MIT_SDECAY, "");
SYSCTL_PROC(_kern_skywalk_netif, OID_AUTO, ad_mit_freeze,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &netif_mit_mode_holdtime,
    NETIF_MIT_MODE_HOLDTIME, sysctl_mit_mode_holdtime, "Q", "");
#endif /* DEVELOPMENT || DEBUG */

void
nx_netif_mit_init(struct nx_netif *nif, const struct ifnet *ifp,
    struct nx_netif_mit *mit, struct __kern_channel_ring *kr,
    boolean_t simple)
{
#pragma unused(nif)
	thread_precedence_policy_data_t info;
	__unused kern_return_t kret;
	char oid_name[24];

	_CASSERT(sizeof(mit_cfg_tbl_native_cellular) <=
	    sizeof(((struct nx_netif_mit *)0)->mit_tbl));

	lck_spin_init(&mit->mit_lock, kr->ckr_qlock_group, &channel_lock_attr);

	if (kr->ckr_tx == NR_TX) {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_tx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_tx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		(void) snprintf(oid_name, sizeof(oid_name),
		    "tx_%u", kr->ckr_ring_id);
	} else {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_rx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_rx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		(void) snprintf(oid_name, sizeof(oid_name),
		    "rx_%u", kr->ckr_ring_id);
	}

	mit->mit_ckr = kr;
	mit->mit_ckr->ckr_mit = mit;
	mit->mit_interval = 0;
	mit->mit_netif_ifp = ifp;

	if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) && (ifp->if_family ==
	    IFNET_FAMILY_CELLULAR)) {
		bcopy(mit_cfg_tbl_native_cellular,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit_cfg_tbl_native_cellular));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native_cellular) /
		    sizeof(*mit->mit_cfg);
	} else if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		bcopy(mit_cfg_tbl_native,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native) /
		    sizeof(*mit->mit_cfg);
	} else {
		bcopy(mit_cfg_tbl_compat,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_compat) /
		    sizeof(*mit->mit_cfg);
	}
	VERIFY(mit->mit_cfg_idx_max > 0);
	VERIFY(mit->mit_cfg_idx_max <= NETIF_MIT_CFG_TBL_MAX_CFG);

	if (ifp->if_rx_mit_ival != 0) {
		mit->mit_tbl[0].cfg_ival = ifp->if_rx_mit_ival;
		SK_D("mit interval updated: %s cfg %u ival %u",
		    mit->mit_name, 0, mit->mit_tbl[0].cfg_ival);
	}

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

	net_nsectimer(&netif_mit_mode_holdtime, &mit->mit_mode_holdtime);
	net_nsectimer(&netif_mit_sample_holdtime, &mit->mit_sample_time);

	/* initialize mode and params */
	nx_netif_mit_reset_interval(mit);
	VERIFY(mit->mit_cfg != NULL && mit->mit_cfg_idx < mit->mit_cfg_idx_max);
	mit->mit_flags = NETIF_MITF_INITIALIZED;
	if (simple) {
		/*
		 * Simple mitigation where we don't collect any statistics
		 * at all, and therefore don't want to register the ring's
		 * ckr_netif_mit_stats() callback.
		 */
		mit->mit_flags |= NETIF_MITF_SIMPLE;
		ASSERT(kr->ckr_netif_mit_stats == NULL);
	} else {
		/*
		 * Regular mitigation where we collect stats and use them
		 * for determining the delay between wakeups; initialize
		 * the ring's ckr_netif_mit_stats() callback.
		 */
		kr->ckr_netif_mit_stats = nx_netif_mit_stats;
	}

	if (kernel_thread_start(nx_netif_mit_thread_func, mit,
	    &mit->mit_thread) != KERN_SUCCESS) {
		panic_plain("%s: can't create thread", mit->mit_name);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* this must not fail */
	VERIFY(mit->mit_thread != NULL);

	/* wait until nx_netif_mit_thread_func() is ready */
	MIT_SPIN_LOCK(mit);
	while (!(mit->mit_flags & NETIF_MITF_READY)) {
		(void) assert_wait(&mit->mit_thread, THREAD_UNINT);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block(THREAD_CONTINUE_NULL);
		MIT_SPIN_LOCK(mit);
	}
	MIT_SPIN_UNLOCK(mit);

	bzero(&info, sizeof(info));
	info.importance = 0;
	kret = thread_policy_set(mit->mit_thread, THREAD_PRECEDENCE_POLICY,
	    (thread_policy_t)&info, THREAD_PRECEDENCE_POLICY_COUNT);
	ASSERT(kret == KERN_SUCCESS);

#if (DEVELOPMENT || DEBUG)
	/* register mit sysctl skoid */
	skoid_create(&mit->mit_skoid, SKOID_DNODE(nif->nif_skoid), oid_name, 0);
	skoid_add_uint(&mit->mit_skoid, "interval", CTLFLAG_RW,
	    &mit->mit_interval);
	struct skoid *skoid = &mit->mit_skoid;
	struct mit_cfg_tbl *t;
#define MIT_ADD_SKOID(_i)                                                \
	t = &mit->mit_tbl[_i];                                           \
	skoid_add_uint(skoid, #_i"_plowat", CTLFLAG_RW, &t->cfg_plowat); \
	skoid_add_uint(skoid, #_i"_phiwat", CTLFLAG_RW, &t->cfg_phiwat); \
	skoid_add_uint(skoid, #_i"_blowat", CTLFLAG_RW, &t->cfg_blowat); \
	skoid_add_uint(skoid, #_i"_bhiwat", CTLFLAG_RW, &t->cfg_bhiwat); \
	skoid_add_uint(skoid, #_i"_ival", CTLFLAG_RW, &t->cfg_ival);
	MIT_ADD_SKOID(0);
	MIT_ADD_SKOID(1);
	MIT_ADD_SKOID(2);
	MIT_ADD_SKOID(3);
	MIT_ADD_SKOID(4);
	_CASSERT(NETIF_MIT_CFG_TBL_MAX_CFG == 5);
#endif /* DEVELOPMENT || DEBUG */
}

__attribute__((always_inline))
static inline void
nx_netif_mit_reset_interval(struct nx_netif_mit *mit)
{
	(void) nx_netif_mit_update_interval(mit, TRUE);
}

__attribute__((always_inline))
static inline void
nx_netif_mit_set_start_interval(struct nx_netif_mit *mit)
{
	nanouptime(&mit->mit_start_time);
}

__attribute__((always_inline))
static inline uint32_t
nx_netif_mit_update_interval(struct nx_netif_mit *mit, boolean_t reset)
{
	struct timespec now, delta;
	uint64_t r;
	uint32_t i;

	nanouptime(&now);
	net_timersub(&now, &mit->mit_sample_lasttime, &delta);

	/* CSTYLED */
	if ((net_timercmp(&delta, &mit->mit_mode_holdtime, >)) || reset) {
		mit_mode_t mode = (mit->mit_flags & NETIF_MITF_SIMPLE) ?
		    MIT_MODE_SIMPLE : MIT_MODE_ADVANCED_STATIC;

		/* if we haven't updated stats in a while, reset them back */
		SK_DF(SK_VERB_NETIF_MIT, "%s: resetting [mode %u->%u]",
		    mit->mit_name, mit->mit_mode, mode);

		mit->mit_mode = mode;
		mit->mit_cfg_idx = 0;
		mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
		mit->mit_packets_avg = 0;
		mit->mit_bytes_avg = 0;
	}

	/* calculate work duration (since last start work time) */
	if (net_timerisset(&mit->mit_start_time)) {
		net_timersub(&now, &mit->mit_start_time, &delta);
		net_timerusec(&delta, &r);
	} else {
		r = 0;
	}

	switch (mit->mit_mode) {
	case MIT_MODE_SIMPLE:
		i = 0;
		break;

	case MIT_MODE_ADVANCED_STATIC:
		i = mit->mit_interval;
		break;

	case MIT_MODE_ADVANCED_DYNAMIC:
		i = mit->mit_cfg->cfg_ival;
		break;
	}

	/*
	 * The idea here is to return the effective delay interval that
	 * causes each work phase to begin at the desired cadence, at
	 * a minimum.
	 */
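	/*
	 * For example, with a target interval i = 300 usec and a work
	 * phase that took r = 120 usec, we return 180 usec so that the
	 * next phase begins roughly 300 usec after the previous one
	 * started; if the work ran longer than the target, we return 0
	 * and the thread re-arms without any artificial delay.
	 */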
	if (__probable(r != 0)) {
		if (__probable(i > r)) {
			i -= r;
		} else {
			/* bump up cfg_idx perhaps? */
			i = 0;
		}
	}

	return i;
}

void
nx_netif_mit_cleanup(struct nx_netif_mit *mit)
{
	if (mit->mit_thread != THREAD_NULL) {
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);

		/* signal thread to begin self-termination */
		MIT_SPIN_LOCK(mit);
		mit->mit_flags |= NETIF_MITF_TERMINATING;
		(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
		    mit->mit_thread);
		MIT_SPIN_UNLOCK(mit);

		/* and wait for thread to terminate */
		MIT_SPIN_LOCK(mit);
		while (!(mit->mit_flags & NETIF_MITF_TERMINATED)) {
			(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
			MIT_SPIN_UNLOCK(mit);
			(void) thread_block(THREAD_CONTINUE_NULL);
			MIT_SPIN_LOCK(mit);
		}
		ASSERT(mit->mit_flags & NETIF_MITF_TERMINATED);
		MIT_SPIN_UNLOCK(mit);
		mit->mit_thread = THREAD_NULL;
	}
	ASSERT(mit->mit_thread == THREAD_NULL);
	lck_spin_destroy(&mit->mit_lock, mit->mit_ckr->ckr_qlock_group);

	mit->mit_ckr->ckr_mit = NULL;
	mit->mit_ckr = NULL;
	mit->mit_netif_ifp = NULL;
	mit->mit_flags &= ~NETIF_MITF_INITIALIZED;

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

#if (DEVELOPMENT || DEBUG)
	skoid_destroy(&mit->mit_skoid);
#endif /* DEVELOPMENT || DEBUG */
}

int
nx_netif_mit_tx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	ASSERT(kr->ckr_tx == NR_TX);
	STATS_INC(nifs, NETIF_STATS_TX_IRQ);

	/*
	 * If mitigation is not enabled for this kring, we're done; otherwise,
	 * signal the thread that there is work to do, unless it's terminating.
	 */
	if (__probable(nifna->nifna_tx_mit == NULL)) {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	} else {
		struct nx_netif_mit *mit =
		    &nifna->nifna_tx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	}

	return 0;
}

int
nx_netif_mit_rx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_START), SK_KVA(kr));

	ASSERT(kr->ckr_tx == NR_RX);
	STATS_INC(nifs, NETIF_STATS_RX_IRQ);

	/*
	 * If mitigation is enabled for this kring, signal the thread that
	 * there is work to do, unless it's terminating.  Otherwise, we're
	 * done.
	 */
	if (__improbable(nifna->nifna_rx_mit != NULL)) {
		struct nx_netif_mit *mit =
		    &nifna->nifna_rx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	} else {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	}

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_END), SK_KVA(kr));

	return 0;
}

__attribute__((noreturn))
static void
nx_netif_mit_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct nx_netif_mit *mit = v;

	ASSERT(mit->mit_thread == current_thread());
	thread_set_thread_name(current_thread(), mit->mit_name);

	MIT_SPIN_LOCK(mit);
	VERIFY(!(mit->mit_flags & (NETIF_MITF_READY | NETIF_MITF_RUNNING)));
	/* tell nx_netif_mit_init() to proceed */
	mit->mit_flags |= NETIF_MITF_READY;
	wakeup((caddr_t)&mit->mit_thread);
	(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
	MIT_SPIN_UNLOCK(mit);
	if (mit->mit_flags & NETIF_MITF_SIMPLE) {
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
	} else {
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
	}
	/* NOTREACHED */
	__builtin_unreachable();
}
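
/*
 * A note on the continuation pattern used below: thread_block_parameter()
 * does not return to its caller; when the thread is later woken up, the
 * scheduler invokes the registered continuation from scratch, passing it
 * the parameter ('mit').  This is why both continuation variants rebuild
 * their local state (ring, stats pointers, etc.) on every entry.
 */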

/*
 * Simple variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_s_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *mit = v;
	struct netif_stats *nifs;
	int irq_stat, error;

	ASSERT(mit->mit_flags & NETIF_MITF_SIMPLE);
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep on servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		error = nx_netif_common_intr(kr, kernproc, 0, NULL);

		/*
		 * We could get EBUSY here due to netif_inject_rx() holding
		 * the kring lock.  EBUSY means the RX notify callback (which
		 * does the RX syncs, etc.) wasn't called.  If we don't retry
		 * nx_netif_common_intr(), the driver will eventually stop
		 * notifying due to its queues being full.
		 */
		if (error == EBUSY) {
			uint32_t ival =
			    MAX(netif_busy_mit_delay, NETIF_BUSY_MIT_DELAY);

			MIT_SPIN_LOCK(mit);
			mit->mit_requests++;
			MIT_SPIN_UNLOCK(mit);
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);
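
		/*
		 * 'requests' snapshots the request count observed before
		 * servicing the ring.  If it still matches, no new wakeup
		 * request arrived while we were working: clear the counter
		 * and break out to go back to sleep.  Otherwise, loop and
		 * service the ring again.
		 */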
		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

/*
 * Advanced variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *mit = v;
	struct netif_stats *nifs;
	int irq_stat;

	ASSERT(!(mit->mit_flags & NETIF_MITF_SIMPLE));
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep on servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;
		uint32_t ival;
		int error = 0;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		/*
		 * Notify the ring and trigger packet fan-out; bracket
		 * the call with timestamps to compute our effective
		 * mitigation/delay interval below.
		 */
		nx_netif_mit_set_start_interval(mit);
		error = nx_netif_common_intr(kr, kernproc, 0, NULL);
		ival = nx_netif_mit_update_interval(mit, FALSE);

		/*
		 * If the mitigation interval is non-zero (for TX/RX),
		 * we always introduce an artificial delay for that
		 * amount of time.  Otherwise, if we get EBUSY, then
		 * kr_enter() has another thread working on the ring,
		 * and we should wait a bit.
		 */
		if (ival != 0 || error == EBUSY) {
			if (error == EBUSY) {
				ival = MAX(netif_busy_mit_delay,
				    NETIF_BUSY_MIT_DELAY);
				MIT_SPIN_LOCK(mit);
				mit->mit_requests++;
				MIT_SPIN_UNLOCK(mit);
			}
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);

		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
nx_netif_mit_stats(struct __kern_channel_ring *kr, uint64_t pkts,
    uint64_t bytes)
{
	struct nx_netif_mit *mit = kr->ckr_mit;
	struct timespec now, delta;
	mit_mode_t mode;
	uint32_t cfg_idx;

	ASSERT(mit != NULL && !(mit->mit_flags & NETIF_MITF_SIMPLE));

	if ((os_atomic_or_orig(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed) &
	    NETIF_MITF_SAMPLING) != 0) {
		return;
	}

	mode = mit->mit_mode;
	cfg_idx = mit->mit_cfg_idx;

	nanouptime(&now);
	if (!net_timerisset(&mit->mit_sample_lasttime)) {
		*(&mit->mit_sample_lasttime) = *(&now);
	}

	net_timersub(&now, &mit->mit_sample_lasttime, &delta);
	if (net_timerisset(&mit->mit_sample_time)) {
		uint32_t ptot, btot;

		/* accumulate statistics for current sampling */
		PKTCNTR_ADD(&mit->mit_sstats, pkts, bytes);

		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_sample_time, <)) {
			goto done;
		}

		*(&mit->mit_sample_lasttime) = *(&now);

		/* calculate min/max of bytes */
		btot = (uint32_t)mit->mit_sstats.bytes;
		if (mit->mit_bytes_min == 0 || mit->mit_bytes_min > btot) {
			mit->mit_bytes_min = btot;
		}
		if (btot > mit->mit_bytes_max) {
			mit->mit_bytes_max = btot;
		}

		/* calculate EWMA of bytes */
		MIT_EWMA(mit->mit_bytes_avg, btot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* calculate min/max of packets */
		ptot = (uint32_t)mit->mit_sstats.packets;
		if (mit->mit_packets_min == 0 || mit->mit_packets_min > ptot) {
			mit->mit_packets_min = ptot;
		}
		if (ptot > mit->mit_packets_max) {
			mit->mit_packets_max = ptot;
		}

		/* calculate EWMA of packets */
		MIT_EWMA(mit->mit_packets_avg, ptot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* reset sampling statistics */
		PKTCNTR_CLEAR(&mit->mit_sstats);

		/* perform mode transition, if necessary */
		if (!net_timerisset(&mit->mit_mode_lasttime)) {
			*(&mit->mit_mode_lasttime) = *(&now);
		}

		net_timersub(&now, &mit->mit_mode_lasttime, &delta);
		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_mode_holdtime, <)) {
			goto done;
		}

		SK_RDF(SK_VERB_NETIF_MIT, 2, "%s [%u]: pavg %u bavg %u "
		    "delay %llu usec", mit->mit_name, mit->mit_cfg_idx,
		    mit->mit_packets_avg, mit->mit_bytes_avg,
		    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
		    (mit->mit_tbl[mit->mit_cfg_idx].cfg_ival)));

		if (mit->mit_packets_avg <= mit->mit_cfg->cfg_plowat &&
		    mit->mit_bytes_avg <= mit->mit_cfg->cfg_blowat) {
			if (cfg_idx == 0) {
				mode = MIT_MODE_ADVANCED_STATIC;
			} else {
				ASSERT(mode == MIT_MODE_ADVANCED_DYNAMIC);
				--cfg_idx;
			}
		} else if (mit->mit_packets_avg >= mit->mit_cfg->cfg_phiwat &&
		    mit->mit_bytes_avg >= mit->mit_cfg->cfg_bhiwat) {
			mode = MIT_MODE_ADVANCED_DYNAMIC;
			if (cfg_idx < (mit->mit_cfg_idx_max - 1)) {
				++cfg_idx;
			}
		}

		if (mode != mit->mit_mode || cfg_idx != mit->mit_cfg_idx) {
			ASSERT(cfg_idx < mit->mit_cfg_idx_max);

			SK_DF(SK_VERB_NETIF_MIT, "%s [%u->%u]: pavg %u "
			    "bavg %u [mode %u->%u, delay %llu->%llu usec]",
			    mit->mit_name, mit->mit_cfg_idx, cfg_idx,
			    mit->mit_packets_avg, mit->mit_bytes_avg,
			    mit->mit_mode, mode,
			    (mit->mit_mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_cfg->cfg_ival)),
			    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_tbl[cfg_idx].cfg_ival)));

			mit->mit_mode = mode;
			mit->mit_cfg_idx = cfg_idx;
			mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
			*(&mit->mit_mode_lasttime) = *(&now);
		}
	}

done:
	os_atomic_andnot(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed);
}

#if (DEVELOPMENT || DEBUG)
static int
sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = netif_mit_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < NETIF_MIT_MODE_HOLDTIME_MIN) {
		q = NETIF_MIT_MODE_HOLDTIME_MIN;
	}

	netif_mit_mode_holdtime = q;

	return err;
}
#endif /* DEVELOPMENT || DEBUG */
900