/* xnu-12377.41.6/bsd/skywalk/nexus/netif/nx_netif_mit.c */
/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <mach/thread_act.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>
#include <kern/uipc_domain.h>

extern kern_return_t thread_terminate(thread_t);

static void nx_netif_mit_reset_interval(struct nx_netif_mit *);
static void nx_netif_mit_set_start_interval(struct nx_netif_mit *);
static uint32_t nx_netif_mit_update_interval(struct nx_netif_mit *, boolean_t);
static void nx_netif_mit_thread_func(void *, wait_result_t);
static void nx_netif_mit_thread_cont(void *, wait_result_t);
static void nx_netif_mit_s_thread_cont(void *, wait_result_t);
static void nx_netif_mit_stats(struct __kern_channel_ring *, uint64_t,
    uint64_t);

/* mitigation intervals in microseconds */
#define NETIF_BUSY_MIT_DELAY    (100)

static uint32_t netif_busy_mit_delay = NETIF_BUSY_MIT_DELAY;

#define MIT_EWMA(old, new, gdecay, sdecay) do {                         \
	uint32_t _avg;                                                  \
	if ((_avg = (old)) > 0) {                                       \
	        uint32_t _d = ((new) > _avg) ? gdecay : sdecay;         \
	        _avg = (((_avg << (_d)) - _avg) + (new)) >> (_d);       \
	} else {                                                        \
	        _avg = (new);                                           \
	}                                                               \
	(old) = _avg;                                                   \
} while (0)

/*
 * A larger decay factor results in slower reaction.  Each value is the
 * ilog2 of an EWMA decay rate; one is for growth and the other for
 * shrink.  The two decay factors are chosen such that we react quickly
 * to shrink and slowly to grow.  Growth and shrink refer to the
 * mitigation delay interval.
 */
#define NETIF_AD_MIT_GDECAY     3       /* ilog2(8) */
static uint32_t netif_ad_mit_gdecay = NETIF_AD_MIT_GDECAY;

#define NETIF_AD_MIT_SDECAY     2       /* ilog2(4) */
static uint32_t netif_ad_mit_sdecay = NETIF_AD_MIT_SDECAY;
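
/*
 * Worked example (for illustration, using the defaults above): with
 * sdecay = 2, an average of 100 and a new sample of 60, MIT_EWMA
 * computes ((100 << 2) - 100 + 60) >> 2 = 360 >> 2 = 90, moving 1/4
 * of the way toward the smaller sample.  With gdecay = 3 and a new
 * sample of 140, it computes ((100 << 3) - 100 + 140) >> 3 =
 * 840 >> 3 = 105, moving only 1/8 of the way toward the larger
 * sample; shrink therefore reacts twice as fast as growth.
 */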

#define NETIF_MIT_MODE_HOLDTIME_MIN     (1ULL * 1000 * 1000)    /* 1 ms */
#define NETIF_MIT_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static uint64_t netif_mit_mode_holdtime = NETIF_MIT_MODE_HOLDTIME;

#define NETIF_MIT_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define NETIF_MIT_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static uint64_t netif_mit_sample_holdtime = NETIF_MIT_SAMPLETIME;

/*
 * These numbers are based on the 10 ms netif_mit_sample_holdtime;
 * changing the hold time will require recomputing them.
 */
#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 30000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 40, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 300 },
	{ .cfg_plowat = 20, .cfg_phiwat = 60, .cfg_blowat = 5000,
	  .cfg_bhiwat = 150000, .cfg_ival = 500 },
	{ .cfg_plowat = 40, .cfg_phiwat = 80, .cfg_blowat = 80000,
	  .cfg_bhiwat = 200000, .cfg_ival = 700 },
	{ .cfg_plowat = 60, .cfg_phiwat = 250, .cfg_blowat = 150000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1500 },
	{ .cfg_plowat = 260, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 400 },
};
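
/*
 * Each table above is a ladder of mitigation levels: cfg_ival is the
 * delay interval (in microseconds) applied at a level, and the
 * watermarks drive level changes in nx_netif_mit_stats() below.  The
 * ring steps up a level when both the packet and byte EWMAs reach
 * cfg_phiwat and cfg_bhiwat, and steps down (ultimately back to the
 * static mode at index 0) when both drop to cfg_plowat and cfg_blowat
 * or below.  For example, with mit_cfg_tbl_native at level 1,
 * sustained averages of 100 packets and 300000 bytes per sample
 * period move the ring to level 2 and a 500 usec delay.
 */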

#if (DEVELOPMENT || DEBUG)
static int sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, busy_mit_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_busy_mit_delay,
    NETIF_BUSY_MIT_DELAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_gdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_gdecay, NETIF_AD_MIT_GDECAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_sdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_sdecay, NETIF_AD_MIT_SDECAY, "");
SYSCTL_PROC(_kern_skywalk_netif, OID_AUTO, ad_mit_freeze,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &netif_mit_mode_holdtime,
    NETIF_MIT_MODE_HOLDTIME, sysctl_mit_mode_holdtime, "Q", "");
#endif /* DEVELOPMENT || DEBUG */

void
nx_netif_mit_init(struct nx_netif *nif, const struct ifnet *ifp,
    struct nx_netif_mit *mit, struct __kern_channel_ring *kr,
    boolean_t simple)
{
#pragma unused(nif)
	thread_precedence_policy_data_t info;
	__unused kern_return_t kret;
	char oid_name_buf[24];
	const char *__null_terminated oid_name = NULL;

	static_assert(sizeof(mit_cfg_tbl_native_cellular) <= sizeof(((struct nx_netif_mit *)0)->mit_tbl));

	lck_spin_init(&mit->mit_lock, kr->ckr_qlock_group, &channel_lock_attr);

	if (kr->ckr_tx == NR_TX) {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_tx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_tx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		oid_name = tsnprintf(oid_name_buf, sizeof(oid_name_buf),
		    "tx_%u", kr->ckr_ring_id);
	} else {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_rx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_rx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		oid_name = tsnprintf(oid_name_buf, sizeof(oid_name_buf),
		    "rx_%u", kr->ckr_ring_id);
	}

	mit->mit_ckr = kr;
	mit->mit_ckr->ckr_mit = mit;
	mit->mit_interval = 0;
	mit->mit_netif_ifp = ifp;

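	/*
	 * Select the mitigation config table for this interface:
	 * Skywalk-native cellular interfaces get their own tuning,
	 * while other native interfaces and compat (non-native)
	 * interfaces each use a separate table.
	 */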
	if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) && (ifp->if_family ==
	    IFNET_FAMILY_CELLULAR)) {
		bcopy(mit_cfg_tbl_native_cellular,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit_cfg_tbl_native_cellular));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native_cellular) /
		    sizeof(*mit->mit_cfg);
	} else if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		bcopy(mit_cfg_tbl_native,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native) /
		    sizeof(*mit->mit_cfg);
	} else {
		bcopy(mit_cfg_tbl_compat,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_compat) /
		    sizeof(*mit->mit_cfg);
	}
	VERIFY(mit->mit_cfg_idx_max > 0);
	VERIFY(mit->mit_cfg_idx_max <= NETIF_MIT_CFG_TBL_MAX_CFG);

	if (ifp->if_rx_mit_ival != 0) {
		mit->mit_tbl[0].cfg_ival = ifp->if_rx_mit_ival;
		SK_D("mit interval updated: %s cfg %u ival %u",
		    mit->mit_name, 0, mit->mit_tbl[0].cfg_ival);
	}

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

	net_nsectimer(&netif_mit_mode_holdtime, &mit->mit_mode_holdtime);
	net_nsectimer(&netif_mit_sample_holdtime, &mit->mit_sample_time);

	/* initialize mode and params */
	nx_netif_mit_reset_interval(mit);
	VERIFY(mit->mit_cfg != NULL && mit->mit_cfg_idx < mit->mit_cfg_idx_max);
	mit->mit_flags = NETIF_MITF_INITIALIZED;
	if (simple) {
		/*
		 * Simple mitigation where we don't collect any statistics
		 * at all, and therefore don't want to register the ring's
		 * ckr_netif_mit_stats() callback.
		 */
		mit->mit_flags |= NETIF_MITF_SIMPLE;
		ASSERT(kr->ckr_netif_mit_stats == NULL);
	} else {
		/*
		 * Regular mitigation where we collect stats and use them
		 * for determining the delay between wakeups; initialize
		 * the ring's ckr_netif_mit_stats() callback.
		 */
		kr->ckr_netif_mit_stats = nx_netif_mit_stats;
	}

	if (kernel_thread_start(nx_netif_mit_thread_func, mit,
	    &mit->mit_thread) != KERN_SUCCESS) {
		panic_plain("%s: can't create thread", mit->mit_name);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* this must not fail */
	VERIFY(mit->mit_thread != NULL);

	/* wait until nx_netif_mit_thread_func() is ready */
	MIT_SPIN_LOCK(mit);
	while (!(mit->mit_flags & NETIF_MITF_READY)) {
		(void) assert_wait(&mit->mit_thread, THREAD_UNINT);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block(THREAD_CONTINUE_NULL);
		MIT_SPIN_LOCK(mit);
	}
	MIT_SPIN_UNLOCK(mit);

	bzero(&info, sizeof(info));
	info.importance = 0;
	kret = thread_policy_set(mit->mit_thread, THREAD_PRECEDENCE_POLICY,
	    (thread_policy_t)&info, THREAD_PRECEDENCE_POLICY_COUNT);
	ASSERT(kret == KERN_SUCCESS);

#if (DEVELOPMENT || DEBUG)
	/* register mit sysctl skoid */
	skoid_create(&mit->mit_skoid, SKOID_DNODE(nif->nif_skoid), oid_name, 0);
	skoid_add_uint(&mit->mit_skoid, "interval", CTLFLAG_RW,
	    &mit->mit_interval);
	struct skoid *skoid = &mit->mit_skoid;
	struct mit_cfg_tbl *t;
#define MIT_ADD_SKOID(_i)                                                \
	t = &mit->mit_tbl[_i];                                           \
	skoid_add_uint(skoid, #_i"_plowat", CTLFLAG_RW, &t->cfg_plowat); \
	skoid_add_uint(skoid, #_i"_phiwat", CTLFLAG_RW, &t->cfg_phiwat); \
	skoid_add_uint(skoid, #_i"_blowat", CTLFLAG_RW, &t->cfg_blowat); \
	skoid_add_uint(skoid, #_i"_bhiwat", CTLFLAG_RW, &t->cfg_bhiwat); \
	skoid_add_uint(skoid, #_i"_ival", CTLFLAG_RW, &t->cfg_ival);
	MIT_ADD_SKOID(0);
	MIT_ADD_SKOID(1);
	MIT_ADD_SKOID(2);
	MIT_ADD_SKOID(3);
	MIT_ADD_SKOID(4);
	static_assert(NETIF_MIT_CFG_TBL_MAX_CFG == 5);
#endif /* DEVELOPMENT || DEBUG */
}

__attribute__((always_inline))
static inline void
nx_netif_mit_reset_interval(struct nx_netif_mit *mit)
{
	(void) nx_netif_mit_update_interval(mit, TRUE);
}

__attribute__((always_inline))
static inline void
nx_netif_mit_set_start_interval(struct nx_netif_mit *mit)
{
	nanouptime(&mit->mit_start_time);
}

__attribute__((always_inline))
static inline uint32_t
nx_netif_mit_update_interval(struct nx_netif_mit *mit, boolean_t reset)
{
	struct timespec now, delta;
	uint64_t r;
	uint32_t i;

	nanouptime(&now);
	net_timersub(&now, &mit->mit_sample_lasttime, &delta);

	/* CSTYLED */
	if ((net_timercmp(&delta, &mit->mit_mode_holdtime, >)) || reset) {
		mit_mode_t mode = (mit->mit_flags & NETIF_MITF_SIMPLE) ?
		    MIT_MODE_SIMPLE : MIT_MODE_ADVANCED_STATIC;

		/* if we haven't updated stats in a while, reset it back */
		SK_DF(SK_VERB_NETIF_MIT, "%s: resetting [mode %u->%u]",
		    mit->mit_name, mit->mit_mode, mode);

		mit->mit_mode = mode;
		mit->mit_cfg_idx = 0;
		mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
		mit->mit_packets_avg = 0;
		mit->mit_bytes_avg = 0;
	}

	/* calculate work duration (since last start work time) */
	if (net_timerisset(&mit->mit_start_time)) {
		net_timersub(&now, &mit->mit_start_time, &delta);
		net_timerusec(&delta, &r);
	} else {
		r = 0;
	}

	switch (mit->mit_mode) {
	case MIT_MODE_SIMPLE:
		i = 0;
		break;

	case MIT_MODE_ADVANCED_STATIC:
		i = mit->mit_interval;
		break;

	case MIT_MODE_ADVANCED_DYNAMIC:
		i = mit->mit_cfg->cfg_ival;
		break;
	}

	/*
	 * The idea here is to return the effective delay interval that,
	 * at a minimum, lets each work phase begin at the desired
	 * cadence.
	 */
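	/*
	 * For example, with a 500 usec target interval (i) and 120 usec
	 * spent in the preceding work phase (r), we return 380 usec so
	 * that successive work phases start roughly 500 usec apart.
	 */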
	if (__probable(r != 0)) {
		if (__probable(i > r)) {
			i -= r;
		} else {
			/* bump up cfg_idx perhaps? */
			i = 0;
		}
	}

	return i;
}

void
nx_netif_mit_cleanup(struct nx_netif_mit *mit)
{
	if (mit->mit_thread != THREAD_NULL) {
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);

		/* signal thread to begin self-termination */
		MIT_SPIN_LOCK(mit);
		mit->mit_flags |= NETIF_MITF_TERMINATING;
		(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
		    mit->mit_thread);
		MIT_SPIN_UNLOCK(mit);

		/* and wait for thread to terminate */
		MIT_SPIN_LOCK(mit);
		while (!(mit->mit_flags & NETIF_MITF_TERMINATED)) {
			(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
			MIT_SPIN_UNLOCK(mit);
			(void) thread_block(THREAD_CONTINUE_NULL);
			MIT_SPIN_LOCK(mit);
		}
		ASSERT(mit->mit_flags & NETIF_MITF_TERMINATED);
		MIT_SPIN_UNLOCK(mit);
		mit->mit_thread = THREAD_NULL;
	}
	ASSERT(mit->mit_thread == THREAD_NULL);
	lck_spin_destroy(&mit->mit_lock, mit->mit_ckr->ckr_qlock_group);

	mit->mit_ckr->ckr_mit = NULL;
	mit->mit_ckr = NULL;
	mit->mit_netif_ifp = NULL;
	mit->mit_flags &= ~NETIF_MITF_INITIALIZED;

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

#if (DEVELOPMENT || DEBUG)
	skoid_destroy(&mit->mit_skoid);
#endif /* DEVELOPMENT || DEBUG */
}

int
nx_netif_mit_tx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	ASSERT(kr->ckr_tx == NR_TX);
	STATS_INC(nifs, NETIF_STATS_TX_IRQ);

	/*
	 * If mitigation is not enabled for this kring, we're done; otherwise,
	 * signal the thread that there is work to do, unless it's terminating.
	 */
	if (__probable(nifna->nifna_tx_mit == NULL)) {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	} else {
		struct nx_netif_mit *mit =
		    &nifna->nifna_tx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	}

	return 0;
}

int
nx_netif_mit_rx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_START), SK_KVA(kr));

	ASSERT(kr->ckr_tx == NR_RX);
	STATS_INC(nifs, NETIF_STATS_RX_IRQ);

	/*
	 * If mitigation is enabled for this kring, signal the thread that
	 * there is work to do, unless it's terminating.  Otherwise, we're
	 * done.
	 */
	if (__improbable(nifna->nifna_rx_mit != NULL)) {
		struct nx_netif_mit *mit =
		    &nifna->nifna_rx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	} else {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	}

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_END), SK_KVA(kr));

	return 0;
}

__attribute__((noreturn))
static void
nx_netif_mit_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct nx_netif_mit *__single mit = v;

	ASSERT(mit->mit_thread == current_thread());
	thread_set_thread_name(current_thread(),
	    __unsafe_null_terminated_from_indexable(mit->mit_name));

	MIT_SPIN_LOCK(mit);
	VERIFY(!(mit->mit_flags & (NETIF_MITF_READY | NETIF_MITF_RUNNING)));
	/* tell nx_netif_mit_init() to proceed */
	mit->mit_flags |= NETIF_MITF_READY;
	wakeup((caddr_t)&mit->mit_thread);
	(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
	MIT_SPIN_UNLOCK(mit);
	if (mit->mit_flags & NETIF_MITF_SIMPLE) {
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
	} else {
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
	}
	/* NOTREACHED */
	__builtin_unreachable();
}

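/*
 * Note that the two continuation functions below never return: each
 * services the ring, then blocks via thread_block_parameter() with
 * itself as the continuation, so the thread resumes at the top of the
 * function with a fresh stack rather than returning through
 * thread_block().
 */
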
/*
 * Simple variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_s_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *__single mit = v;
	struct netif_stats *nifs;
	int irq_stat, error;

	ASSERT(mit->mit_flags & NETIF_MITF_SIMPLE);
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		error = nx_netif_common_intr(kr, kernproc, 0, NULL);

		/*
		 * We could get EBUSY here due to netif_inject_rx() holding
		 * the kring lock.  EBUSY means the RX notify callback (which
		 * performs the RX syncs, etc.) wasn't called.  If we don't
		 * retry nx_netif_common_intr(), the driver will eventually
		 * stop notifying because its queues are full.
		 */
		if (error == EBUSY) {
			uint32_t ival =
			    MAX(netif_busy_mit_delay, NETIF_BUSY_MIT_DELAY);

			MIT_SPIN_LOCK(mit);
			mit->mit_requests++;
			MIT_SPIN_UNLOCK(mit);
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);

		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

/*
 * Advanced variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *__single mit = v;
	struct netif_stats *nifs;
	int irq_stat;

	ASSERT(!(mit->mit_flags & NETIF_MITF_SIMPLE));
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;
		uint32_t ival;
		int error = 0;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		/*
		 * Notify the ring and trigger packet fan-out; bracket
		 * the call with timestamps to compute our effective
		 * mitigation/delay interval below.
		 */
		nx_netif_mit_set_start_interval(mit);
		error = nx_netif_common_intr(kr, kernproc, 0, NULL);
		ival = nx_netif_mit_update_interval(mit, FALSE);

		/*
		 * If the mitigation interval is non-zero (for TX/RX),
		 * we always introduce an artificial delay for that
		 * amount of time.  Otherwise, if we get EBUSY, another
		 * thread is working on the ring inside kr_enter(), so
		 * wait a bit before retrying.
		 */
		if (ival != 0 || error == EBUSY) {
			if (error == EBUSY) {
				ival = MAX(netif_busy_mit_delay,
				    NETIF_BUSY_MIT_DELAY);
				MIT_SPIN_LOCK(mit);
				mit->mit_requests++;
				MIT_SPIN_UNLOCK(mit);
			}
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);

		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
nx_netif_mit_stats(struct __kern_channel_ring *kr, uint64_t pkts,
    uint64_t bytes)
{
	struct nx_netif_mit *mit = kr->ckr_mit;
	struct timespec now, delta;
	mit_mode_t mode;
	uint32_t cfg_idx;

	ASSERT(mit != NULL && !(mit->mit_flags & NETIF_MITF_SIMPLE));

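	/*
	 * Atomically mark the sampler busy; if NETIF_MITF_SAMPLING was
	 * already set, another context is updating the statistics, so
	 * drop this sample rather than contend for it.
	 */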
	if ((os_atomic_or_orig(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed) &
	    NETIF_MITF_SAMPLING) != 0) {
		return;
	}

	mode = mit->mit_mode;
	cfg_idx = mit->mit_cfg_idx;

	nanouptime(&now);
	if (!net_timerisset(&mit->mit_sample_lasttime)) {
		*(&mit->mit_sample_lasttime) = *(&now);
	}

	net_timersub(&now, &mit->mit_sample_lasttime, &delta);
	if (net_timerisset(&mit->mit_sample_time)) {
		uint32_t ptot, btot;

		/* accumulate statistics for current sampling */
		PKTCNTR_ADD(&mit->mit_sstats, pkts, bytes);

		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_sample_time, <)) {
			goto done;
		}

		*(&mit->mit_sample_lasttime) = *(&now);

		/* calculate min/max of bytes */
		btot = (uint32_t)mit->mit_sstats.bytes;
		if (mit->mit_bytes_min == 0 || mit->mit_bytes_min > btot) {
			mit->mit_bytes_min = btot;
		}
		if (btot > mit->mit_bytes_max) {
			mit->mit_bytes_max = btot;
		}

		/* calculate EWMA of bytes */
		MIT_EWMA(mit->mit_bytes_avg, btot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* calculate min/max of packets */
		ptot = (uint32_t)mit->mit_sstats.packets;
		if (mit->mit_packets_min == 0 || mit->mit_packets_min > ptot) {
			mit->mit_packets_min = ptot;
		}
		if (ptot > mit->mit_packets_max) {
			mit->mit_packets_max = ptot;
		}

		/* calculate EWMA of packets */
		MIT_EWMA(mit->mit_packets_avg, ptot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* reset sampling statistics */
		PKTCNTR_CLEAR(&mit->mit_sstats);

		/* perform mode transition, if necessary */
		if (!net_timerisset(&mit->mit_mode_lasttime)) {
			*(&mit->mit_mode_lasttime) = *(&now);
		}

		net_timersub(&now, &mit->mit_mode_lasttime, &delta);
		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_mode_holdtime, <)) {
			goto done;
		}

		SK_RDF(SK_VERB_NETIF_MIT, 2, "%s [%u]: pavg %u bavg %u "
		    "delay %u usec", mit->mit_name, mit->mit_cfg_idx,
		    mit->mit_packets_avg, mit->mit_bytes_avg,
		    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
		    (mit->mit_tbl[mit->mit_cfg_idx].cfg_ival)));

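		/*
		 * Hysteresis: step down a level only when BOTH averages
		 * are at or below the low watermarks, and step up only
		 * when BOTH are at or above the high watermarks;
		 * anything in between leaves the current level alone.
		 */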
		if (mit->mit_packets_avg <= mit->mit_cfg->cfg_plowat &&
		    mit->mit_bytes_avg <= mit->mit_cfg->cfg_blowat) {
			if (cfg_idx == 0) {
				mode = MIT_MODE_ADVANCED_STATIC;
			} else {
				ASSERT(mode == MIT_MODE_ADVANCED_DYNAMIC);
				--cfg_idx;
			}
		} else if (mit->mit_packets_avg >= mit->mit_cfg->cfg_phiwat &&
		    mit->mit_bytes_avg >= mit->mit_cfg->cfg_bhiwat) {
			mode = MIT_MODE_ADVANCED_DYNAMIC;
			if (cfg_idx < (mit->mit_cfg_idx_max - 1)) {
				++cfg_idx;
			}
		}

		if (mode != mit->mit_mode || cfg_idx != mit->mit_cfg_idx) {
			ASSERT(cfg_idx < mit->mit_cfg_idx_max);

			SK_DF(SK_VERB_NETIF_MIT, "%s [%u->%u]: pavg %u "
			    "bavg %u [mode %u->%u, delay %u->%u usec]",
			    mit->mit_name, mit->mit_cfg_idx, cfg_idx,
			    mit->mit_packets_avg, mit->mit_bytes_avg,
			    mit->mit_mode, mode,
			    (mit->mit_mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_cfg->cfg_ival)),
			    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_tbl[cfg_idx].cfg_ival)));

			mit->mit_mode = mode;
			mit->mit_cfg_idx = cfg_idx;
			mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
			*(&mit->mit_mode_lasttime) = *(&now);
		}
	}

done:
	os_atomic_andnot(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed);
}

#if (DEVELOPMENT || DEBUG)
static int
sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = netif_mit_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < NETIF_MIT_MODE_HOLDTIME_MIN) {
		q = NETIF_MIT_MODE_HOLDTIME_MIN;
	}

	netif_mit_mode_holdtime = q;

	return err;
}
#endif /* DEVELOPMENT || DEBUG */