xref: /xnu-11215.81.4/bsd/skywalk/nexus/netif/nx_netif_poll.c (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2019-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <sys/kdebug.h>
32 #include <kern/thread.h>
33 #include <kern/sched_prim.h>
34 #include <net/dlil_sysctl.h>
35 
36 extern kern_return_t thread_terminate(thread_t);
37 
/*
 * Exponentially weighted moving average of (old) with sample (new):
 *   avg = old + (new - old) / 2^decay
 * computed without division as ((old << decay) - old + new) >> decay.
 * A zero (old) means "no history yet", so the sample seeds the average.
 * Multi-statement macro wrapped in do/while(0); (old) is written once.
 */
#define NETIF_POLL_EWMA(old, new, decay) do {                                 \
	uint32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
	        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
	        _avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)
46 
/* rate limit debug messages: emit at most one per this interval (1 sec) */
struct timespec netif_poll_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
49 
50 static inline void
nx_netif_rxpoll_set_mode(struct ifnet * ifp,ifnet_model_t mode)51 nx_netif_rxpoll_set_mode(struct ifnet *ifp, ifnet_model_t mode)
52 {
53 	errno_t err;
54 	uint64_t ival;
55 	struct timespec ts;
56 	struct ifnet_model_params p = { .model = mode, .reserved = { 0 } };
57 
58 	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
59 		ival = IF_RXPOLL_INTERVALTIME_MIN;
60 	}
61 
62 	if ((err = ((*ifp->if_input_ctl)(ifp, IFNET_CTL_SET_INPUT_MODEL,
63 	    sizeof(p), &p))) != 0) {
64 		SK_ERR("%s: error setting polling mode to %s (%d)",
65 		    if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ?
66 		    "ON" : "OFF", err);
67 	}
68 
69 	switch (mode) {
70 	case IFNET_MODEL_INPUT_POLL_OFF:
71 		ifnet_set_poll_cycle(ifp, NULL);
72 		ifp->if_rxpoll_offreq++;
73 		if (err != 0) {
74 			ifp->if_rxpoll_offerr++;
75 		}
76 		break;
77 
78 	case IFNET_MODEL_INPUT_POLL_ON:
79 		net_nsectimer(&ival, &ts);
80 		ifnet_set_poll_cycle(ifp, &ts);
81 		ifp->if_rxpoll_onreq++;
82 		if (err != 0) {
83 			ifp->if_rxpoll_onerr++;
84 		}
85 		break;
86 
87 	default:
88 		VERIFY(0);
89 		/* NOTREACHED */
90 		__builtin_unreachable();
91 	}
92 }
93 
/*
 * Updates the input poll statistics and determines the next mode based
 * on the configured thresholds.
 *
 * Called from the poller thread with if_poll_lock NOT held; takes the
 * lock itself.  Samples packet/byte counters over if_poll_sample_holdtime,
 * maintains min/max/EWMA figures, and flips if_poll_mode between POLL_ON
 * and POLL_OFF when the EWMAs cross the low/high watermarks (with a
 * if_poll_mode_holdtime hysteresis).  A mode change is pushed down to the
 * driver after the lock is dropped.
 */
static inline void
netif_rxpoll_compat_update_rxpoll_stats(struct ifnet *ifp,
    struct ifnet_stat_increment_param *s)
{
	uint32_t poll_thresh = 0, poll_ival = 0;
	uint32_t m_cnt, m_size, poll_req = 0;
	struct timespec now, delta;
	ifnet_model_t mode;
	uint64_t ival;

	ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_NOTOWNED);

	/* total packets and bytes passed in by driver */
	m_cnt = s->packets_in;
	m_size = s->bytes_in;

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/* NOTE(review): ival is clamped here but not read below -- confirm if dead */
	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
		ival = IF_RXPOLL_INTERVALTIME_MIN;
	}
	/* Link parameters changed? Re-derive the auto-tuned values. */
	if (ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
		(void) netif_rxpoll_set_params(ifp, NULL, TRUE);
	}

	/* Current operating mode */
	mode = ifp->if_poll_mode;

	nanouptime(&now);
	if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
		*(&ifp->if_poll_sample_lasttime) = now;
	}

	net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
	if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
		uint32_t ptot, btot;

		/* Accumulate statistics for current sampling */
		PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

		/* Keep accumulating until the sampling hold time elapses. */
		if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
			goto skip;
		}
		*(&ifp->if_poll_sample_lasttime) = now;

		/* Calculate min/max of inbound bytes */
		btot = (uint32_t)ifp->if_poll_sstats.bytes;
		if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
			ifp->if_rxpoll_bmin = btot;
		}
		if (btot > ifp->if_rxpoll_bmax) {
			ifp->if_rxpoll_bmax = btot;
		}

		/* Calculate EWMA of inbound bytes */
		NETIF_POLL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

		/* Calculate min/max of inbound packets */
		ptot = (uint32_t)ifp->if_poll_sstats.packets;
		if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
			ifp->if_rxpoll_pmin = ptot;
		}
		if (ptot > ifp->if_rxpoll_pmax) {
			ifp->if_rxpoll_pmax = ptot;
		}

		/* Calculate EWMA of inbound packets */
		NETIF_POLL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

		/* Reset sampling statistics */
		PKTCNTR_CLEAR(&ifp->if_poll_sstats);

#if (SK_LOG && (DEVELOPMENT || DEBUG))
		/* Debug logging, rate-limited by netif_poll_dbgrate. */
		if (__improbable(sk_verbose & SK_VERB_NETIF_POLL)) {
			if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
			}
			net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
			if (net_timercmp(&delta, &netif_poll_dbgrate, >=)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
				SK_DF(SK_VERB_NETIF_POLL,
				    "%s: [%s] pkts avg %d max %d "
				    "limits [%d/%d], bytes avg %d "
				    "limits [%d/%d]", if_name(ifp),
				    (ifp->if_poll_mode ==
				    IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat,
				    ifp->if_rxpoll_bavg,
				    ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}
		}
#endif /* (SK_LOG && (DEVELOPMENT || DEBUG)) */

		/* Perform mode transition, if necessary */
		if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
			*(&ifp->if_poll_mode_lasttime) = *(&now);
		}

		/* Hysteresis: don't flip modes before the mode hold time passes. */
		net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
		if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
			goto skip;
		}

		/*
		 * Both packet and byte EWMAs must be at/below the low
		 * watermarks to leave polling, and at/above the high
		 * watermarks to enter it.
		 */
		if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
		    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
			mode = IFNET_MODEL_INPUT_POLL_OFF;
		} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
		    ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
			mode = IFNET_MODEL_INPUT_POLL_ON;
		}

		if (mode != ifp->if_poll_mode) {
			ifp->if_poll_mode = mode;
			*(&ifp->if_poll_mode_lasttime) = *(&now);
			poll_req++;
		}
	}
skip:
	/* update rxpoll stats: fold transient counters into the public ones */
	if (ifp->if_poll_tstats.packets != 0) {
		ifp->if_poll_pstats.ifi_poll_packets +=
		    ifp->if_poll_tstats.packets;
		ifp->if_poll_tstats.packets = 0;
	}
	if (ifp->if_poll_tstats.bytes != 0) {
		ifp->if_poll_pstats.ifi_poll_bytes +=
		    ifp->if_poll_tstats.bytes;
		ifp->if_poll_tstats.bytes = 0;
	}

	lck_mtx_unlock(&ifp->if_poll_lock);
	/*
	 * If there's a mode change, perform a downcall to the driver
	 * for the new mode. This function is called from the poller thread
	 * which holds a reference on the ifnet.
	 */
	if (poll_req != 0) {
		nx_netif_rxpoll_set_mode(ifp, mode);
	}

	/* Signal the poller thread to do work if required */
	if (mode == IFNET_MODEL_INPUT_POLL_ON && m_cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = m_cnt;
	}
	if (poll_thresh != 0 && poll_ival > 0 &&
	    (--poll_thresh % poll_ival) == 0) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_req++;
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
}
258 
259 /*
260  * Must be called on an attached ifnet (caller is expected to check.)
261  * Caller may pass NULL for poll parameters to indicate "auto-tuning."
262  */
263 errno_t
netif_rxpoll_set_params(struct ifnet * ifp,struct ifnet_poll_params * p,boolean_t locked)264 netif_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
265     boolean_t locked)
266 {
267 	errno_t err;
268 
269 	VERIFY(ifp != NULL);
270 	if ((ifp->if_eflags & IFEF_RXPOLL) == 0) {
271 		return ENXIO;
272 	}
273 	err = dlil_rxpoll_validate_params(p);
274 	if (err != 0) {
275 		return err;
276 	}
277 
278 	if (!locked) {
279 		lck_mtx_lock(&ifp->if_poll_lock);
280 	}
281 	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
282 	/*
283 	 * Normally, we'd reset the parameters to the auto-tuned values
284 	 * if the the poller thread detects a change in link rate.  If the
285 	 * driver provides its own parameters right after a link rate
286 	 * changes, but before the input thread gets to run, we want to
287 	 * make sure to keep the driver's values.  Clearing if_poll_update
288 	 * will achieve that.
289 	 */
290 	if (p != NULL && !locked && ifp->if_poll_update != 0) {
291 		ifp->if_poll_update = 0;
292 	}
293 	dlil_rxpoll_update_params(ifp, p);
294 	if (!locked) {
295 		lck_mtx_unlock(&ifp->if_poll_lock);
296 	}
297 	return 0;
298 }
299 
300 static inline void
netif_rxpoll_poll_driver(struct ifnet * ifp,uint32_t m_lim,struct ifnet_stat_increment_param * s,struct timespec * start_time,struct timespec * poll_duration)301 netif_rxpoll_poll_driver(struct ifnet *ifp, uint32_t m_lim,
302     struct ifnet_stat_increment_param *s, struct timespec *start_time,
303     struct timespec *poll_duration)
304 {
305 	struct mbuf *__single m_head = NULL, *__single m_tail = NULL;
306 	uint32_t m_cnt = 0, m_totlen = 0;
307 	struct timespec now;
308 
309 	/* invoke the driver's input poll routine */
310 	((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, &m_cnt,
311 	&m_totlen));
312 	VERIFY((m_cnt > 0) || ((m_head == NULL) && (m_tail == NULL)));
313 
314 	s->packets_in = m_cnt;
315 	s->bytes_in = m_totlen;
316 	/*
317 	 * Bracket the work done with timestamps to compute the effective
318 	 * poll interval.
319 	 */
320 	nanouptime(start_time);
321 	(void) ifnet_input_poll(ifp, m_head, m_tail,
322 	    (m_head != NULL) ? s : NULL);
323 	nanouptime(&now);
324 	net_timersub(&now, start_time, poll_duration);
325 
326 	SK_DF(SK_VERB_NETIF_POLL, "%s: polled %d pkts, pkts avg %d max %d, "
327 	    "wreq avg %d, bytes avg %d", if_name(ifp), m_cnt,
328 	    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
329 	    ifp->if_rxpoll_bavg);
330 }
331 
332 static inline void
netif_rxpoll_process_interrupt(struct ifnet * ifp,proc_t p,struct ifnet_stat_increment_param * s,struct nx_mbq * rcvq)333 netif_rxpoll_process_interrupt(struct ifnet *ifp, proc_t p,
334     struct ifnet_stat_increment_param *s, struct nx_mbq *rcvq)
335 {
336 	struct nexus_adapter *na = &NA(ifp)->nifna_up;
337 
338 	nx_mbq_lock_spin(rcvq);
339 	s->packets_in = nx_mbq_len(rcvq);
340 	s->bytes_in = (uint32_t)nx_mbq_size(rcvq);
341 	nx_mbq_unlock(rcvq);
342 	(void) nx_netif_mit_rx_intr((NAKR(na, NR_RX)), p, 0, NULL);
343 }
344 
/*
 * Continuation body of the compat RX poller thread.  Re-entered by the
 * scheduler after each thread_block_parameter() in the thread function
 * below or at the bottom of this function.  Services the interface in a
 * loop (driver poll in POLL_ON mode, queue drain in POLL_OFF mode), then
 * either re-arms a timed/indefinite wait or tears the thread down when
 * IF_POLLF_TERMINATING is set.  Never returns.
 */
__attribute__((noreturn))
static void
netif_rxpoll_compat_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *__single ifp = v;
	struct timespec *ts = NULL;
	struct timespec start_time, poll_intvl, poll_duration;
	struct ifnet_stat_increment_param s;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/* Bail straight to teardown if interrupted or asked to terminate. */
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;
	/*
	 * Keep on servicing until no more request.
	 */

	for (;;) {
		/* Snapshot the request counter to detect new work later. */
		uint16_t req = ifp->if_poll_req;
		struct nexus_adapter *na = &NA(ifp)->nifna_up;
		struct __kern_channel_ring *kring = &na->na_rx_rings[0];
		struct nx_mbq *rxq = &kring->ckr_rx_queue;
		uint32_t m_lim;
		boolean_t poll, poll_again = false;

		/* Per-poll packet limit: configured value, or a derived cap. */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((nx_mbq_limit(rxq)), (ifp->if_rxpoll_phiwat << 2));
		poll = (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON);
		lck_mtx_unlock(&ifp->if_poll_lock);

		net_timerclear(&poll_duration);

		/* If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (poll) {
			netif_rxpoll_poll_driver(ifp, m_lim, &s, &start_time,
			    &poll_duration);
			/*
			 * if the polled duration is more than the poll
			 * interval, then poll again to catch up.
			 */
			ASSERT(net_timerisset(&ifp->if_poll_cycle));
			if (net_timercmp(&poll_duration, &ifp->if_poll_cycle,
			    >=)) {
				poll_again = true;
			}
		} else {
			/* Interrupt mode: drain the RX queue instead. */
			netif_rxpoll_process_interrupt(ifp, kernproc, &s, rxq);
			net_timerclear(&start_time);
		}

		/* Update stats; may flip if_poll_mode for the next pass. */
		netif_rxpoll_compat_update_rxpoll_stats(ifp, &s);
		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if signalled to terminate */
		if (__improbable((ifp->if_poll_flags & IF_POLLF_TERMINATING)
		    != 0)) {
			break;
		}
		/* if there's no pending request, we're done. */
		if (!poll_again && (req == ifp->if_poll_req)) {
			break;
		}
	}

	/* Loop exited with if_poll_lock held. */
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
	/*
	 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
	 * until ifnet_poll() is called again.
	 */
	/* calculate work duration (since last start work time) */
	if (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON) {
		ASSERT(net_timerisset(&ifp->if_poll_cycle));
		ASSERT(net_timercmp(&poll_duration, &ifp->if_poll_cycle, <));
		/* Sleep only for the remainder of the poll cycle. */
		net_timersub(&ifp->if_poll_cycle, &poll_duration, &poll_intvl);
		ASSERT(net_timerisset(&poll_intvl));
		ts = &poll_intvl;
	} else {
		ts = NULL;
	}

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		if (ts != NULL) {
			uint64_t interval;

			_CASSERT(IF_RXPOLL_INTERVALTIME_MIN >= (1ULL * 1000));
			net_timerusec(ts, &interval);
			ASSERT(interval <= UINT32_MAX);
			clock_interval_to_deadline((uint32_t)interval, NSEC_PER_USEC,
			    &deadline);
		}

		/* Re-arm the wait and block; re-enters this function on wake. */
		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(netif_rxpoll_compat_thread_cont,
		    ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_flags &= ~IF_POLLF_READY;

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
		SK_DF(SK_VERB_NETIF_POLL, "%s: poller thread terminated",
		    if_name(ifp));
		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
487 
488 __attribute__((noreturn))
489 void
netif_rxpoll_compat_thread_func(void * v,wait_result_t w)490 netif_rxpoll_compat_thread_func(void *v, wait_result_t w)
491 {
492 #pragma unused(w)
493 	char thread_name_buf[MAXTHREADNAMESIZE];
494 	const char *__null_terminated thread_name = NULL;
495 	struct ifnet *__single ifp = v;
496 
497 	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
498 	VERIFY(current_thread() == ifp->if_poll_thread);
499 
500 	/* construct the name for this thread, and then apply it */
501 	bzero(thread_name_buf, sizeof(thread_name_buf));
502 	thread_name = tsnprintf(thread_name_buf, sizeof(thread_name_buf),
503 	    "skywalk_netif_poller_%s", ifp->if_xname);
504 	thread_set_thread_name(ifp->if_poll_thread, thread_name);
505 
506 	lck_mtx_lock(&ifp->if_poll_lock);
507 	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_READY | IF_POLLF_RUNNING)));
508 	/* tell nx_netif_compat_na_activate() to proceed */
509 	ifp->if_poll_flags |= IF_POLLF_READY;
510 	wakeup((caddr_t)&ifp->if_poll_flags);
511 	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
512 	lck_mtx_unlock(&ifp->if_poll_lock);
513 	(void) thread_block_parameter(netif_rxpoll_compat_thread_cont, ifp);
514 	/* NOTREACHED */
515 	__builtin_unreachable();
516 }
517