xref: /xnu-10002.81.5/bsd/skywalk/nexus/netif/nx_netif_poll.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2019-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <sys/kdebug.h>
32 #include <kern/thread.h>
33 #include <kern/sched_prim.h>
34 
35 extern kern_return_t thread_terminate(thread_t);
36 
/*
 * Exponentially weighted moving average: fold "new" into the running
 * average "old" with a history weight of (2^decay - 1)/2^decay, i.e.
 * avg' = (avg * (2^decay - 1) + new) / 2^decay.  An empty (zero)
 * history is seeded directly with the new sample.  Multi-statement
 * macro; "old" must be an assignable uint32_t lvalue.
 */
#define NETIF_POLL_EWMA(old, new, decay) do {                              \
	uint32_t _ewma = (old);                                            \
	if (_ewma > 0) {                                                   \
	        _ewma = (((_ewma << (decay)) - _ewma) + (new)) >> (decay); \
	} else {                                                           \
	        _ewma = (new);                                             \
	}                                                                  \
	(old) = _ewma;                                                     \
} while (0)
45 
/*
 * Rate limit for poller debug messages: at most one SK_VERB_NETIF_POLL
 * stats dump per second (consulted in netif_rxpoll_compat_update_rxpoll_stats).
 */
struct timespec netif_poll_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
48 
49 static inline void
nx_netif_rxpoll_set_mode(struct ifnet * ifp,ifnet_model_t mode)50 nx_netif_rxpoll_set_mode(struct ifnet *ifp, ifnet_model_t mode)
51 {
52 	errno_t err;
53 	uint64_t ival;
54 	struct timespec ts;
55 	struct ifnet_model_params p = { .model = mode, .reserved = { 0 } };
56 
57 	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
58 		ival = IF_RXPOLL_INTERVALTIME_MIN;
59 	}
60 
61 	if ((err = ((*ifp->if_input_ctl)(ifp, IFNET_CTL_SET_INPUT_MODEL,
62 	    sizeof(p), &p))) != 0) {
63 		SK_ERR("%s: error setting polling mode to %s (%d)",
64 		    if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ?
65 		    "ON" : "OFF", err);
66 	}
67 
68 	switch (mode) {
69 	case IFNET_MODEL_INPUT_POLL_OFF:
70 		ifnet_set_poll_cycle(ifp, NULL);
71 		ifp->if_rxpoll_offreq++;
72 		if (err != 0) {
73 			ifp->if_rxpoll_offerr++;
74 		}
75 		break;
76 
77 	case IFNET_MODEL_INPUT_POLL_ON:
78 		net_nsectimer(&ival, &ts);
79 		ifnet_set_poll_cycle(ifp, &ts);
80 		ifp->if_rxpoll_onreq++;
81 		if (err != 0) {
82 			ifp->if_rxpoll_onerr++;
83 		}
84 		break;
85 
86 	default:
87 		VERIFY(0);
88 		/* NOTREACHED */
89 		__builtin_unreachable();
90 	}
91 }
92 
/*
 * Updates the input poll statistics and determines the next mode based
 * on the configured thresholds.
 *
 * Called from the poller thread with if_poll_lock NOT held (asserted);
 * the lock is taken as a spin lock internally and dropped before the
 * driver downcall.  "s" carries the packet/byte counts of the work just
 * completed (either a driver poll or an interrupt-mode dequeue).
 */
static inline void
netif_rxpoll_compat_update_rxpoll_stats(struct ifnet *ifp,
    struct ifnet_stat_increment_param *s)
{
	uint32_t poll_thresh = 0, poll_ival = 0;
	uint32_t m_cnt, m_size, poll_req = 0;
	struct timespec now, delta;
	ifnet_model_t mode;
	uint64_t ival;

	ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_NOTOWNED);

	/* total packets and bytes passed in by driver */
	m_cnt = s->packets_in;
	m_size = s->bytes_in;

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/*
	 * NOTE(review): ival is clamped here but not consumed anywhere in
	 * this function (the poll cycle is set in nx_netif_rxpoll_set_mode)
	 * — looks like leftover from the dlil version; confirm.
	 */
	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
		ival = IF_RXPOLL_INTERVALTIME_MIN;
	}
	/* Link parameters changed? */
	if (ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
		/* re-derive auto-tuned parameters; lock already held */
		(void) netif_rxpoll_set_params(ifp, NULL, TRUE);
	}

	/* Current operating mode */
	mode = ifp->if_poll_mode;

	nanouptime(&now);
	if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
		*(&ifp->if_poll_sample_lasttime) = now;
	}

	net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
	if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
		uint32_t ptot, btot;

		/* Accumulate statistics for current sampling */
		PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

		/* not yet at the end of the sampling window; just account */
		if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
			goto skip;
		}
		*(&ifp->if_poll_sample_lasttime) = now;

		/* Calculate min/max of inbound bytes */
		btot = (uint32_t)ifp->if_poll_sstats.bytes;
		if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
			ifp->if_rxpoll_bmin = btot;
		}
		if (btot > ifp->if_rxpoll_bmax) {
			ifp->if_rxpoll_bmax = btot;
		}

		/* Calculate EWMA of inbound bytes */
		NETIF_POLL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

		/* Calculate min/max of inbound packets */
		ptot = (uint32_t)ifp->if_poll_sstats.packets;
		if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
			ifp->if_rxpoll_pmin = ptot;
		}
		if (ptot > ifp->if_rxpoll_pmax) {
			ifp->if_rxpoll_pmax = ptot;
		}

		/* Calculate EWMA of inbound packets */
		NETIF_POLL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

		/* Reset sampling statistics */
		PKTCNTR_CLEAR(&ifp->if_poll_sstats);

#if (SK_LOG && (DEVELOPMENT || DEBUG))
		/* verbose stats dump, rate-limited by netif_poll_dbgrate */
		if (__improbable(sk_verbose & SK_VERB_NETIF_POLL)) {
			if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
			}
			net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
			if (net_timercmp(&delta, &netif_poll_dbgrate, >=)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
				SK_DF(SK_VERB_NETIF_POLL,
				    "%s: [%s] pkts avg %d max %d "
				    "limits [%d/%d], bytes avg %d "
				    "limits [%d/%d]", if_name(ifp),
				    (ifp->if_poll_mode ==
				    IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat,
				    ifp->if_rxpoll_bavg,
				    ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}
		}
#endif /* (SK_LOG && (DEVELOPMENT || DEBUG)) */

		/* Perform mode transition, if necessary */
		if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
			*(&ifp->if_poll_mode_lasttime) = *(&now);
		}

		/* hold the current mode for at least if_poll_mode_holdtime */
		net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
		if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
			goto skip;
		}

		/*
		 * Hysteresis: drop to interrupt mode only when BOTH packet
		 * and byte EWMAs are at/below the low watermarks; switch to
		 * polling only when BOTH are at/above the high watermarks.
		 */
		if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
		    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
			mode = IFNET_MODEL_INPUT_POLL_OFF;
		} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
		    ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
			mode = IFNET_MODEL_INPUT_POLL_ON;
		}

		if (mode != ifp->if_poll_mode) {
			ifp->if_poll_mode = mode;
			*(&ifp->if_poll_mode_lasttime) = *(&now);
			poll_req++;
		}
	}
skip:
	/* update rxpoll stats */
	if (ifp->if_poll_tstats.packets != 0) {
		ifp->if_poll_pstats.ifi_poll_packets +=
		    ifp->if_poll_tstats.packets;
		ifp->if_poll_tstats.packets = 0;
	}
	if (ifp->if_poll_tstats.bytes != 0) {
		ifp->if_poll_pstats.ifi_poll_bytes +=
		    ifp->if_poll_tstats.bytes;
		ifp->if_poll_tstats.bytes = 0;
	}

	lck_mtx_unlock(&ifp->if_poll_lock);
	/*
	 * If there's a mode change, perform a downcall to the driver
	 * for the new mode. This function is called from the poller thread
	 * which holds a reference on the ifnet.
	 */
	if (poll_req != 0) {
		nx_netif_rxpoll_set_mode(ifp, mode);
	}

	/* Signal the poller thread to do work if required */
	if (mode == IFNET_MODEL_INPUT_POLL_ON && m_cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = m_cnt;
	}
	/*
	 * Bump if_poll_req so the poller loop services another round when
	 * the batch size crossed a multiple of if_rxpoll_interval_pkts.
	 */
	if (poll_thresh != 0 && poll_ival > 0 &&
	    (--poll_thresh % poll_ival) == 0) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_req++;
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
}
257 
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 *
 * @param ifp     interface; must have IFEF_RXPOLL set, else ENXIO.
 * @param p       driver-supplied poll parameters, or NULL for auto-tuning.
 * @param locked  TRUE if the caller already holds if_poll_lock.
 * @return 0 on success; ENXIO if polling unsupported; otherwise the
 *         error from dlil_rxpoll_validate_params().
 */
errno_t
netif_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;

	VERIFY(ifp != NULL);
	if ((ifp->if_eflags & IFEF_RXPOLL) == 0) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&ifp->if_poll_lock);
	}
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the poller thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
	return 0;
}
298 
/*
 * Poll up to m_lim packets from the driver and inject them into the
 * input path.  On return, "s" holds the packet/byte counts reported by
 * the driver, *start_time the timestamp just before ifnet_input_poll(),
 * and *poll_duration how long that input processing took.
 */
static inline void
netif_rxpoll_poll_driver(struct ifnet *ifp, uint32_t m_lim,
    struct ifnet_stat_increment_param *s, struct timespec *start_time,
    struct timespec *poll_duration)
{
	struct mbuf *m_head = NULL, *m_tail = NULL;
	uint32_t m_cnt = 0, m_totlen = 0;
	struct timespec now;

	/* invoke the driver's input poll routine */
	((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, &m_cnt,
	&m_totlen));
	/* an empty chain must report both head and tail as NULL */
	VERIFY((m_cnt > 0) || ((m_head == NULL) && (m_tail == NULL)));

	s->packets_in = m_cnt;
	s->bytes_in = m_totlen;
	/*
	 * Bracket the work done with timestamps to compute the effective
	 * poll interval.
	 */
	nanouptime(start_time);
	(void) ifnet_input_poll(ifp, m_head, m_tail,
	    (m_head != NULL) ? s : NULL);
	nanouptime(&now);
	net_timersub(&now, start_time, poll_duration);

	SK_DF(SK_VERB_NETIF_POLL, "%s: polled %d pkts, pkts avg %d max %d, "
	    "wreq avg %d, bytes avg %d", if_name(ifp), m_cnt,
	    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
	    ifp->if_rxpoll_bavg);
}
330 
/*
 * Interrupt-mode servicing: snapshot the receive queue depth into "s"
 * (under the queue's spin lock) and kick the RX mitigation path for the
 * netif adapter's RX ring to drain it.
 */
static inline void
netif_rxpoll_process_interrupt(struct ifnet *ifp, proc_t p,
    struct ifnet_stat_increment_param *s, struct nx_mbq *rcvq)
{
	struct nexus_adapter *na = &NA(ifp)->nifna_up;

	nx_mbq_lock_spin(rcvq);
	s->packets_in = nx_mbq_len(rcvq);
	s->bytes_in = (uint32_t)nx_mbq_size(rcvq);
	nx_mbq_unlock(rcvq);
	(void) nx_netif_mit_rx_intr((NAKR(na, NR_RX)), p, 0, NULL);
}
343 
/*
 * Continuation body of the compat poller thread.  Re-entered via
 * thread_block_parameter() after every sleep: services poll/interrupt
 * work until no new requests are pending, then either re-arms a timed
 * wait (polling mode) or sleeps indefinitely (interrupt mode).  On a
 * termination request the thread tears itself down and never returns.
 */
__attribute__((noreturn))
static void
netif_rxpoll_compat_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct timespec *ts = NULL;
	struct timespec start_time, poll_intvl, poll_duration;
	struct ifnet_stat_increment_param s;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/* bail out immediately if interrupted or asked to terminate */
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;
	/*
	 * Keep on servicing until no more request.
	 */

	for (;;) {
		/* snapshot the request counter to detect new work later */
		uint16_t req = ifp->if_poll_req;
		struct nexus_adapter *na = &NA(ifp)->nifna_up;
		struct __kern_channel_ring *kring = &na->na_rx_rings[0];
		struct nx_mbq *rxq = &kring->ckr_rx_queue;
		uint32_t m_lim;
		boolean_t poll, poll_again = false;

		/* per-poll packet limit: explicit plim, or a derived cap */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((nx_mbq_limit(rxq)), (ifp->if_rxpoll_phiwat << 2));
		poll = (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON);
		lck_mtx_unlock(&ifp->if_poll_lock);

		net_timerclear(&poll_duration);

		/* If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (poll) {
			netif_rxpoll_poll_driver(ifp, m_lim, &s, &start_time,
			    &poll_duration);
			/*
			 * if the polled duration is more than the poll
			 * interval, then poll again to catch up.
			 */
			ASSERT(net_timerisset(&ifp->if_poll_cycle));
			if (net_timercmp(&poll_duration, &ifp->if_poll_cycle,
			    >=)) {
				poll_again = true;
			}
		} else {
			netif_rxpoll_process_interrupt(ifp, kernproc, &s, rxq);
			net_timerclear(&start_time);
		}

		/* may flip if_poll_mode and downcall into the driver */
		netif_rxpoll_compat_update_rxpoll_stats(ifp, &s);
		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if signalled to terminate */
		if (__improbable((ifp->if_poll_flags & IF_POLLF_TERMINATING)
		    != 0)) {
			break;
		}
		/* if there's no pending request, we're done. */
		if (!poll_again && (req == ifp->if_poll_req)) {
			break;
		}
	}

	/* still holding if_poll_lock here */
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
	/*
	 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
	 * until ifnet_poll() is called again.
	 */
	/* calculate work duration (since last start work time) */
	if (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON) {
		ASSERT(net_timerisset(&ifp->if_poll_cycle));
		ASSERT(net_timercmp(&poll_duration, &ifp->if_poll_cycle, <));
		/* sleep only for the remainder of the poll cycle */
		net_timersub(&ifp->if_poll_cycle, &poll_duration, &poll_intvl);
		ASSERT(net_timerisset(&poll_intvl));
		ts = &poll_intvl;
	} else {
		ts = NULL;
	}

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		if (ts != NULL) {
			uint64_t interval;

			_CASSERT(IF_RXPOLL_INTERVALTIME_MIN >= (1ULL * 1000));
			net_timerusec(ts, &interval);
			ASSERT(interval <= UINT32_MAX);
			clock_interval_to_deadline((uint32_t)interval, NSEC_PER_USEC,
			    &deadline);
		}

		/* re-arm and block; resumes at the top of this function */
		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(netif_rxpoll_compat_thread_cont,
		    ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_flags &= ~IF_POLLF_READY;

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
		SK_DF(SK_VERB_NETIF_POLL, "%s: poller thread terminated",
		    if_name(ifp));
		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
486 
/*
 * Entry point of the compat poller thread (started elsewhere via
 * kernel_thread_start — see the deallocate in the continuation).  Names
 * the thread, signals readiness to the activation path, then parks in
 * netif_rxpoll_compat_thread_cont() for the rest of its life.
 */
__attribute__((noreturn))
void
netif_rxpoll_compat_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "skywalk_netif_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_READY | IF_POLLF_RUNNING)));
	/* tell nx_netif_compat_na_activate() to proceed */
	ifp->if_poll_flags |= IF_POLLF_READY;
	wakeup((caddr_t)&ifp->if_poll_flags);
	/* must assert_wait before dropping the lock to avoid a lost wakeup */
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(netif_rxpoll_compat_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
515