1 /*
2 * Copyright (c) 2019-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <sys/kdebug.h>
32 #include <kern/thread.h>
33 #include <kern/sched_prim.h>
34 #include <net/dlil_sysctl.h>
35
36 extern kern_return_t thread_terminate(thread_t);
37
/*
 * Exponentially weighted moving average, computed in integer
 * arithmetic: with a decay of d, the updated average is
 * (old * (2^d - 1) + new) >> d.  A zero old average is seeded
 * directly with the new sample.  Wrapped in do/while(0) so it is
 * safe to use as a single statement; braces added to the branches
 * for consistency with the rest of this file (XNU style).
 */
#define NETIF_POLL_EWMA(old, new, decay) do {				\
	uint32_t _avg;							\
	if ((_avg = (old)) > 0) {					\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	} else {							\
		_avg = (new);						\
	}								\
	(old) = _avg;							\
} while (0)
46
/*
 * Minimum interval between the rate-limited SK_DF debug messages
 * emitted from the poller sampling path; at most one message per
 * second with the current setting.
 */
struct timespec netif_poll_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
49
50 static inline void
nx_netif_rxpoll_set_mode(struct ifnet * ifp,ifnet_model_t mode)51 nx_netif_rxpoll_set_mode(struct ifnet *ifp, ifnet_model_t mode)
52 {
53 errno_t err;
54 uint64_t ival;
55 struct timespec ts;
56 struct ifnet_model_params p = { .model = mode, .reserved = { 0 } };
57
58 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
59 ival = IF_RXPOLL_INTERVALTIME_MIN;
60 }
61
62 if ((err = ((*ifp->if_input_ctl)(ifp, IFNET_CTL_SET_INPUT_MODEL,
63 sizeof(p), &p))) != 0) {
64 SK_ERR("%s: error setting polling mode to %s (%d)",
65 if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ?
66 "ON" : "OFF", err);
67 }
68
69 switch (mode) {
70 case IFNET_MODEL_INPUT_POLL_OFF:
71 ifnet_set_poll_cycle(ifp, NULL);
72 ifp->if_rxpoll_offreq++;
73 if (err != 0) {
74 ifp->if_rxpoll_offerr++;
75 }
76 break;
77
78 case IFNET_MODEL_INPUT_POLL_ON:
79 net_nsectimer(&ival, &ts);
80 ifnet_set_poll_cycle(ifp, &ts);
81 ifp->if_rxpoll_onreq++;
82 if (err != 0) {
83 ifp->if_rxpoll_onerr++;
84 }
85 break;
86
87 default:
88 VERIFY(0);
89 /* NOTREACHED */
90 __builtin_unreachable();
91 }
92 }
93
/*
 * Updates the input poll statistics and determines the next mode based
 * on the configured thresholds.
 *
 * Called from the poller thread with if_poll_lock NOT held; the lock is
 * taken internally.  "s" carries the packet/byte counts just delivered
 * by the driver; they are folded into the sampling counters and, once
 * per sample hold time, into the min/max and EWMA figures used to
 * decide whether to transition between interrupt and polling mode.
 * Any resulting driver downcall is made only after the lock is dropped.
 */
static inline void
netif_rxpoll_compat_update_rxpoll_stats(struct ifnet *ifp,
    struct ifnet_stat_increment_param *s)
{
	uint32_t poll_thresh = 0, poll_ival = 0;
	uint32_t m_cnt, m_size, poll_req = 0;
	struct timespec now, delta;
	ifnet_model_t mode;
	uint64_t ival;

	ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_NOTOWNED);

	/* total packets and bytes passed in by driver */
	m_cnt = s->packets_in;
	m_size = s->bytes_in;

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/*
	 * NOTE(review): "ival" is clamped here but never read again in
	 * this function; it looks vestigial -- confirm before removing.
	 */
	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
		ival = IF_RXPOLL_INTERVALTIME_MIN;
	}
	/* Link parameters changed?  Re-derive the auto-tuned values. */
	if (ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
		(void) netif_rxpoll_set_params(ifp, NULL, TRUE);
	}

	/* Current operating mode */
	mode = ifp->if_poll_mode;

	nanouptime(&now);
	/* seed the sampling timestamp on the first pass */
	if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
		*(&ifp->if_poll_sample_lasttime) = now;
	}

	net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
	if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
		uint32_t ptot, btot;

		/* Accumulate statistics for current sampling */
		PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

		/* keep accumulating until a full sample interval elapses */
		if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
			goto skip;
		}
		*(&ifp->if_poll_sample_lasttime) = now;

		/* Calculate min/max of inbound bytes */
		btot = (uint32_t)ifp->if_poll_sstats.bytes;
		if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
			ifp->if_rxpoll_bmin = btot;
		}
		if (btot > ifp->if_rxpoll_bmax) {
			ifp->if_rxpoll_bmax = btot;
		}

		/* Calculate EWMA of inbound bytes */
		NETIF_POLL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

		/* Calculate min/max of inbound packets */
		ptot = (uint32_t)ifp->if_poll_sstats.packets;
		if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
			ifp->if_rxpoll_pmin = ptot;
		}
		if (ptot > ifp->if_rxpoll_pmax) {
			ifp->if_rxpoll_pmax = ptot;
		}

		/* Calculate EWMA of inbound packets */
		NETIF_POLL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

		/* Reset sampling statistics */
		PKTCNTR_CLEAR(&ifp->if_poll_sstats);

#if (SK_LOG && (DEVELOPMENT || DEBUG))
		/* debug output, rate-limited to one per netif_poll_dbgrate */
		if (__improbable(sk_verbose & SK_VERB_NETIF_POLL)) {
			if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
			}
			net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
			if (net_timercmp(&delta, &netif_poll_dbgrate, >=)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
				SK_DF(SK_VERB_NETIF_POLL,
				    "%s: [%s] pkts avg %d max %d "
				    "limits [%d/%d], bytes avg %d "
				    "limits [%d/%d]", if_name(ifp),
				    (ifp->if_poll_mode ==
				    IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat,
				    ifp->if_rxpoll_bavg,
				    ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}
		}
#endif /* (SK_LOG && (DEVELOPMENT || DEBUG)) */

		/* Perform mode transition, if necessary */
		if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
			*(&ifp->if_poll_mode_lasttime) = *(&now);
		}

		/* hold the current mode at least if_poll_mode_holdtime */
		net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
		if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
			goto skip;
		}

		/*
		 * Hysteresis: both packet and byte averages must fall to
		 * the low watermarks to leave polling, and both must reach
		 * the high watermarks to enter it.
		 */
		if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
		    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
			mode = IFNET_MODEL_INPUT_POLL_OFF;
		} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
		    ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
			mode = IFNET_MODEL_INPUT_POLL_ON;
		}

		if (mode != ifp->if_poll_mode) {
			ifp->if_poll_mode = mode;
			*(&ifp->if_poll_mode_lasttime) = *(&now);
			poll_req++;
		}
	}
skip:
	/* update rxpoll stats */
	if (ifp->if_poll_tstats.packets != 0) {
		ifp->if_poll_pstats.ifi_poll_packets +=
		    ifp->if_poll_tstats.packets;
		ifp->if_poll_tstats.packets = 0;
	}
	if (ifp->if_poll_tstats.bytes != 0) {
		ifp->if_poll_pstats.ifi_poll_bytes +=
		    ifp->if_poll_tstats.bytes;
		ifp->if_poll_tstats.bytes = 0;
	}

	lck_mtx_unlock(&ifp->if_poll_lock);
	/*
	 * If there's a mode change, perform a downcall to the driver
	 * for the new mode. This function is called from the poller thread
	 * which holds a reference on the ifnet.
	 */
	if (poll_req != 0) {
		nx_netif_rxpoll_set_mode(ifp, mode);
	}

	/* Signal the poller thread to do work if required */
	if (mode == IFNET_MODEL_INPUT_POLL_ON && m_cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = m_cnt;
	}
	if (poll_thresh != 0 && poll_ival > 0 &&
	    (--poll_thresh % poll_ival) == 0) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_req++;
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
}
258
259 /*
260 * Must be called on an attached ifnet (caller is expected to check.)
261 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
262 */
263 errno_t
netif_rxpoll_set_params(struct ifnet * ifp,struct ifnet_poll_params * p,boolean_t locked)264 netif_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
265 boolean_t locked)
266 {
267 errno_t err;
268
269 VERIFY(ifp != NULL);
270 if ((ifp->if_eflags & IFEF_RXPOLL) == 0) {
271 return ENXIO;
272 }
273 err = dlil_rxpoll_validate_params(p);
274 if (err != 0) {
275 return err;
276 }
277
278 if (!locked) {
279 lck_mtx_lock(&ifp->if_poll_lock);
280 }
281 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
282 /*
283 * Normally, we'd reset the parameters to the auto-tuned values
284 * if the the poller thread detects a change in link rate. If the
285 * driver provides its own parameters right after a link rate
286 * changes, but before the input thread gets to run, we want to
287 * make sure to keep the driver's values. Clearing if_poll_update
288 * will achieve that.
289 */
290 if (p != NULL && !locked && ifp->if_poll_update != 0) {
291 ifp->if_poll_update = 0;
292 }
293 dlil_rxpoll_update_params(ifp, p);
294 if (!locked) {
295 lck_mtx_unlock(&ifp->if_poll_lock);
296 }
297 return 0;
298 }
299
300 static inline void
netif_rxpoll_poll_driver(struct ifnet * ifp,uint32_t m_lim,struct ifnet_stat_increment_param * s,struct timespec * start_time,struct timespec * poll_duration)301 netif_rxpoll_poll_driver(struct ifnet *ifp, uint32_t m_lim,
302 struct ifnet_stat_increment_param *s, struct timespec *start_time,
303 struct timespec *poll_duration)
304 {
305 struct mbuf *__single m_head = NULL, *__single m_tail = NULL;
306 uint32_t m_cnt = 0, m_totlen = 0;
307 struct timespec now;
308
309 /* invoke the driver's input poll routine */
310 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, &m_cnt,
311 &m_totlen));
312 VERIFY((m_cnt > 0) || ((m_head == NULL) && (m_tail == NULL)));
313
314 s->packets_in = m_cnt;
315 s->bytes_in = m_totlen;
316 /*
317 * Bracket the work done with timestamps to compute the effective
318 * poll interval.
319 */
320 nanouptime(start_time);
321 (void) ifnet_input_poll(ifp, m_head, m_tail,
322 (m_head != NULL) ? s : NULL);
323 nanouptime(&now);
324 net_timersub(&now, start_time, poll_duration);
325
326 SK_DF(SK_VERB_NETIF_POLL, "%s: polled %d pkts, pkts avg %d max %d, "
327 "wreq avg %d, bytes avg %d", if_name(ifp), m_cnt,
328 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
329 ifp->if_rxpoll_bavg);
330 }
331
332 static inline void
netif_rxpoll_process_interrupt(struct ifnet * ifp,proc_t p,struct ifnet_stat_increment_param * s,struct nx_mbq * rcvq)333 netif_rxpoll_process_interrupt(struct ifnet *ifp, proc_t p,
334 struct ifnet_stat_increment_param *s, struct nx_mbq *rcvq)
335 {
336 struct nexus_adapter *na = &NA(ifp)->nifna_up;
337
338 nx_mbq_lock_spin(rcvq);
339 s->packets_in = nx_mbq_len(rcvq);
340 s->bytes_in = (uint32_t)nx_mbq_size(rcvq);
341 nx_mbq_unlock(rcvq);
342 (void) nx_netif_mit_rx_intr((NAKR(na, NR_RX)), p, 0, NULL);
343 }
344
/*
 * Continuation routine for the compat RX poller thread.  The thread
 * always blocks with this function as its continuation (see the
 * thread_block_parameter() calls below and in
 * netif_rxpoll_compat_thread_func), so every wakeup re-enters here at
 * the top.  It services poll/interrupt work until no request is
 * outstanding, then either re-arms a timed wait (polling mode), sleeps
 * indefinitely (interrupt mode), or tears the thread down when
 * termination has been requested.
 */
__attribute__((noreturn))
static void
netif_rxpoll_compat_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *__single ifp = v;
	struct timespec *ts = NULL;
	struct timespec start_time, poll_intvl, poll_duration;
	struct ifnet_stat_increment_param s;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	/* bail out early if interrupted or already asked to terminate */
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;
	/*
	 * Keep on servicing until no more request.
	 */

	for (;;) {
		/* snapshot request counter to detect new requests below */
		uint16_t req = ifp->if_poll_req;
		struct nexus_adapter *na = &NA(ifp)->nifna_up;
		struct __kern_channel_ring *kring = &na->na_rx_rings[0];
		struct nx_mbq *rxq = &kring->ckr_rx_queue;
		uint32_t m_lim;
		boolean_t poll, poll_again = false;

		/* per-poll packet limit: driver value, else derived default */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((nx_mbq_limit(rxq)), (ifp->if_rxpoll_phiwat << 2));
		poll = (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON);
		lck_mtx_unlock(&ifp->if_poll_lock);

		net_timerclear(&poll_duration);

		/* If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (poll) {
			netif_rxpoll_poll_driver(ifp, m_lim, &s, &start_time,
			    &poll_duration);
			/*
			 * if the polled duration is more than the poll
			 * interval, then poll again to catch up.
			 */
			ASSERT(net_timerisset(&ifp->if_poll_cycle));
			if (net_timercmp(&poll_duration, &ifp->if_poll_cycle,
			    >=)) {
				poll_again = true;
			}
		} else {
			/* interrupt mode: service the RX ring's queue */
			netif_rxpoll_process_interrupt(ifp, kernproc, &s, rxq);
			net_timerclear(&start_time);
		}

		/* fold counts into stats; may decide a mode transition */
		netif_rxpoll_compat_update_rxpoll_stats(ifp, &s);
		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if signalled to terminate */
		if (__improbable((ifp->if_poll_flags & IF_POLLF_TERMINATING)
		    != 0)) {
			break;
		}
		/* if there's no pending request, we're done. */
		if (!poll_again && (req == ifp->if_poll_req)) {
			break;
		}
	}

	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
	/*
	 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
	 * until ifnet_poll() is called again.
	 */
	/* calculate work duration (since last start work time) */
	if (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON) {
		ASSERT(net_timerisset(&ifp->if_poll_cycle));
		ASSERT(net_timercmp(&poll_duration, &ifp->if_poll_cycle, <));
		/* sleep only for the remainder of the poll cycle */
		net_timersub(&ifp->if_poll_cycle, &poll_duration, &poll_intvl);
		ASSERT(net_timerisset(&poll_intvl));
		ts = &poll_intvl;
	} else {
		ts = NULL;
	}

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		if (ts != NULL) {
			uint64_t interval;

			_CASSERT(IF_RXPOLL_INTERVALTIME_MIN >= (1ULL * 1000));
			net_timerusec(ts, &interval);
			ASSERT(interval <= UINT32_MAX);
			clock_interval_to_deadline((uint32_t)interval, NSEC_PER_USEC,
			    &deadline);
		}

		/* block with this function as the continuation */
		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(netif_rxpoll_compat_thread_cont,
		    ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_flags &= ~IF_POLLF_READY;

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
		SK_DF(SK_VERB_NETIF_POLL, "%s: poller thread terminated",
		    if_name(ifp));
		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
487
488 __attribute__((noreturn))
489 void
netif_rxpoll_compat_thread_func(void * v,wait_result_t w)490 netif_rxpoll_compat_thread_func(void *v, wait_result_t w)
491 {
492 #pragma unused(w)
493 char thread_name_buf[MAXTHREADNAMESIZE];
494 const char *__null_terminated thread_name = NULL;
495 struct ifnet *__single ifp = v;
496
497 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
498 VERIFY(current_thread() == ifp->if_poll_thread);
499
500 /* construct the name for this thread, and then apply it */
501 bzero(thread_name_buf, sizeof(thread_name_buf));
502 thread_name = tsnprintf(thread_name_buf, sizeof(thread_name_buf),
503 "skywalk_netif_poller_%s", ifp->if_xname);
504 thread_set_thread_name(ifp->if_poll_thread, thread_name);
505
506 lck_mtx_lock(&ifp->if_poll_lock);
507 VERIFY(!(ifp->if_poll_flags & (IF_POLLF_READY | IF_POLLF_RUNNING)));
508 /* tell nx_netif_compat_na_activate() to proceed */
509 ifp->if_poll_flags |= IF_POLLF_READY;
510 wakeup((caddr_t)&ifp->if_poll_flags);
511 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
512 lck_mtx_unlock(&ifp->if_poll_lock);
513 (void) thread_block_parameter(netif_rxpoll_compat_thread_cont, ifp);
514 /* NOTREACHED */
515 __builtin_unreachable();
516 }
517