1 /*
2 * Copyright (c) 2019-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <skywalk/os_skywalk_private.h>
29 #include <skywalk/nexus/netif/nx_netif.h>
30 #include <netinet/ip6.h>
31 #include <netinet6/in6_var.h>
32 #include <net/pktap.h>
33 #include <sys/sdt.h>
34 #include <os/log.h>
35
/* This is just a list for now for simplicity. */
struct netif_list_flowtable {
	/* flat singly-linked list of netif_flow entries (nf_table_link) */
	struct netif_flow_head lft_flow_list;
};
40
41 static netif_flow_lookup_t netif_flow_list_lookup;
42 static netif_flow_insert_t netif_flow_list_insert;
43 static netif_flow_remove_t netif_flow_list_remove;
44 static netif_flow_table_alloc_t netif_flow_list_table_alloc;
45 static netif_flow_table_free_t netif_flow_list_table_free;
46
47 static netif_flow_match_t netif_flow_ethertype_match;
48 static netif_flow_info_t netif_flow_ethertype_info;
49 static netif_flow_match_t netif_flow_ipv6_ula_match;
50 static netif_flow_info_t netif_flow_ipv6_ula_info;
51
/*
 * Two flow table types can share the same internal implementation.
 * Using a list for now for simplicity.
 */
/* ops vector for ethertype-keyed demux (non-low-latency netifs) */
static struct netif_flowtable_ops netif_ethertype_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ethertype_match,
	.nfo_info = netif_flow_ethertype_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};
65
/* ops vector for IPv6 address-pair demux (low-latency "llw" netifs) */
static struct netif_flowtable_ops netif_ipv6_ula_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ipv6_ula_match,
	.nfo_info = netif_flow_ipv6_ula_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};
75
76 static int
netif_flow_get_buf_pkt(struct __kern_packet * pkt,size_t minlen,uint8_t * __sized_by (* len)* buf,uint32_t * len)77 netif_flow_get_buf_pkt(struct __kern_packet *pkt, size_t minlen,
78 uint8_t *__sized_by(*len) *buf, uint32_t *len)
79 {
80 uint8_t *baddr;
81
82 if (pkt->pkt_length < minlen) {
83 return EINVAL;
84 }
85 MD_BUFLET_ADDR_ABS(pkt, baddr);
86 baddr += pkt->pkt_headroom;
87
88 *buf = baddr;
89 *len = pkt->pkt_length;
90 return 0;
91 }
92
93 static int
netif_flow_get_buf_mbuf(struct mbuf * m,size_t minlen,uint8_t * __sized_by (* len)* buf,uint32_t * len)94 netif_flow_get_buf_mbuf(struct mbuf *m, size_t minlen,
95 uint8_t *__sized_by(*len) *buf, uint32_t *len)
96 {
97 /*
98 * XXX
99 * Not pulling up here if mbuf is not contiguous.
100 * This does not impact the current use case (ethertype
101 * demux).
102 */
103 if (m->m_len < minlen) {
104 return EINVAL;
105 }
106 *buf = (uint8_t *)m_mtod_current(m);
107 *len = m->m_len;
108 return 0;
109 }
110
111 static int
netif_flow_get_buf(struct __kern_packet * pkt,size_t minlen,uint8_t * __sized_by (* len)* buf,uint32_t * len)112 netif_flow_get_buf(struct __kern_packet *pkt, size_t minlen,
113 uint8_t *__sized_by(*len) *buf, uint32_t *len)
114 {
115 ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
116 if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
117 ASSERT(pkt->pkt_mbuf != NULL);
118 return netif_flow_get_buf_mbuf(pkt->pkt_mbuf, minlen, buf, len);
119 }
120 return netif_flow_get_buf_pkt(pkt, minlen, buf, len);
121 }
122
/*
 * Flow info extractor for the ethertype table: parses pkt's ethernet
 * header and stores the (de-VLAN-encapsulated) ethertype in
 * fd->fd_ethertype.
 *
 * Returns 0 on success, EINVAL if the packet is too short, or ENOTSUP
 * if the frame carries a VLAN tag with a non-zero VLAN ID (only
 * priority-tagged frames, VLAN ID 0, are accepted).
 */
static int
netif_flow_ethertype_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
#pragma unused (flags)
	ether_header_t *eh;
	uint32_t len;
	uint16_t etype;
	uint16_t tag;
	uint8_t *__sized_by(len) buf;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf,
	    &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	/* buf may be odd-aligned; only load directly if 2-byte aligned */
	if (__probable((((uintptr_t)buf) & 1) == 0)) {
		etype = eh->ether_type;
	} else {
		bcopy(&eh->ether_type, &etype, sizeof(etype));
	}
	etype = ntohs(etype);

	if (kern_packet_get_vlan_tag(SK_PKT2PH(pkt), &tag) == 0) {
		/* tag was stripped by hardware and kept in packet metadata */
		DTRACE_SKYWALK2(hw__vlan, struct __kern_packet *, pkt,
		    uint16_t, tag);
	} else if (etype == ETHERTYPE_VLAN) {
		/* 802.1Q tag is encapsulated in the frame itself */
		struct ether_vlan_header *evh;

		DTRACE_SKYWALK2(encap__vlan, struct __kern_packet *, pkt,
		    uint8_t *, buf);
		/*
		 * Re-check the length: only sizeof(ether_header_t) was
		 * guaranteed above, and ether_vlan_header is larger.
		 */
		if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			struct mbuf *m = pkt->pkt_mbuf;

			if (mbuf_len(m) < sizeof(*evh)) {
				DTRACE_SKYWALK1(mbuf__too__small,
				    struct mbuf *, m);
				return EINVAL;
			}
		} else {
			if (len < sizeof(*evh)) {
				DTRACE_SKYWALK2(pkt__too__small,
				    struct __kern_packet *, pkt,
				    uint32_t, len);
				return EINVAL;
			}
		}
		evh = (struct ether_vlan_header *)eh;
		/* same alignment caution as above */
		if (__probable((((uintptr_t)evh) & 1) == 0)) {
			tag = evh->evl_tag;
			etype = evh->evl_proto;
		} else {
			bcopy(&evh->evl_tag, &tag, sizeof(tag));
			bcopy(&evh->evl_proto, &etype, sizeof(etype));
		}
		tag = ntohs(tag);
		etype = ntohs(etype);
	} else {
		tag = 0;
	}
	/* Only accept priority tagged packets */
	if (EVL_VLANOFTAG(tag) != 0) {
		DTRACE_SKYWALK2(vlan__non__zero,
		    struct __kern_packet *, pkt, uint16_t, tag);
		return ENOTSUP;
	}
	DTRACE_SKYWALK4(extracted__info, struct __kern_packet *, pkt,
	    uint8_t *, buf, uint16_t, tag, uint16_t, etype);
	fd->fd_ethertype = etype;
	return 0;
}
198
199 static boolean_t
netif_flow_ethertype_match(struct netif_flow_desc * fd1,struct netif_flow_desc * fd2)200 netif_flow_ethertype_match(struct netif_flow_desc *fd1,
201 struct netif_flow_desc *fd2)
202 {
203 return fd1->fd_ethertype == fd2->fd_ethertype;
204 }
205
/*
 * Flow info extractor for the IPv6 ULA table: records the local and
 * remote IPv6 addresses of an ETHERTYPE_IPV6 frame into fd.  Direction
 * (NETIF_FLOW_OUTBOUND in flags) decides which of src/dst is "local".
 *
 * Returns 0 on success, ENOENT for non-IPv6 ethertypes, or EINVAL if
 * the buffer cannot hold an ethernet + IPv6 header.
 */
static int
netif_flow_ipv6_ula_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
	ether_header_t *eh;
	uint32_t len;
	uint8_t *__sized_by(len) buf;
	struct ip6_hdr *ip6h;
	void *laddr, *raddr;
	uint16_t etype;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(*eh) + sizeof(*ip6h),
	    &buf, &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	ip6h = (struct ip6_hdr *)(eh + 1);

	/* bcopy: buf may not be suitably aligned for a direct 16-bit load */
	bcopy(&eh->ether_type, &etype, sizeof(etype));
	etype = ntohs(etype);
	if (etype != ETHERTYPE_IPV6) {
		return ENOENT;
	}
	if (len < sizeof(*eh) + sizeof(*ip6h)) {
		return EINVAL;
	}
	if ((flags & NETIF_FLOW_OUTBOUND) != 0) {
		laddr = &ip6h->ip6_src;
		raddr = &ip6h->ip6_dst;
	} else {
		laddr = &ip6h->ip6_dst;
		raddr = &ip6h->ip6_src;
	}
	bcopy(laddr, &fd->fd_laddr, sizeof(struct in6_addr));
	bcopy(raddr, &fd->fd_raddr, sizeof(struct in6_addr));
	return 0;
}
247
248 static boolean_t
netif_flow_ipv6_ula_match(struct netif_flow_desc * fd1,struct netif_flow_desc * fd2)249 netif_flow_ipv6_ula_match(struct netif_flow_desc *fd1, struct netif_flow_desc *fd2)
250 {
251 return IN6_ARE_ADDR_EQUAL(&fd1->fd_laddr, &fd2->fd_laddr) &&
252 IN6_ARE_ADDR_EQUAL(&fd1->fd_raddr, &fd2->fd_raddr);
253 }
254
/*
 * Looks up the flow matching pkt in the list-based table and returns it
 * via *f.  No reference is taken here; the caller
 * (nx_netif_flow_classify) holds nif_flow_lock and takes the reference.
 *
 * Returns ENOENT if no flow matches, or the nfo_info error if the flow
 * description cannot be extracted from the packet.
 */
static int
netif_flow_list_lookup(struct netif_flowtable *ft, struct __kern_packet *pkt,
    uint32_t flags, struct netif_flow **f)
{
	struct netif_list_flowtable *__single lft = ft->ft_internal;
	struct netif_flowtable_ops *fops = ft->ft_ops;
	struct netif_flow *nf;
	struct netif_flow_desc fd;
	int err;

	/* XXX returns the first flow if "accept all" is on */
	if (nx_netif_vp_accept_all != 0) {
		nf = SLIST_FIRST(&lft->lft_flow_list);
		goto done;
	}
	err = fops->nfo_info(pkt, &fd, flags);
	if (err != 0) {
		return err;
	}
	/* linear scan; nf is NULL if the loop runs off the end */
	SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
		if (fops->nfo_match(&nf->nf_desc, &fd)) {
			break;
		}
	}
done:
	if (nf == NULL) {
		return ENOENT;
	}
	*f = nf;
	return 0;
}
286
287 static int
netif_flow_list_insert(struct netif_flowtable * ft,struct netif_flow * f)288 netif_flow_list_insert(struct netif_flowtable *ft, struct netif_flow *f)
289 {
290 struct netif_list_flowtable *__single lft = ft->ft_internal;
291 struct netif_flow *nf;
292
293 SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
294 if (nf->nf_port == f->nf_port ||
295 ft->ft_ops->nfo_match(&nf->nf_desc, &f->nf_desc)) {
296 break;
297 }
298 }
299 if (nf != NULL) {
300 return EEXIST;
301 }
302 SLIST_INSERT_HEAD(&lft->lft_flow_list, f, nf_table_link);
303 return 0;
304 }
305
306 static void
netif_flow_list_remove(struct netif_flowtable * ft,struct netif_flow * f)307 netif_flow_list_remove(struct netif_flowtable *ft, struct netif_flow *f)
308 {
309 struct netif_list_flowtable *__single lft = ft->ft_internal;
310
311 SLIST_REMOVE(&lft->lft_flow_list, f, netif_flow, nf_table_link);
312 }
313
314 static struct netif_flowtable *
netif_flow_list_table_alloc(struct netif_flowtable_ops * ops)315 netif_flow_list_table_alloc(struct netif_flowtable_ops *ops)
316 {
317 struct netif_flowtable *ft;
318 struct netif_list_flowtable *lft;
319
320 ft = skn_alloc_type(flowtable, struct netif_flowtable,
321 Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
322 lft = skn_alloc_type(list_flowtable, struct netif_list_flowtable,
323 Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
324 /*
325 * For now lft just holds a list. We can use any data structure here.
326 */
327 SLIST_INIT(&lft->lft_flow_list);
328 ft->ft_internal = lft;
329 ft->ft_ops = ops;
330 return ft;
331 }
332
333 static void
netif_flow_list_table_free(struct netif_flowtable * ft)334 netif_flow_list_table_free(struct netif_flowtable *ft)
335 {
336 struct netif_list_flowtable *__single lft;
337
338 ASSERT(ft->ft_ops != NULL);
339 ft->ft_ops = NULL;
340
341 ASSERT(ft->ft_internal != NULL);
342 lft = ft->ft_internal;
343 ASSERT(SLIST_EMPTY(&lft->lft_flow_list));
344
345 skn_free_type(list_flowtable, struct netif_list_flowtable, lft);
346 ft->ft_internal = NULL;
347
348 skn_free_type(flowtable, struct netif_flowtable, ft);
349 }
350
/*
 * Hands a packet chain (data) to the flow owner's registered callback.
 * nif is unused; delivery context lives in the flow itself.
 */
static void
nx_netif_flow_deliver(struct nx_netif *nif, struct netif_flow *f,
    void *data, uint32_t flags)
{
#pragma unused(nif)
	f->nf_cb_func(f->nf_cb_arg, data, flags);
}
358
359 void
nx_netif_snoop(struct nx_netif * nif,struct __kern_packet * pkt,boolean_t inbound)360 nx_netif_snoop(struct nx_netif *nif, struct __kern_packet *pkt,
361 boolean_t inbound)
362 {
363 /* pktap only supports IPv4 or IPv6 packets */
364 if (!NETIF_IS_LOW_LATENCY(nif)) {
365 return;
366 }
367 if (inbound) {
368 pktap_input_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
369 -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
370 PTH_FLAG_NEXUS_CHAN);
371 } else {
372 pktap_output_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
373 -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
374 PTH_FLAG_NEXUS_CHAN);
375 }
376 }
377
378 /*
379 * This function ensures that the interface's mac address matches:
380 * -the destination mac address of inbound packets
381 * -the source mac address of outbound packets
382 */
383 boolean_t
nx_netif_validate_macaddr(struct nx_netif * nif,struct __kern_packet * pkt,uint32_t flags)384 nx_netif_validate_macaddr(struct nx_netif *nif, struct __kern_packet *pkt,
385 uint32_t flags)
386 {
387 struct netif_stats *nifs = &nif->nif_stats;
388 struct ifnet *ifp = nif->nif_ifp;
389 uint8_t local_addr[ETHER_ADDR_LEN], *addr;
390 boolean_t valid = FALSE, outbound, mbcast;
391 ether_header_t *eh;
392 uint32_t len;
393 uint8_t *__sized_by(len) buf;
394
395 /*
396 * No need to hold any lock for the checks below because we are not
397 * accessing any shared state.
398 */
399 if (netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf, &len) != 0) {
400 STATS_INC(nifs, NETIF_STATS_VP_BAD_PKT_LEN);
401 DTRACE_SKYWALK2(bad__pkt__sz, struct nx_netif *, nif,
402 struct __kern_packet *, pkt);
403 return FALSE;
404 }
405 DTRACE_SKYWALK4(dump__buf, struct nx_netif *, nif,
406 struct __kern_packet *, pkt, void *, buf, uint32_t, len);
407
408 eh = (ether_header_t *)(void *)buf;
409 outbound = ((flags & NETIF_FLOW_OUTBOUND) != 0);
410 addr = outbound ? eh->ether_shost : eh->ether_dhost;
411 mbcast = ((addr[0] & 1) != 0);
412
413 if (NETIF_IS_LOW_LATENCY(nif)) {
414 /* disallow multicast/broadcast as both src or dest macaddr */
415 if (mbcast) {
416 DTRACE_SKYWALK4(mbcast__pkt__llw,
417 struct nx_netif *, nif, struct __kern_packet *, pkt,
418 void *, buf, uint32_t, len);
419 goto done;
420 }
421 /* only validate macaddr for outbound packets */
422 if (!outbound) {
423 DTRACE_SKYWALK4(skip__check__llw,
424 struct nx_netif *, nif, struct __kern_packet *, pkt,
425 void *, buf, uint32_t, len);
426 return TRUE;
427 }
428 } else {
429 if (mbcast) {
430 if (outbound) {
431 /* disallow multicast/broadcast as src macaddr */
432 DTRACE_SKYWALK4(mbcast__src,
433 struct nx_netif *, nif,
434 struct __kern_packet *, pkt,
435 void *, buf, uint32_t, len);
436 goto done;
437 } else {
438 /* allow multicast/broadcast as dest macaddr */
439 DTRACE_SKYWALK4(mbcast__dest,
440 struct nx_netif *, nif,
441 struct __kern_packet *, pkt,
442 void *, buf, uint32_t, len);
443 return TRUE;
444 }
445 }
446 }
447 if (ifnet_lladdr_copy_bytes(ifp, local_addr, sizeof(local_addr)) != 0) {
448 STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR_LEN);
449 DTRACE_SKYWALK2(bad__addr__len, struct nx_netif *, nif,
450 struct ifnet *, ifp);
451 return FALSE;
452 }
453 valid = (_ether_cmp(local_addr, addr) == 0);
454 done:
455 if (!valid) {
456 /*
457 * A non-matching mac addr is not an error for the input path
458 * because we are expected to get such packets. These packets
459 * are already counted as NETIF_STATS_FLOW_NOT_FOUND.
460 */
461 if (outbound) {
462 STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR);
463 }
464 DTRACE_SKYWALK2(bad__addr, struct nx_netif *, nif,
465 struct __kern_packet *, pkt);
466 }
467 return valid;
468 }
469
470 /*
471 * Checks whether a packet matches the specified flow's description.
472 * This is used for validating outbound packets.
473 */
474 boolean_t
nx_netif_flow_match(struct nx_netif * nif,struct __kern_packet * pkt,struct netif_flow * f,uint32_t flags)475 nx_netif_flow_match(struct nx_netif *nif, struct __kern_packet *pkt,
476 struct netif_flow *f, uint32_t flags)
477 {
478 struct netif_stats *nifs = &nif->nif_stats;
479 struct netif_flowtable *ft;
480 struct netif_flowtable_ops *fops;
481 struct netif_flow_desc fd;
482 boolean_t match = FALSE;
483 int err;
484
485 /*
486 * Unlike the lookup case, ft cannot be NULL here because there
487 * should be a table to hold our flow. No locking is needed because
488 * no one can close our channel while we have ongoing syncs.
489 */
490 VERIFY((ft = nif->nif_flow_table) != NULL);
491 fops = ft->ft_ops;
492
493 /*
494 * We increment error stats here but not when we classify because in
495 * this case a match is expected.
496 */
497 err = fops->nfo_info(pkt, &fd, flags);
498 if (err != 0) {
499 STATS_INC(nifs, NETIF_STATS_VP_FLOW_INFO_ERR);
500 DTRACE_SKYWALK3(info__err, struct nx_netif *, nif, int, err,
501 struct __kern_packet *, pkt);
502 return FALSE;
503 }
504 match = fops->nfo_match(&f->nf_desc, &fd);
505 if (!match) {
506 STATS_INC(nifs, NETIF_STATS_VP_FLOW_NOT_MATCH);
507 DTRACE_SKYWALK3(not__match, struct nx_netif *, nif,
508 struct netif_flow *, f, struct __kern_packet *, pkt);
509 }
510 return match;
511 }
512
/*
 * Classifies pkt against the netif's flow table and returns the matching
 * flow with a reference held (dropped later via nx_netif_flow_release()),
 * or NULL if flows are disabled, the table is empty, or no flow matches.
 * The reference keeps the flow alive while packets are delivered to it.
 */
struct netif_flow *
nx_netif_flow_classify(struct nx_netif *nif, struct __kern_packet *pkt,
    uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_flow *__single f = NULL;
	struct netif_flowtable *ft;
	int err;

	lck_mtx_lock(&nif->nif_flow_lock);
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_DISABLED);
		DTRACE_SKYWALK1(disabled, struct nx_netif *, nif);
		goto fail;
	}
	if ((ft = nif->nif_flow_table) == NULL) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_EMPTY_TABLE);
		DTRACE_SKYWALK1(empty__flowtable, struct nx_netif *, nif);
		goto fail;
	}
	err = ft->ft_ops->nfo_lookup(ft, pkt, flags, &f);
	if (err != 0) {
		/* caller increments counter */
		DTRACE_SKYWALK1(not__found, struct nx_netif *, nif);
		goto fail;
	}
	/* reference taken under nif_flow_lock; see nx_netif_flow_release() */
	f->nf_refcnt++;
	lck_mtx_unlock(&nif->nif_flow_lock);
	return f;

fail:
	lck_mtx_unlock(&nif->nif_flow_lock);
	return NULL;
}
547
548 void
nx_netif_flow_release(struct nx_netif * nif,struct netif_flow * nf)549 nx_netif_flow_release(struct nx_netif *nif, struct netif_flow *nf)
550 {
551 lck_mtx_lock(&nif->nif_flow_lock);
552 if (--nf->nf_refcnt == 0) {
553 wakeup(&nf->nf_refcnt);
554 }
555 lck_mtx_unlock(&nif->nif_flow_lock);
556 }
557
558 static struct netif_flow *
flow_classify(struct nx_netif * nif,struct __kern_packet * pkt,uint32_t flags)559 flow_classify(struct nx_netif *nif, struct __kern_packet *pkt, uint32_t flags)
560 {
561 if (nx_netif_vp_accept_all == 0 &&
562 !nx_netif_validate_macaddr(nif, pkt, flags)) {
563 return NULL;
564 }
565 return nx_netif_flow_classify(nif, pkt, flags);
566 }
567
/*
 * Demultiplexes a packet chain to per-flow callbacks.  Consecutive
 * packets that classify to the same flow are batched into one chain and
 * delivered together.  Packets that fail classification are collected
 * on a "remain" chain: returned via *remain if non-NULL, otherwise
 * freed.  stats (if non-NULL) accumulates delivered packet/byte counts.
 * Always returns 0.
 */
errno_t
nx_netif_demux(struct nexus_netif_adapter *nifna,
    struct __kern_packet *pkt_chain, struct __kern_packet **remain,
    struct nexus_pkt_stats *stats, uint32_t flags)
{
	struct __kern_packet *pkt = pkt_chain, *next;
	struct __kern_packet *__single head = NULL;       /* current batch */
	struct __kern_packet **tailp = &head;
	struct __kern_packet *__single rhead = NULL;      /* unclassified */
	struct __kern_packet **rtailp = &rhead;
	struct netif_flow *nf, *prev_nf = NULL;
	struct nx_netif *nif = nifna->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	int c = 0, r = 0, delivered = 0, bytes = 0, rbytes = 0, plen = 0;

	while (pkt != NULL) {
		/* detach pkt from the input chain before re-linking it */
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
		plen = ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) ?
		    m_pktlen(pkt->pkt_mbuf) : pkt->pkt_length;

		/*
		 * The returned nf is refcounted to ensure it doesn't
		 * disappear while packets are being delivered.
		 */
		nf = flow_classify(nif, pkt, flags);
		if (nf != NULL) {
			nx_netif_snoop(nif, pkt, TRUE);

			/*
			 * Keep growing the chain until we classify to a
			 * different nf.
			 */
			if (prev_nf != NULL) {
				if (prev_nf != nf) {
					/* flow changed: flush the batch */
					DTRACE_SKYWALK5(deliver,
					    struct nx_netif *, nif,
					    struct netif_flow *, prev_nf,
					    struct __kern_packet *, head,
					    int, c, uint32_t, flags);

					nx_netif_flow_deliver(nif,
					    prev_nf, head, flags);
					nx_netif_flow_release(nif, prev_nf);
					prev_nf = nf;
					head = NULL;
					tailp = &head;
					delivered += c;
					c = 0;
				} else {
					/*
					 * one reference is enough.
					 */
					nx_netif_flow_release(nif, nf);
				}
			} else {
				prev_nf = nf;
			}
			/* append pkt to the current batch */
			c++;
			bytes += plen;
			*tailp = pkt;
			tailp = &pkt->pkt_nextpkt;
		} else {
			/* no flow: append to the remainder chain */
			r++;
			rbytes += plen;
			*rtailp = pkt;
			rtailp = &pkt->pkt_nextpkt;
		}
		pkt = next;
	}
	if (head != NULL) {
		/* flush the final batch */
		ASSERT(prev_nf != NULL);
		DTRACE_SKYWALK5(deliver__last, struct nx_netif *,
		    nif, struct netif_flow *, prev_nf, struct __kern_packet *,
		    pkt, int, c, uint32_t, flags);

		nx_netif_flow_deliver(nif, prev_nf, head, flags);
		nx_netif_flow_release(nif, prev_nf);
		prev_nf = NULL;
		head = NULL;
		tailp = &head;
		delivered += c;
	}
	if (rhead != NULL) {
		if (remain != NULL) {
			*remain = rhead;
		} else {
			nx_netif_free_packet_chain(rhead, NULL);
		}
	}

	if (stats != NULL) {
		stats->nps_pkts += delivered;
		stats->nps_bytes += bytes;
	}

	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_FOUND, delivered);
	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_NOT_FOUND, r);
	DTRACE_SKYWALK5(demux__delivered, struct nx_netif *,
	    nif, int, delivered, int, r, int, bytes, int, rbytes);
	return 0;
}
672
673 SK_NO_INLINE_ATTRIBUTE
674 static errno_t
nx_netif_flowtable_init(struct nx_netif * nif,netif_flowtable_type_t type)675 nx_netif_flowtable_init(struct nx_netif *nif, netif_flowtable_type_t type)
676 {
677 struct netif_flowtable *ft;
678 struct netif_flowtable_ops *fops;
679
680 switch (type) {
681 case FT_TYPE_ETHERTYPE:
682 fops = &netif_ethertype_ops;
683 break;
684 case FT_TYPE_IPV6_ULA:
685 fops = &netif_ipv6_ula_ops;
686 break;
687 default:
688 return ENOTSUP;
689 }
690 ft = fops->nfo_table_alloc(fops);
691 if (ft == NULL) {
692 return ENOMEM;
693 }
694 nif->nif_flow_table = ft;
695 return 0;
696 }
697
698 SK_NO_INLINE_ATTRIBUTE
699 static void
nx_netif_flowtable_fini(struct nx_netif * nif)700 nx_netif_flowtable_fini(struct nx_netif *nif)
701 {
702 struct netif_flowtable *ft = nif->nif_flow_table;
703
704 ASSERT(ft != NULL);
705 ft->ft_ops->nfo_table_free(ft);
706 nif->nif_flow_table = NULL;
707 }
708
709 /*
710 * netif doesn't keep accounting of flow statistics, this log message will
711 * print a snapshot of the current netif stats at the time of flow creation
712 * and removal. For a netif on interfaces like "llwX", the difference in these
713 * stats at creation vs removal will be analogous to flow stats as there will
714 * be atmost one flow active at any given time.
715 */
716 static inline void
nx_netif_flow_log(struct nx_netif * nif,struct netif_flow * nf,boolean_t add)717 nx_netif_flow_log(struct nx_netif *nif, struct netif_flow *nf, boolean_t add)
718 {
719 int i;
720 struct netif_stats *nifs = &nif->nif_stats;
721
722 os_log(OS_LOG_DEFAULT, "netif flowstats (%s): if %s, nx_port %d, "
723 "ethertype 0x%x, src %s, dst %s", add ? "add" : "remove",
724 if_name(nif->nif_ifp), nf->nf_port, nf->nf_desc.fd_ethertype,
725 ip6_sprintf(&nf->nf_desc.fd_laddr),
726 ip6_sprintf(&nf->nf_desc.fd_raddr));
727 for (i = 0; i < __NETIF_STATS_MAX; i++) {
728 if (STATS_VAL(nifs, i) == 0) {
729 continue;
730 }
731 os_log(OS_LOG_DEFAULT, "%s: %llu", netif_stats_str(i),
732 STATS_VAL(nifs, i));
733 }
734 }
735
736 errno_t
nx_netif_flow_add(struct nx_netif * nif,nexus_port_t port,struct netif_flow_desc * desc,void * cb_arg,errno_t (* cb_func)(void *,void *,uint32_t),struct netif_flow ** nfp)737 nx_netif_flow_add(struct nx_netif *nif, nexus_port_t port,
738 struct netif_flow_desc *desc, void *cb_arg,
739 errno_t (*cb_func)(void *, void *, uint32_t),
740 struct netif_flow **nfp)
741 {
742 struct netif_flow *nf = NULL;
743 struct netif_flowtable *ft;
744 struct netif_stats *nifs = &nif->nif_stats;
745 boolean_t refcnt_incr = FALSE, new_table = FALSE;
746 errno_t err = 0;
747
748 lck_mtx_lock(&nif->nif_flow_lock);
749 nf = sk_alloc_type(struct netif_flow, Z_WAITOK | Z_NOFAIL,
750 skmem_tag_netif_flow);
751 bcopy(desc, &nf->nf_desc, sizeof(*desc));
752 nf->nf_port = port;
753 nf->nf_refcnt = 0;
754 nf->nf_cb_arg = cb_arg;
755 nf->nf_cb_func = cb_func;
756
757 if (++nif->nif_flow_cnt == 1) {
758 netif_flowtable_type_t ft_type;
759
760 ft_type = NETIF_IS_LOW_LATENCY(nif) ? FT_TYPE_IPV6_ULA :
761 FT_TYPE_ETHERTYPE;
762
763 err = nx_netif_flowtable_init(nif, ft_type);
764 if (err != 0) {
765 STATS_INC(nifs, NETIF_STATS_VP_FLOW_TABLE_INIT_FAIL);
766 DTRACE_SKYWALK1(flowtable__init__fail,
767 struct nx_netif *, nif);
768 goto fail;
769 }
770 new_table = TRUE;
771 }
772 refcnt_incr = TRUE;
773 ft = nif->nif_flow_table;
774 err = ft->ft_ops->nfo_insert(ft, nf);
775 if (err != 0) {
776 STATS_INC(nifs, NETIF_STATS_VP_FLOW_INSERT_FAIL);
777 DTRACE_SKYWALK1(insert__fail, struct nx_netif *, nif);
778 goto fail;
779 }
780 SLIST_INSERT_HEAD(&nif->nif_flow_list, nf, nf_link);
781 if (nfp != NULL) {
782 *nfp = nf;
783 }
784 STATS_INC(nifs, NETIF_STATS_VP_FLOW_ADD);
785 lck_mtx_unlock(&nif->nif_flow_lock);
786 SK_DF(SK_VERB_VP, "flow add successful: if %s, nif %p",
787 if_name(nif->nif_ifp), SK_KVA(nif));
788 nx_netif_flow_log(nif, nf, TRUE);
789 return 0;
790
791 fail:
792 if (nf != NULL) {
793 sk_free_type(struct netif_flow, nf);
794 }
795 if (refcnt_incr && --nif->nif_flow_cnt == 0) {
796 if (new_table) {
797 nx_netif_flowtable_fini(nif);
798 }
799 }
800 lck_mtx_unlock(&nif->nif_flow_lock);
801 SK_ERR("flow add failed: if %s, nif %p, err %d",
802 if_name(nif->nif_ifp), SK_KVA(nif), err);
803 return err;
804 }
805
/*
 * Unregisters a flow: unlinks it from both the netif list and the flow
 * table, sleeps until all classify references drain, frees the table if
 * this was the last flow, and finally frees the flow.  Always returns 0.
 */
errno_t
nx_netif_flow_remove(struct nx_netif *nif, struct netif_flow *nf)
{
	struct netif_flowtable_ops *fops;
	struct netif_flowtable *ft;
	struct netif_stats *nifs = &nif->nif_stats;

	lck_mtx_lock(&nif->nif_flow_lock);
	SLIST_REMOVE(&nif->nif_flow_list, nf, netif_flow, nf_link);
	ft = nif->nif_flow_table;
	ASSERT(ft != NULL);
	fops = ft->ft_ops;
	fops->nfo_remove(ft, nf);

	/*
	 * Once removed from the table no new references can be taken;
	 * wait for in-flight deliveries to release theirs (woken by
	 * nx_netif_flow_release()).
	 */
	while (nf->nf_refcnt > 0) {
		DTRACE_SKYWALK1(wait__refcnt, struct netif_flow *, nf);
		(void) msleep(&nf->nf_refcnt,
		    &nif->nif_flow_lock, (PZERO + 1),
		    __FUNCTION__, NULL);
	}
	if (--nif->nif_flow_cnt == 0) {
		nx_netif_flowtable_fini(nif);
	}
	STATS_INC(nifs, NETIF_STATS_VP_FLOW_REMOVE);
	lck_mtx_unlock(&nif->nif_flow_lock);

	SK_DF(SK_VERB_VP, "flow remove: if %s, nif %p",
	    if_name(nif->nif_ifp), SK_KVA(nif));
	nx_netif_flow_log(nif, nf, FALSE);
	sk_free_type(struct netif_flow, nf);
	return 0;
}
838
839 void
nx_netif_flow_init(struct nx_netif * nif)840 nx_netif_flow_init(struct nx_netif *nif)
841 {
842 ifnet_t ifp = nif->nif_ifp;
843
844 if (!ifnet_needs_netif_netagent(ifp) && !NETIF_IS_LOW_LATENCY(nif)) {
845 SK_DF(SK_VERB_VP, "%s: flows not supported due to missing "
846 "if_attach_nx flag or invalid interface type",
847 if_name(ifp));
848 return;
849 }
850 if (ifp->if_family != IFNET_FAMILY_ETHERNET) {
851 SK_DF(SK_VERB_VP, "%s: flows not supported on "
852 "interface family %d", if_name(ifp), ifp->if_family);
853 return;
854 }
855 ASSERT(nif->nif_flow_flags == 0);
856 lck_mtx_init(&nif->nif_flow_lock, &nexus_lock_group,
857 &nexus_lock_attr);
858
859 SLIST_INIT(&nif->nif_flow_list);
860 nif->nif_flow_table = NULL;
861 nif->nif_flow_cnt = 0;
862 nif->nif_flow_flags |= NETIF_FLOW_FLAG_INITIALIZED;
863
864 SK_DF(SK_VERB_VP, "%s: flows initialized", if_name(ifp));
865 }
866
/*
 * Tears down flow support for a netif.  Requires that flows were
 * already disabled and every flow removed (asserted below); destroys
 * the lock and clears INITIALIZED.  No-op if init never ran.
 */
void
nx_netif_flow_fini(struct nx_netif *nif)
{
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
		SK_DF(SK_VERB_VP, "%s: flows not initialized",
		    if_name(nif->nif_ifp));
		return;
	}
	nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_INITIALIZED;

	/* This should've been cleared before we get to this point */
	ASSERT((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0);
	ASSERT(nif->nif_flow_cnt == 0);
	ASSERT(nif->nif_flow_table == NULL);
	ASSERT(SLIST_EMPTY(&nif->nif_flow_list));

	lck_mtx_destroy(&nif->nif_flow_lock, &nexus_lock_group);

	SK_DF(SK_VERB_VP, "%s: flows uninitialization done",
	    if_name(nif->nif_ifp));
}
888
889 static void
nx_netif_flow_set_enable(struct nx_netif * nif,boolean_t set)890 nx_netif_flow_set_enable(struct nx_netif *nif, boolean_t set)
891 {
892 /*
893 * No locking needed while checking for the initialized bit because
894 * if this were not set, no other flag would be modified.
895 */
896 if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
897 return;
898 }
899 lck_mtx_lock(&nif->nif_flow_lock);
900 if (set) {
901 SK_DF(SK_VERB_VP, "%s: flow enable, nif %p",
902 if_name(nif->nif_ifp), SK_KVA(nif));
903 nif->nif_flow_flags |= NETIF_FLOW_FLAG_ENABLED;
904 } else {
905 SK_DF(SK_VERB_VP, "%s: flow disable, nif %p",
906 if_name(nif->nif_ifp), SK_KVA(nif));
907 nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_ENABLED;
908 }
909 lck_mtx_unlock(&nif->nif_flow_lock);
910 }
911
/* Enables flow classification/delivery on this netif. */
void
nx_netif_flow_enable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, TRUE);
}
917
/* Disables flow classification/delivery on this netif. */
void
nx_netif_flow_disable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, FALSE);
}
923