xref: /xnu-12377.81.4/bsd/skywalk/nexus/netif/nx_netif_flow.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
/*
 * Copyright (c) 2019-2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <net/pktap.h>
#include <sys/sdt.h>
#include <os/log.h>

/* This is just a list for now for simplicity. */
struct netif_list_flowtable {
	struct netif_flow_head  lft_flow_list;
};

static netif_flow_lookup_t netif_flow_list_lookup;
static netif_flow_insert_t netif_flow_list_insert;
static netif_flow_remove_t netif_flow_list_remove;
static netif_flow_table_alloc_t netif_flow_list_table_alloc;
static netif_flow_table_free_t netif_flow_list_table_free;

static netif_flow_match_t netif_flow_ethertype_match;
static netif_flow_info_t netif_flow_ethertype_info;
static netif_flow_match_t netif_flow_ipv6_ula_match;
static netif_flow_info_t netif_flow_ipv6_ula_info;

/*
 * Two flow table types can share the same internal implementation.
 * Using a list for now for simplicity.
 */
static struct netif_flowtable_ops netif_ethertype_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ethertype_match,
	.nfo_info = netif_flow_ethertype_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};

static struct netif_flowtable_ops netif_ipv6_ula_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ipv6_ula_match,
	.nfo_info = netif_flow_ipv6_ula_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};

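/*
 * All table operations dispatch through the table's ops vector.  The
 * classification path calls nfo_lookup(), which in turn uses nfo_info()
 * to extract a flow descriptor from the packet and nfo_match() to
 * compare that descriptor against each registered flow (see
 * netif_flow_list_lookup() below).
 *
 * The netif_flow_get_buf_*() helpers below return a pointer to a
 * contiguous buffer holding at least minlen bytes of packet data,
 * sourced either from the packet's own buflet or from an attached mbuf.
 */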
static int
netif_flow_get_buf_pkt(struct __kern_packet *pkt, size_t minlen,
    uint8_t *__sized_by(*len) *buf, uint32_t *len)
{
	uint8_t *baddr;

	if (pkt->pkt_length < minlen) {
		return EINVAL;
	}
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	baddr += pkt->pkt_headroom;

	*buf = baddr;
	*len = pkt->pkt_length;
	return 0;
}

static int
netif_flow_get_buf_mbuf(struct mbuf *m, size_t minlen,
    uint8_t *__sized_by(*len) *buf, uint32_t *len)
{
	/*
	 * XXX
	 * Not pulling up here if mbuf is not contiguous.
	 * This does not impact the current use case (ethertype
	 * demux).
	 */
	if (m->m_len < minlen) {
		return EINVAL;
	}
	*buf = (uint8_t *)m_mtod_current(m);
	*len = m->m_len;
	return 0;
}

static int
netif_flow_get_buf(struct __kern_packet *pkt, size_t minlen,
    uint8_t *__sized_by(*len) *buf, uint32_t *len)
{
	ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
	if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
		ASSERT(pkt->pkt_mbuf != NULL);
		return netif_flow_get_buf_mbuf(pkt->pkt_mbuf, minlen, buf, len);
	}
	return netif_flow_get_buf_pkt(pkt, minlen, buf, len);
}

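/*
 * Extracts the ethertype of an Ethernet frame into the flow descriptor,
 * looking through a VLAN encapsulation if one is present.  Frames
 * carrying a non-zero VLAN ID are rejected; untagged and
 * priority-tagged (VLAN ID 0) frames are accepted.
 */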
static int
netif_flow_ethertype_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
#pragma unused (flags)
	ether_header_t *eh;
	uint32_t len;
	uint16_t etype;
	uint16_t tag;
	uint8_t *__sized_by(len) buf;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf,
	    &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	/* the header may be misaligned; copy the field if so */
	if (__probable((((uintptr_t)buf) & 1) == 0)) {
		etype = eh->ether_type;
	} else {
		bcopy(&eh->ether_type, &etype, sizeof(etype));
	}
	etype = ntohs(etype);

	if (kern_packet_get_vlan_tag(SK_PKT2PH(pkt), &tag) == 0) {
		DTRACE_SKYWALK2(hw__vlan, struct __kern_packet *, pkt,
		    uint16_t, tag);
	} else if (etype == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evh;

		DTRACE_SKYWALK2(encap__vlan, struct __kern_packet *, pkt,
		    uint8_t *, buf);
		if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			struct mbuf *m = pkt->pkt_mbuf;

			if (mbuf_len(m) < sizeof(*evh)) {
				DTRACE_SKYWALK1(mbuf__too__small,
				    struct mbuf *, m);
				return EINVAL;
			}
		} else {
			if (len < sizeof(*evh)) {
				DTRACE_SKYWALK2(pkt__too__small,
				    struct __kern_packet *, pkt,
				    uint32_t, len);
				return EINVAL;
			}
		}
		evh = (struct ether_vlan_header *)eh;
		if (__probable((((uintptr_t)evh) & 1) == 0)) {
			tag = evh->evl_tag;
			etype = evh->evl_proto;
		} else {
			bcopy(&evh->evl_tag, &tag, sizeof(tag));
			bcopy(&evh->evl_proto, &etype, sizeof(etype));
		}
		tag = ntohs(tag);
		etype = ntohs(etype);
	} else {
		tag = 0;
	}
	/* Only accept untagged or priority-tagged (VLAN ID 0) packets */
	if (EVL_VLANOFTAG(tag) != 0) {
		DTRACE_SKYWALK2(vlan__non__zero,
		    struct __kern_packet *, pkt, uint16_t, tag);
		return ENOTSUP;
	}
	DTRACE_SKYWALK4(extracted__info, struct __kern_packet *, pkt,
	    uint8_t *, buf, uint16_t, tag, uint16_t, etype);
	fd->fd_ethertype = etype;
	return 0;
}

static boolean_t
netif_flow_ethertype_match(struct netif_flow_desc *fd1,
    struct netif_flow_desc *fd2)
{
	return fd1->fd_ethertype == fd2->fd_ethertype;
}

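/*
 * Extracts the local and remote IPv6 addresses of an Ethernet-framed
 * IPv6 packet into the flow descriptor.  Which address is considered
 * local depends on the packet's direction.
 */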
static int
netif_flow_ipv6_ula_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
	ether_header_t *eh;
	uint32_t len;
	uint8_t *__sized_by(len) buf;
	struct ip6_hdr *ip6h;
	void *laddr, *raddr;
	uint16_t etype;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(*eh) + sizeof(*ip6h),
	    &buf, &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	ip6h = (struct ip6_hdr *)(eh + 1);

	bcopy(&eh->ether_type, &etype, sizeof(etype));
	etype = ntohs(etype);
	if (etype != ETHERTYPE_IPV6) {
		return ENOENT;
	}
	if (len < sizeof(*eh) + sizeof(*ip6h)) {
		return EINVAL;
	}
	if ((flags & NETIF_FLOW_OUTBOUND) != 0) {
		laddr = &ip6h->ip6_src;
		raddr = &ip6h->ip6_dst;
	} else {
		laddr = &ip6h->ip6_dst;
		raddr = &ip6h->ip6_src;
	}
	bcopy(laddr, &fd->fd_laddr, sizeof(struct in6_addr));
	bcopy(raddr, &fd->fd_raddr, sizeof(struct in6_addr));
	return 0;
}

static boolean_t
netif_flow_ipv6_ula_match(struct netif_flow_desc *fd1, struct netif_flow_desc *fd2)
{
	return IN6_ARE_ADDR_EQUAL(&fd1->fd_laddr, &fd2->fd_laddr) &&
	       IN6_ARE_ADDR_EQUAL(&fd1->fd_raddr, &fd2->fd_raddr);
}

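/*
 * List-based implementation of the flow table operations.  Lookup is a
 * linear scan over the registered flows; insertion rejects a duplicate
 * nexus port or flow descriptor.
 */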
static int
netif_flow_list_lookup(struct netif_flowtable *ft, struct __kern_packet *pkt,
    uint32_t flags, struct netif_flow **f)
{
	struct netif_list_flowtable *__single lft = ft->ft_internal;
	struct netif_flowtable_ops *fops = ft->ft_ops;
	struct netif_flow *nf;
	struct netif_flow_desc fd;
	int err;

	/* XXX returns the first flow if "accept all" is on */
	if (nx_netif_vp_accept_all != 0) {
		nf = SLIST_FIRST(&lft->lft_flow_list);
		goto done;
	}
	err = fops->nfo_info(pkt, &fd, flags);
	if (err != 0) {
		return err;
	}
	SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
		if (fops->nfo_match(&nf->nf_desc, &fd)) {
			break;
		}
	}
done:
	if (nf == NULL) {
		return ENOENT;
	}
	*f = nf;
	return 0;
}

static int
netif_flow_list_insert(struct netif_flowtable *ft, struct netif_flow *f)
{
	struct netif_list_flowtable *__single lft = ft->ft_internal;
	struct netif_flow *nf;

	SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
		if (nf->nf_port == f->nf_port ||
		    ft->ft_ops->nfo_match(&nf->nf_desc, &f->nf_desc)) {
			break;
		}
	}
	if (nf != NULL) {
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&lft->lft_flow_list, f, nf_table_link);
	return 0;
}

static void
netif_flow_list_remove(struct netif_flowtable *ft, struct netif_flow *f)
{
	struct netif_list_flowtable *__single lft = ft->ft_internal;

	SLIST_REMOVE(&lft->lft_flow_list, f, netif_flow, nf_table_link);
}

static struct netif_flowtable *
netif_flow_list_table_alloc(struct netif_flowtable_ops *ops)
{
	struct netif_flowtable *ft;
	struct netif_list_flowtable *lft;

	ft = skn_alloc_type(flowtable, struct netif_flowtable,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
	lft = skn_alloc_type(list_flowtable, struct netif_list_flowtable,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
	/*
	 * For now lft just holds a list. We can use any data structure here.
	 */
	SLIST_INIT(&lft->lft_flow_list);
	ft->ft_internal = lft;
	ft->ft_ops = ops;
	return ft;
}

static void
netif_flow_list_table_free(struct netif_flowtable *ft)
{
	struct netif_list_flowtable *__single lft;

	ASSERT(ft->ft_ops != NULL);
	ft->ft_ops = NULL;

	ASSERT(ft->ft_internal != NULL);
	lft = ft->ft_internal;
	ASSERT(SLIST_EMPTY(&lft->lft_flow_list));

	skn_free_type(list_flowtable, struct netif_list_flowtable, lft);
	ft->ft_internal = NULL;

	skn_free_type(flowtable, struct netif_flowtable, ft);
}

static void
nx_netif_flow_deliver(struct nx_netif *nif, struct netif_flow *f,
    void *data, uint32_t flags)
{
#pragma unused(nif)
	f->nf_cb_func(f->nf_cb_arg, data, flags);
}

void
nx_netif_snoop(struct nx_netif *nif, struct __kern_packet *pkt,
    boolean_t inbound)
{
	/*
	 * Snoop only low-latency netifs: pktap supports only IPv4 or
	 * IPv6 packets, and these interfaces carry IPv6 traffic.
	 */
	if (!NETIF_IS_LOW_LATENCY(nif)) {
		return;
	}
	if (inbound) {
		pktap_input_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
		    -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
		    PTH_FLAG_NEXUS_CHAN);
	} else {
		pktap_output_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
		    -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
		    PTH_FLAG_NEXUS_CHAN);
	}
}

/*
 * This function ensures that the interface's MAC address matches:
 * - the destination MAC address of inbound packets
 * - the source MAC address of outbound packets
 */
boolean_t
nx_netif_validate_macaddr(struct nx_netif *nif, struct __kern_packet *pkt,
    uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct ifnet *ifp = nif->nif_ifp;
	uint8_t local_addr[ETHER_ADDR_LEN], *addr;
	boolean_t valid = FALSE, outbound, mbcast;
	ether_header_t *eh;
	uint32_t len;
	uint8_t *__sized_by(len) buf;

	/*
	 * No need to hold any lock for the checks below because we are not
	 * accessing any shared state.
	 */
	if (netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf, &len) != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_BAD_PKT_LEN);
		DTRACE_SKYWALK2(bad__pkt__sz, struct nx_netif *, nif,
		    struct __kern_packet *, pkt);
		return FALSE;
	}
	DTRACE_SKYWALK4(dump__buf, struct nx_netif *, nif,
	    struct __kern_packet *, pkt, void *, buf, uint32_t, len);

	eh = (ether_header_t *)(void *)buf;
	outbound = ((flags & NETIF_FLOW_OUTBOUND) != 0);
	addr = outbound ? eh->ether_shost : eh->ether_dhost;
	mbcast = ((addr[0] & 1) != 0);

	if (NETIF_IS_LOW_LATENCY(nif)) {
		/* disallow multicast/broadcast as either src or dest macaddr */
		if (mbcast) {
			DTRACE_SKYWALK4(mbcast__pkt__llw,
			    struct nx_netif *, nif, struct __kern_packet *, pkt,
			    void *, buf, uint32_t, len);
			goto done;
		}
		/* only validate macaddr for outbound packets */
		if (!outbound) {
			DTRACE_SKYWALK4(skip__check__llw,
			    struct nx_netif *, nif, struct __kern_packet *, pkt,
			    void *, buf, uint32_t, len);
			return TRUE;
		}
	} else {
		if (mbcast) {
			if (outbound) {
				/* disallow multicast/broadcast as src macaddr */
				DTRACE_SKYWALK4(mbcast__src,
				    struct nx_netif *, nif,
				    struct __kern_packet *, pkt,
				    void *, buf, uint32_t, len);
				goto done;
			} else {
				/* allow multicast/broadcast as dest macaddr */
				DTRACE_SKYWALK4(mbcast__dest,
				    struct nx_netif *, nif,
				    struct __kern_packet *, pkt,
				    void *, buf, uint32_t, len);
				return TRUE;
			}
		}
	}
	if (ifnet_lladdr_copy_bytes(ifp, local_addr, sizeof(local_addr)) != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR_LEN);
		DTRACE_SKYWALK2(bad__addr__len, struct nx_netif *, nif,
		    struct ifnet *, ifp);
		return FALSE;
	}
	valid = (_ether_cmp(local_addr, addr) == 0);
done:
	if (!valid) {
		/*
		 * A non-matching mac addr is not an error for the input path
		 * because we are expected to get such packets. These packets
		 * are already counted as NETIF_STATS_FLOW_NOT_FOUND.
		 */
		if (outbound) {
			STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR);
		}
		DTRACE_SKYWALK2(bad__addr, struct nx_netif *, nif,
		    struct __kern_packet *, pkt);
	}
	return valid;
}

/*
 * Checks whether a packet matches the specified flow's descriptor.
 * This is used for validating outbound packets.
 */
boolean_t
nx_netif_flow_match(struct nx_netif *nif, struct __kern_packet *pkt,
    struct netif_flow *f, uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_flowtable *ft;
	struct netif_flowtable_ops *fops;
	struct netif_flow_desc fd;
	boolean_t match = FALSE;
	int err;

	/*
	 * Unlike the lookup case, ft cannot be NULL here because there
	 * should be a table to hold our flow. No locking is needed because
	 * no one can close our channel while we have ongoing syncs.
	 */
	VERIFY((ft = nif->nif_flow_table) != NULL);
	fops = ft->ft_ops;

	/*
	 * We increment error stats here but not when we classify because in
	 * this case a match is expected.
	 */
	err = fops->nfo_info(pkt, &fd, flags);
	if (err != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_INFO_ERR);
		DTRACE_SKYWALK3(info__err, struct nx_netif *, nif, int, err,
		    struct __kern_packet *, pkt);
		return FALSE;
	}
	match = fops->nfo_match(&f->nf_desc, &fd);
	if (!match) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_NOT_MATCH);
		DTRACE_SKYWALK3(not__match, struct nx_netif *, nif,
		    struct netif_flow *, f, struct __kern_packet *, pkt);
	}
	return match;
}

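/*
 * Looks up the flow matching a packet and takes a reference on it so
 * that it cannot be freed while the caller is delivering packets to it.
 * Returns NULL if flows are disabled, no flow table exists, or no flow
 * matches.
 */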
struct netif_flow *
nx_netif_flow_classify(struct nx_netif *nif, struct __kern_packet *pkt,
    uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_flow *__single f = NULL;
	struct netif_flowtable *ft;
	int err;

	lck_mtx_lock(&nif->nif_flow_lock);
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_DISABLED);
		DTRACE_SKYWALK1(disabled, struct nx_netif *, nif);
		goto fail;
	}
	if ((ft = nif->nif_flow_table) == NULL) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_EMPTY_TABLE);
		DTRACE_SKYWALK1(empty__flowtable, struct nx_netif *, nif);
		goto fail;
	}
	err = ft->ft_ops->nfo_lookup(ft, pkt, flags, &f);
	if (err != 0) {
		/* caller increments counter */
		DTRACE_SKYWALK1(not__found, struct nx_netif *, nif);
		goto fail;
	}
	f->nf_refcnt++;
	lck_mtx_unlock(&nif->nif_flow_lock);
	return f;

fail:
	lck_mtx_unlock(&nif->nif_flow_lock);
	return NULL;
}

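/*
 * Drops a reference taken by nx_netif_flow_classify() and wakes up any
 * thread in nx_netif_flow_remove() waiting for the flow to go idle.
 */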
void
nx_netif_flow_release(struct nx_netif *nif, struct netif_flow *nf)
{
	lck_mtx_lock(&nif->nif_flow_lock);
	if (--nf->nf_refcnt == 0) {
		wakeup(&nf->nf_refcnt);
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
}

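/*
 * Validates the packet's MAC address (unless "accept all" is enabled)
 * before classifying the packet to a flow.
 */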
static struct netif_flow *
flow_classify(struct nx_netif *nif, struct __kern_packet *pkt, uint32_t flags)
{
	if (nx_netif_vp_accept_all == 0 &&
	    !nx_netif_validate_macaddr(nif, pkt, flags)) {
		return NULL;
	}
	return nx_netif_flow_classify(nif, pkt, flags);
}

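/*
 * Classifies each packet of a chain to a flow and delivers it through
 * the flow's callback.  Consecutive packets that classify to the same
 * flow are batched into a single delivery.  Packets matching no flow
 * are handed back via *remain, or freed if remain is NULL.
 */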
errno_t
nx_netif_demux(struct nexus_netif_adapter *nifna,
    struct __kern_packet *pkt_chain, struct __kern_packet **remain,
    struct nexus_pkt_stats *stats, uint32_t flags)
{
	struct __kern_packet *pkt = pkt_chain, *next;
	struct __kern_packet *__single head = NULL;
	struct __kern_packet **tailp = &head;
	struct __kern_packet *__single rhead = NULL;
	struct __kern_packet **rtailp = &rhead;
	struct netif_flow *nf, *prev_nf = NULL;
	struct nx_netif *nif = nifna->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	int c = 0, r = 0, delivered = 0, bytes = 0, rbytes = 0, plen = 0;

	while (pkt != NULL) {
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
		plen = ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) ?
		    m_pktlen(pkt->pkt_mbuf) : pkt->pkt_length;

		/*
		 * The returned nf is refcounted to ensure it doesn't
		 * disappear while packets are being delivered.
		 */
		nf = flow_classify(nif, pkt, flags);
		if (nf != NULL) {
			nx_netif_snoop(nif, pkt, TRUE);

			/*
			 * Keep growing the chain until we classify to a
			 * different nf.
			 */
			if (prev_nf != NULL) {
				if (prev_nf != nf) {
					DTRACE_SKYWALK5(deliver,
					    struct nx_netif *, nif,
					    struct netif_flow *, prev_nf,
					    struct __kern_packet *, head,
					    int, c, uint32_t, flags);

					nx_netif_flow_deliver(nif,
					    prev_nf, head, flags);
					nx_netif_flow_release(nif, prev_nf);
					prev_nf = nf;
					head = NULL;
					tailp = &head;
					delivered += c;
					c = 0;
				} else {
					/*
					 * one reference is enough.
					 */
					nx_netif_flow_release(nif, nf);
				}
			} else {
				prev_nf = nf;
			}
			c++;
			bytes += plen;
			*tailp = pkt;
			tailp = &pkt->pkt_nextpkt;
		} else {
			r++;
			rbytes += plen;
			*rtailp = pkt;
			rtailp = &pkt->pkt_nextpkt;
		}
		pkt = next;
	}
	if (head != NULL) {
		ASSERT(prev_nf != NULL);
		DTRACE_SKYWALK5(deliver__last, struct nx_netif *,
		    nif, struct netif_flow *, prev_nf, struct __kern_packet *,
		    head, int, c, uint32_t, flags);

		nx_netif_flow_deliver(nif, prev_nf, head, flags);
		nx_netif_flow_release(nif, prev_nf);
		prev_nf = NULL;
		head = NULL;
		tailp = &head;
		delivered += c;
	}
	if (rhead != NULL) {
		if (remain != NULL) {
			*remain = rhead;
		} else {
			nx_netif_free_packet_chain(rhead, NULL);
		}
	}

	if (stats != NULL) {
		stats->nps_pkts += delivered;
		stats->nps_bytes += bytes;
	}

	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_FOUND, delivered);
	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_NOT_FOUND, r);
	DTRACE_SKYWALK5(demux__delivered, struct nx_netif *,
	    nif, int, delivered, int, r, int, bytes, int, rbytes);
	return 0;
}

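/*
 * The flow table is created lazily when the first flow is added.  The
 * table type depends on the interface: low-latency netifs demux on IPv6
 * ULA address pairs, all others on ethertype.
 */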
SK_NO_INLINE_ATTRIBUTE
static errno_t
nx_netif_flowtable_init(struct nx_netif *nif, netif_flowtable_type_t type)
{
	struct netif_flowtable *ft;
	struct netif_flowtable_ops *fops;

	switch (type) {
	case FT_TYPE_ETHERTYPE:
		fops = &netif_ethertype_ops;
		break;
	case FT_TYPE_IPV6_ULA:
		fops = &netif_ipv6_ula_ops;
		break;
	default:
		return ENOTSUP;
	}
	ft = fops->nfo_table_alloc(fops);
	if (ft == NULL) {
		return ENOMEM;
	}
	nif->nif_flow_table = ft;
	return 0;
}

SK_NO_INLINE_ATTRIBUTE
static void
nx_netif_flowtable_fini(struct nx_netif *nif)
{
	struct netif_flowtable *ft = nif->nif_flow_table;

	ASSERT(ft != NULL);
	ft->ft_ops->nfo_table_free(ft);
	nif->nif_flow_table = NULL;
}

/*
 * netif doesn't keep per-flow statistics; this log message prints a
 * snapshot of the current netif stats at the time of flow creation and
 * removal.  For a netif on interfaces like "llwX", the difference
 * between these stats at creation and removal approximates per-flow
 * stats, as there is at most one flow active at any given time.
 */
static inline void
nx_netif_flow_log(struct nx_netif *nif, struct netif_flow *nf, boolean_t add)
{
	int i;
	struct netif_stats *nifs = &nif->nif_stats;

	os_log(OS_LOG_DEFAULT, "netif flowstats (%s): if %s, nx_port %d, "
	    "ethertype 0x%x, src %s, dst %s", add ? "add" : "remove",
	    if_name(nif->nif_ifp), nf->nf_port, nf->nf_desc.fd_ethertype,
	    ip6_sprintf(&nf->nf_desc.fd_laddr),
	    ip6_sprintf(&nf->nf_desc.fd_raddr));
	for (i = 0; i < __NETIF_STATS_MAX; i++) {
		if (STATS_VAL(nifs, i) == 0) {
			continue;
		}
		os_log(OS_LOG_DEFAULT, "%s: %llu", netif_stats_str(i),
		    STATS_VAL(nifs, i));
	}
}

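/*
 * Registers a flow on the given nexus port.  The flow table is created
 * when the first flow is added and torn down when the last one is
 * removed.  cb_func is invoked by nx_netif_demux() for every packet
 * chain classified to this flow.
 */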
errno_t
nx_netif_flow_add(struct nx_netif *nif, nexus_port_t port,
    struct netif_flow_desc *desc, void *cb_arg,
    errno_t (*cb_func)(void *, void *, uint32_t),
    struct netif_flow **nfp)
{
	struct netif_flow *nf = NULL;
	struct netif_flowtable *ft;
	struct netif_stats *nifs = &nif->nif_stats;
	boolean_t refcnt_incr = FALSE, new_table = FALSE;
	errno_t err = 0;

	lck_mtx_lock(&nif->nif_flow_lock);
	nf = sk_alloc_type(struct netif_flow, Z_WAITOK | Z_NOFAIL,
	    skmem_tag_netif_flow);
	bcopy(desc, &nf->nf_desc, sizeof(*desc));
	nf->nf_port = port;
	nf->nf_refcnt = 0;
	nf->nf_cb_arg = cb_arg;
	nf->nf_cb_func = cb_func;

	/*
	 * Record the increment before any error path can be taken so
	 * that the fail label undoes it even when table init fails.
	 */
	refcnt_incr = TRUE;
	if (++nif->nif_flow_cnt == 1) {
		netif_flowtable_type_t ft_type;

		ft_type = NETIF_IS_LOW_LATENCY(nif) ? FT_TYPE_IPV6_ULA :
		    FT_TYPE_ETHERTYPE;

		err = nx_netif_flowtable_init(nif, ft_type);
		if (err != 0) {
			STATS_INC(nifs, NETIF_STATS_VP_FLOW_TABLE_INIT_FAIL);
			DTRACE_SKYWALK1(flowtable__init__fail,
			    struct nx_netif *, nif);
			goto fail;
		}
		new_table = TRUE;
	}
	ft = nif->nif_flow_table;
	err = ft->ft_ops->nfo_insert(ft, nf);
	if (err != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_INSERT_FAIL);
		DTRACE_SKYWALK1(insert__fail, struct nx_netif *, nif);
		goto fail;
	}
	SLIST_INSERT_HEAD(&nif->nif_flow_list, nf, nf_link);
	if (nfp != NULL) {
		*nfp = nf;
	}
	STATS_INC(nifs, NETIF_STATS_VP_FLOW_ADD);
	lck_mtx_unlock(&nif->nif_flow_lock);
	SK_DF(SK_VERB_VP, "flow add successful: if %s, nif %p",
	    if_name(nif->nif_ifp), SK_KVA(nif));
	nx_netif_flow_log(nif, nf, TRUE);
	return 0;

fail:
	if (nf != NULL) {
		sk_free_type(struct netif_flow, nf);
	}
	if (refcnt_incr && --nif->nif_flow_cnt == 0) {
		if (new_table) {
			nx_netif_flowtable_fini(nif);
		}
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
	SK_ERR("flow add failed: if %s, nif %p, err %d",
	    if_name(nif->nif_ifp), SK_KVA(nif), err);
	return err;
}

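/*
 * Unregisters a flow.  Blocks until all references taken by
 * nx_netif_flow_classify() have been released, then frees the flow
 * and, if it was the last one, the flow table as well.
 */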
errno_t
nx_netif_flow_remove(struct nx_netif *nif, struct netif_flow *nf)
{
	struct netif_flowtable_ops *fops;
	struct netif_flowtable *ft;
	struct netif_stats *nifs = &nif->nif_stats;

	lck_mtx_lock(&nif->nif_flow_lock);
	SLIST_REMOVE(&nif->nif_flow_list, nf, netif_flow, nf_link);
	ft = nif->nif_flow_table;
	ASSERT(ft != NULL);
	fops = ft->ft_ops;
	fops->nfo_remove(ft, nf);

	while (nf->nf_refcnt > 0) {
		DTRACE_SKYWALK1(wait__refcnt, struct netif_flow *, nf);
		(void) msleep(&nf->nf_refcnt,
		    &nif->nif_flow_lock, (PZERO + 1),
		    __FUNCTION__, NULL);
	}
	if (--nif->nif_flow_cnt == 0) {
		nx_netif_flowtable_fini(nif);
	}
	STATS_INC(nifs, NETIF_STATS_VP_FLOW_REMOVE);
	lck_mtx_unlock(&nif->nif_flow_lock);

	SK_DF(SK_VERB_VP, "flow remove: if %s, nif %p",
	    if_name(nif->nif_ifp), SK_KVA(nif));
	nx_netif_flow_log(nif, nf, FALSE);
	sk_free_type(struct netif_flow, nf);
	return 0;
}

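/*
 * Sets up per-netif flow state.  Flows are supported only on Ethernet
 * interfaces that either carry the if_attach_nx netagent flag or are
 * low-latency.
 */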
void
nx_netif_flow_init(struct nx_netif *nif)
{
	ifnet_t ifp = nif->nif_ifp;

	if (!ifnet_needs_netif_netagent(ifp) && !NETIF_IS_LOW_LATENCY(nif)) {
		SK_DF(SK_VERB_VP, "%s: flows not supported due to missing "
		    "if_attach_nx flag or invalid interface type",
		    if_name(ifp));
		return;
	}
	if (ifp->if_family != IFNET_FAMILY_ETHERNET) {
		SK_DF(SK_VERB_VP, "%s: flows not supported on "
		    "interface family %d", if_name(ifp), ifp->if_family);
		return;
	}
	ASSERT(nif->nif_flow_flags == 0);
	lck_mtx_init(&nif->nif_flow_lock, &nexus_lock_group,
	    &nexus_lock_attr);

	SLIST_INIT(&nif->nif_flow_list);
	nif->nif_flow_table = NULL;
	nif->nif_flow_cnt = 0;
	nif->nif_flow_flags |= NETIF_FLOW_FLAG_INITIALIZED;

	SK_DF(SK_VERB_VP, "%s: flows initialized", if_name(ifp));
}

void
nx_netif_flow_fini(struct nx_netif *nif)
{
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
		SK_DF(SK_VERB_VP, "%s: flows not initialized",
		    if_name(nif->nif_ifp));
		return;
	}
	nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_INITIALIZED;

	/* This should've been cleared before we get to this point */
	ASSERT((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0);
	ASSERT(nif->nif_flow_cnt == 0);
	ASSERT(nif->nif_flow_table == NULL);
	ASSERT(SLIST_EMPTY(&nif->nif_flow_list));

	lck_mtx_destroy(&nif->nif_flow_lock, &nexus_lock_group);

	SK_DF(SK_VERB_VP, "%s: flows uninitialized",
	    if_name(nif->nif_ifp));
}

static void
nx_netif_flow_set_enable(struct nx_netif *nif, boolean_t set)
{
	/*
	 * No locking is needed while checking the INITIALIZED bit: if it
	 * is not set, no other flag is ever modified.
	 */
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
		return;
	}
	lck_mtx_lock(&nif->nif_flow_lock);
	if (set) {
		SK_DF(SK_VERB_VP, "%s: flow enable, nif %p",
		    if_name(nif->nif_ifp), SK_KVA(nif));
		nif->nif_flow_flags |= NETIF_FLOW_FLAG_ENABLED;
	} else {
		SK_DF(SK_VERB_VP, "%s: flow disable, nif %p",
		    if_name(nif->nif_ifp), SK_KVA(nif));
		nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_ENABLED;
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
}

void
nx_netif_flow_enable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, TRUE);
}

void
nx_netif_flow_disable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, FALSE);
}