xref: /xnu-10002.81.5/bsd/skywalk/core/skywalk.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/signalvar.h>      /* for psignal_with_reason */
33 #include <sys/reason.h>
34 #include <sys/kern_memorystatus.h>
35 #if CONFIG_MACF
36 #include <security/mac_framework.h>
37 #endif /* CONFIG_MACF */
38 
39 #ifndef htole16
40 #if BYTE_ORDER == LITTLE_ENDIAN
41 #define htole16(x)      ((uint16_t)(x))
42 #else /* BYTE_ORDER != LITTLE_ENDIAN */
43 #define htole16(x)      bswap16((x))
44 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
45 #endif /* htole16 */
46 
47 LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
48 LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
49 LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);
50 
51 static void skywalk_fini(void);
52 static int sk_priv_chk(proc_t, kauth_cred_t, int);
53 
54 static int __sk_inited = 0;
55 #if (DEVELOPMENT || DEBUG)
56 size_t sk_copy_thres = SK_COPY_THRES;
57 uint64_t sk_verbose;
58 #endif /* DEVELOPMENT || DEBUG */
59 uint32_t sk_debug;
60 uint64_t sk_features =
61 #if SKYWALK
62     SK_FEATURE_SKYWALK |
63 #endif
64 #if DEVELOPMENT
65     SK_FEATURE_DEVELOPMENT |
66 #endif
67 #if DEBUG
68     SK_FEATURE_DEBUG |
69 #endif
70 #if CONFIG_NEXUS_FLOWSWITCH
71     SK_FEATURE_NEXUS_FLOWSWITCH |
72 #endif
73 #if CONFIG_NEXUS_MONITOR
74     SK_FEATURE_NEXUS_MONITOR |
75 #endif
76 #if CONFIG_NEXUS_NETIF
77     SK_FEATURE_NEXUS_NETIF |
78 #endif
79 #if CONFIG_NEXUS_USER_PIPE
80     SK_FEATURE_NEXUS_USER_PIPE |
81 #endif
82 #if CONFIG_NEXUS_KERNEL_PIPE
83     SK_FEATURE_NEXUS_KERNEL_PIPE |
84 #endif
85 #if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
86     SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
87 #endif
88 #if (DEVELOPMENT || DEBUG)
89     SK_FEATURE_DEV_OR_DEBUG |
90 #endif
91     0;
92 
93 uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */
94 
95 /* checksum offload is generic to any nexus (not specific to flowswitch) */
96 uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
97 uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */
98 
99 /* guard pages */
100 uint32_t sk_guard = 0;          /* guard pages (0: disable) */
101 #define SK_GUARD_MIN    1       /* minimum # of guard pages */
102 #define SK_GUARD_MAX    4       /* maximum # of guard pages */
103 uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
104 uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
105 
106 #if (DEVELOPMENT || DEBUG)
107 uint32_t sk_txring_sz = 0;      /* flowswitch */
108 uint32_t sk_rxring_sz = 0;      /* flowswitch */
109 uint32_t sk_net_txring_sz = 0;  /* netif adapter */
110 uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
111 uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
112 #endif /* !DEVELOPMENT && !DEBUG */
113 
114 uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
115 uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
116 uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
117 uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
118 /*
119  * Configures the RX aggregation logic for TCP in flowswitch.
120  * A non-zero value enables the aggregation logic, with the maximum
121  * aggregation length (in bytes) limited to this value.
122  *
123  * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
124  * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
125  */
126 uint32_t sk_fsw_rx_agg_tcp = 16384;
127 
128 /*
129  * Forces the RX host path to use or not use aggregation, regardless of the
130  * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
131  */
132 uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;
133 
134 /*
135  * Configures the skywalk infrastructure for handling TCP TX aggregation.
136  * A non-zero value enables the support.
137  */
138 uint32_t sk_fsw_tx_agg_tcp = 1;
139 /*
140  * Configuration to limit the number of buffers for flowswitch VP channel.
141  */
142 uint32_t sk_fsw_max_bufs = 0;
143 /*
144  * GSO MTU for the channel path:
145  *   > 0: enable GSO and use value as the largest supported segment size
146  *  == 0: disable GSO
147  */
148 uint32_t sk_fsw_gso_mtu = 16 * 1024;
149 
150 /* list of interfaces that allow direct open from userspace */
151 #define SK_NETIF_DIRECT_MAX     8
152 char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
153 uint32_t sk_netif_direct_cnt = 0;
154 
155 uint16_t sk_tx_delay_qlen = 16;                 /* packets */
156 uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */
157 
158 #define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
159 #define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
160 uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
161     SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
162 uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
163     SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;
164 
165 /* Wi-Fi Access Point */
166 #define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
167 #define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
168 uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
169 uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;
170 
171 /* AWDL */
172 #define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
173 #define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
174 uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
175 uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;
176 
177 /* Wi-Fi Infrastructure */
178 #define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
179 #define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
180 uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
181 uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;
182 
183 #define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
184 #define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
185 uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
186     SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
187 uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
188     SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;
189 
190 #define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
191 int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;
192 
193 uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
194 uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
195 char sk_ll_prefix[IFNAMSIZ] = "llw";
196 uint32_t sk_rx_sync_packets = 1;
197 uint32_t sk_channel_buflet_alloc = 0;
198 
199 SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
200     0, "Skywalk parameters");
201 SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
202     0, "Skywalk statistics");
203 
204 SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
205     &sk_features, sizeof(sk_features), "-", "Skywalk features");
206 
207 #if (DEVELOPMENT || DEBUG)
208 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
209     &sk_verbose, "Skywalk verbose mode");
210 SYSCTL_UINT(_kern_skywalk, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
211     &sk_debug, 0, "Skywalk debug mode");
212 SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
213     &sk_copy_thres, "Skywalk copy threshold");
214 static int __priv_check = 1;
215 SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
216     &__priv_check, 0, "Skywalk privilege check");
217 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
218     &sk_opp_defunct, 0, "Defunct opportunistically");
219 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
220     &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
221 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
222     &sk_cksum_rx, 0, "Perform inbound checksum offload");
223 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_rx_sync_packets, CTLFLAG_RW | CTLFLAG_LOCKED,
224     &sk_rx_sync_packets, 0, "Enable RX sync packets");
225 SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
226     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
227     "channel buflet allocation (enable/disable)");
228 #endif /* !DEVELOPMENT && !DEBUG */
229 
230 #if (DEVELOPMENT || DEBUG)
231 uint32_t sk_inject_error_rmask = 0x3;
232 SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
233     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
234 #endif /* !DEVELOPMENT && !DEBUG */
235 
236 #if (DEVELOPMENT || DEBUG)
237 static void skywalk_self_tests(void);
238 #endif /* (DEVELOPMENT || DEBUG) */
239 
240 #define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
241 SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);
242 
243 #define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
244 SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);
245 
246 #if (SK_LOG || DEVELOPMENT || DEBUG)
247 #define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
248 static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);
249 
250 static char *sk_dump_buf;
251 #define SK_DUMP_BUF_SIZE        2048
252 #define SK_DUMP_BUF_ALIGN       16
253 #endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
254 
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	/*
	 * Resolve a static tag spec into a live kernel allocation name
	 * and store it through the spec's variable pointer; runs at
	 * startup for each SKMEM_TAG_DEFINE() site.
	 */
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
261 
262 boolean_t
skywalk_netif_direct_enabled(void)263 skywalk_netif_direct_enabled(void)
264 {
265 	return sk_netif_direct_cnt > 0;
266 }
267 
268 boolean_t
skywalk_netif_direct_allowed(const char * ifname)269 skywalk_netif_direct_allowed(const char *ifname)
270 {
271 	uint32_t i;
272 
273 	for (i = 0; i < sk_netif_direct_cnt; i++) {
274 		if (strncmp(ifname, sk_netif_direct[i], IFXNAMSIZ) == 0) {
275 			return TRUE;
276 		}
277 	}
278 	return FALSE;
279 }
280 
#if (DEVELOPMENT || DEBUG)
/*
 * Parse the "sk_netif_direct" boot-arg: a comma-separated list of
 * interface names (e.g. "en0,en1") that userspace may open directly.
 * Populates sk_netif_direct[] and sk_netif_direct_cnt, capped at
 * SK_NETIF_DIRECT_MAX entries; over-long or empty tokens are skipped.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strlen(buf);
	/* iterate one past the end so the final token (at NUL) is consumed */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place, then copy it out */
		buf[i] = '\0';
		iflen = i - curr;
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strlcpy(sk_netif_direct[sk_netif_direct_cnt],
			    buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;
	}
}
#endif /* DEVELOPMENT || DEBUG */
311 
/*
 * Tear down the Skywalk subsystems; inverse of skywalk_init().
 * Must be called with SK_LOCK held (asserted below), e.g. when a
 * later stage of skywalk_init() fails.  Teardown order is the
 * reverse of the bring-up order in skywalk_init().
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the shared debug dump buffer, if it was allocated */
		if (sk_dump_buf != NULL) {
			sk_free_data(sk_dump_buf, SK_DUMP_BUF_SIZE);
			sk_dump_buf = NULL;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
337 
/*
 * One-time bring-up of the Skywalk subsystem.  On DEVELOPMENT/DEBUG
 * kernels this first consumes boot-args overriding the tunables;
 * it then clamps the tunables into valid ranges, sizes the
 * flow-advisory bitmap and the guard pages, and initializes the
 * memory (skmem), nexus, and channel layers under SK_LOCK.
 *
 * Returns 0 on success, or the first error from nexus/channel init
 * (in which case skywalk_fini() has already undone the partial
 * bring-up).  On success the remaining namespaces (netns, protons,
 * flowidns) are initialized and, on DEV/DEBUG, self-tests are run.
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	_CASSERT(sizeof(kern_packet_t) == sizeof(uint64_t));
	_CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));

#if (DEVELOPMENT || DEBUG)
	/* boot-arg overrides are honored only on DEV/DEBUG kernels */
	PE_parse_boot_argn("sk_debug", &sk_debug, sizeof(sk_debug));
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): the boot-arg name "sk_ts_delay_timeout" does not
	 * match the variable sk_tx_delay_timeout ("ts" vs "tx"); looks
	 * like a historical typo, but renaming the arg would break any
	 * existing users — confirm before changing.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_rx_sync_packets", &sk_rx_sync_packets,
	    sizeof(sk_rx_sync_packets));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp flow count into [1, NX_FLOWADV_MAX]; 0 means "default" */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	/* clamp interrupt-mitigation modes to the highest valid value */
	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* number of FO_FLOWADV_CHUNK-sized words covering sk_max_flows */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	/*
	 * Guard-page sizing: when enabled, a size of 0 picks a random
	 * count; all results are clamped to [SK_GUARD_MIN, SK_GUARD_MAX].
	 * When disabled, both sides still get the minimum count.
	 */
	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s) */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s) */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	/* set before bring-up so skywalk_fini() will tear down on error */
	__sk_inited = 1;

	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf; Z_NOFAIL so no NULL check */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
514 
/*
 * csproc_get_platform_binary() returns non-zero if the process is platform
 * code, which means it is considered part of the operating system.  On iOS,
 * that means it is contained in the trust cache or a loaded one.  On macOS,
 * everything signed by B&I is currently platform code, but the policy in
 * general is subject to change; treat this as an approximation.
 */
522 boolean_t
skywalk_check_platform_binary(proc_t p)523 skywalk_check_platform_binary(proc_t p)
524 {
525 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
526 }
527 
/*
 * Core privilege check: returns 0 if `cred' holds the Skywalk
 * privilege `priv' (via priv_check_cred), EPERM or the check's
 * error otherwise.  On DEVELOPMENT/DEBUG kernels a failure is
 * logged with the corresponding entitlement string, and the
 * failure may be ignored entirely when the __priv_check sysctl
 * (kern.skywalk.priv_check) is cleared.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		const char *pstr;

		/* map the privilege code to its entitlement name for logging */
		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
596 
/*
 * Check whether `cred' holds the Skywalk privilege `priv'.
 * Exported thin wrapper around sk_priv_chk(); returns 0 if
 * granted, an error (typically EPERM) otherwise.
 */
int
skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
{
	return sk_priv_chk(p, cred, priv);
}
602 
/*
 * Like skywalk_priv_check_cred(), but derives the credential from
 * the process: takes a reference on p's credential, performs the
 * check, then drops the reference.
 */
int
skywalk_priv_check_proc_cred(proc_t p, int priv)
{
	int ret;
	kauth_cred_t cred = kauth_cred_proc_ref(p);
	ret = sk_priv_chk(p, cred, priv);
	kauth_cred_unref(&cred);

	return ret;
}
613 
#if CONFIG_MACF
/*
 * MAC-framework system-info check on behalf of `p': takes a
 * reference on the process credential, queries
 * mac_system_check_info() for `info_type', then drops the
 * reference.  Returns 0 if access is allowed.
 */
int
skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
{
	int ret;
	kauth_cred_t cred = kauth_cred_proc_ref(p);
	ret = mac_system_check_info(cred, info_type);
	kauth_cred_unref(&cred);

	return ret;
}
#endif /* CONFIG_MACF */
626 
627 /*
628  * Scan thru the list of privileges needed before we allow someone
629  * to open a handle to the Nexus controller.  This should be done
630  * at nxctl_create() time, and additional privilege check specific
631  * to the operation (e.g. register, etc.) should be done afterwards.
632  */
633 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)634 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
635 {
636 	int ret = 0;
637 
638 	if (p == kernproc) {
639 		goto done;
640 	}
641 
642 	do {
643 		/*
644 		 * Check for observe-{stats,all} entitlements first
645 		 * before the rest, to account for nexus controller
646 		 * clients that don't need anything but statistics;
647 		 * it would help quiesce sandbox violation warnings.
648 		 */
649 		if ((ret = sk_priv_chk(p, cred,
650 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
651 			break;
652 		}
653 		if ((ret = sk_priv_chk(p, cred,
654 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
655 			break;
656 		}
657 		if ((ret = sk_priv_chk(p, cred,
658 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
659 			break;
660 		}
661 		if ((ret = sk_priv_chk(p, cred,
662 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
663 			break;
664 		}
665 		if ((ret = sk_priv_chk(p, cred,
666 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
667 			break;
668 		}
669 		if ((ret = sk_priv_chk(p, cred,
670 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
671 			break;
672 		}
673 		/* none set, so too bad */
674 		ret = EPERM;
675 	} while (0);
676 
677 #if (DEVELOPMENT || DEBUG)
678 	if (ret != 0) {
679 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
680 		    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ret);
681 	}
682 #endif /* !DEVELOPMENT && !DEBUG */
683 done:
684 	return ret;
685 }
686 
/*
 * Generate a guard identifier for a channel (isch == TRUE) or nexus
 * file descriptor.  The 64-bit value is built from four 16-bit words:
 *   [0] random bits, [1] the first two bytes of `uuid',
 *   [2] 'CH' or 'NX' signature, [3] 'SK' signature,
 * with the signature words stored little-endian.  The result is
 * verified non-zero (the signature words guarantee this), then
 * copied into *guard, truncating if guardid_t is narrower than
 * 8 bytes.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	/* word 0: randomness; word 1: leading bytes of the object UUID */
	read_random(&__u._u16[0], sizeof(uint16_t));
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	VERIFY(__u._u64 != 0);

	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
708 
709 
/*
 * Format `uu' as an upper-case UUID string into `out' and return
 * `out'; convenience wrapper for use directly in logging calls.
 */
extern const char *
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return (const char *)out;
}
716 
717 #if SK_LOG
/*
 * Packet-dump function, writing into a user-supplied or static buffer.
 * The destination buffer must be at least 30 + 4*len bytes.
 *
 * @param label
 *   prefix label printed ahead of the dump.
 * @param obj
 *   buffer to be dumped.
 * @param len
 *   buffer's total length.
 * @param dumplen
 *   length to be dumped.
 * @param dst
 *   destination char buffer; sk_dump_buf is used if dst is NULL.
 * @param lim
 *   destination char buffer max length; not used if dst is NULL.
 */
const char *
sk_dump(const char *label, const void *obj, int len, int dumplen,
    char *dst, int lim)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	char *o;        /* output position */

#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	/* a non-positive or oversized limit means "use the full length" */
	if (lim <= 0 || lim > len) {
		lim = len;
	}
	/* no destination given: fall back to the shared static buffer */
	if (dst == NULL) {
		dst = sk_dump_buf;
		lim = SK_DUMP_BUF_SIZE;
	}
	dumplen = MIN(len, dumplen);
	o = dst;
	n += scnprintf(o, lim, "%s 0x%llx len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strlen(o);
	/* hexdump routine: 16 bytes per row, hex columns then ASCII */
	for (i = 0; i < dumplen;) {
		n += scnprintf(o, lim - n, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);     /* blank the 16*3 hex column area */
		i0 = i;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		/* rewind and emit the printable-ASCII column at offset 48 */
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		/*
		 * NOTE(review): the per-row writes through `o' above are
		 * not bounded against `lim'; callers must honor the
		 * documented 30 + 4*len destination size — confirm before
		 * reusing with small caller-supplied buffers.
		 */
		o += j + 49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
781 
782 /*
783  * "Safe" variant of proc_name_address(), mean to be used only for logging.
784  */
785 const char *
sk_proc_name_address(struct proc * p)786 sk_proc_name_address(struct proc *p)
787 {
788 	if (p == PROC_NULL) {
789 		return "proc_null";
790 	}
791 
792 	return proc_name_address(p);
793 }
794 
795 /*
796  * "Safe" variant of proc_pid(), mean to be used only for logging.
797  */
798 int
sk_proc_pid(struct proc * p)799 sk_proc_pid(struct proc *p)
800 {
801 	if (p == PROC_NULL) {
802 		return -1;
803 	}
804 
805 	return proc_pid(p);
806 }
807 
808 const char *
sk_sa_ntop(struct sockaddr * sa,char * addr_str,size_t addr_strlen)809 sk_sa_ntop(struct sockaddr *sa, char *addr_str, size_t addr_strlen)
810 {
811 	const char *str = NULL;
812 
813 	addr_str[0] = '\0';
814 
815 	switch (sa->sa_family) {
816 	case AF_INET:
817 		str = inet_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
818 		    addr_str, (socklen_t)addr_strlen);
819 		break;
820 
821 	case AF_INET6:
822 		str = inet_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
823 		    addr_str, (socklen_t)addr_strlen);
824 		break;
825 
826 	default:
827 		str = addr_str;
828 		break;
829 	}
830 
831 	return str;
832 }
833 
834 const char *
sk_memstatus2str(uint32_t status)835 sk_memstatus2str(uint32_t status)
836 {
837 	const char *str = NULL;
838 
839 	switch (status) {
840 	case kMemorystatusInvalid:
841 		str = "kMemorystatusInvalid";
842 		break;
843 
844 	case kMemorystatusKilled:
845 		str = "kMemorystatusKilled";
846 		break;
847 
848 	case kMemorystatusKilledHiwat:
849 		str = "kMemorystatusKilledHiwat";
850 		break;
851 
852 	case kMemorystatusKilledVnodes:
853 		str = "kMemorystatusKilledVnodes";
854 		break;
855 
856 	case kMemorystatusKilledVMPageShortage:
857 		str = "kMemorystatusKilledVMPageShortage";
858 		break;
859 
860 	case kMemorystatusKilledProcThrashing:
861 		str = "kMemorystatusKilledProcThrashing";
862 		break;
863 
864 	case kMemorystatusKilledVMCompressorThrashing:
865 		str = "kMemorystatusKilledVMCompressorThrashing";
866 		break;
867 
868 	case kMemorystatusKilledVMCompressorSpaceShortage:
869 		str = "kMemorystatusKilledVMCompressorSpaceShortage";
870 		break;
871 
872 	case kMemorystatusKilledFCThrashing:
873 		str = "kMemorystatusKilledFCThrashing";
874 		break;
875 
876 	case kMemorystatusKilledPerProcessLimit:
877 		str = "kMemorystatusKilledPerProcessLimit";
878 		break;
879 
880 	case kMemorystatusKilledDiskSpaceShortage:
881 		str = "kMemorystatusKilledDiskSpaceShortage";
882 		break;
883 
884 	case kMemorystatusKilledIdleExit:
885 		str = "kMemorystatusKilledIdleExit";
886 		break;
887 
888 	case kMemorystatusKilledZoneMapExhaustion:
889 		str = "kMemorystatusKilledZoneMapExhaustion";
890 		break;
891 
892 	default:
893 		str = "unknown";
894 		break;
895 	}
896 
897 	return str;
898 }
899 #endif /* SK_LOG */
900 
901 bool
sk_sa_has_addr(struct sockaddr * sa)902 sk_sa_has_addr(struct sockaddr *sa)
903 {
904 	switch (sa->sa_family) {
905 	case AF_INET:
906 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
907 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
908 	case AF_INET6:
909 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
910 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
911 	default:
912 		return false;
913 	}
914 }
915 
916 bool
sk_sa_has_port(struct sockaddr * sa)917 sk_sa_has_port(struct sockaddr *sa)
918 {
919 	switch (sa->sa_family) {
920 	case AF_INET:
921 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
922 		return SIN(sa)->sin_port != 0;
923 	case AF_INET6:
924 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
925 		return SIN6(sa)->sin6_port != 0;
926 	default:
927 		return false;
928 	}
929 }
930 
931 /* returns port number in host byte order */
932 uint16_t
sk_sa_get_port(struct sockaddr * sa)933 sk_sa_get_port(struct sockaddr *sa)
934 {
935 	switch (sa->sa_family) {
936 	case AF_INET:
937 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
938 		return ntohs(SIN(sa)->sin_port);
939 	case AF_INET6:
940 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
941 		return ntohs(SIN6(sa)->sin6_port);
942 	default:
943 		VERIFY(0);
944 		/* NOTREACHED */
945 		__builtin_unreachable();
946 	}
947 }
948 
/*
 * Terminate the userspace process `p' with SIGABRT, attaching a
 * Skywalk-specific exit reason (`reason_code') that requests a
 * crash report.  If the exit reason cannot be allocated, the
 * signal is still delivered, just without a reason.  Must never
 * be invoked on the kernel process.
 */
void
skywalk_kill_process(struct proc *p, uint64_t reason_code)
{
	os_reason_t exit_reason = OS_REASON_NULL;

	VERIFY(p != kernproc);

	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
	if (exit_reason == OS_REASON_NULL) {
		SK_ERR("%s(%d) unable to allocate memory for crash reason "
		    "0x%llX", sk_proc_name_address(p), sk_proc_pid(p),
		    reason_code);
	} else {
		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		SK_ERR("%s(%d) aborted for reason 0x%llX",
		    sk_proc_name_address(p), sk_proc_pid(p), reason_code);
	}

	/*
	 * NOTE(review): exit_reason is not released here — presumably
	 * ownership transfers to psignal_try_thread_with_reason();
	 * confirm against its contract.
	 */
	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
	    exit_reason);
}
970 
#if (DEVELOPMENT || DEBUG)
/* geometry of the self-test scratch areas carved out of sk_dump_buf */
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Compare hdr1/hdr2 under the fixed mask t##_m using both the
 * optimized sk_memcmp_mask_<l>B variant and its scalar twin, and
 * panic if either disagrees (match vs. mismatch) with the portable
 * reference implementation over the first lr bytes.
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	_CASSERT(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Panic if either sk_memcmp_mask_<l>B variant reports a difference
 * when hdr1/hdr2 are expected to match under mask t##_m.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Panic if either sk_memcmp_mask_<l>B variant reports equality
 * when hdr1/hdr2 are expected to differ under mask t##_m.
 */
#define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " match (expected mismatch) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar match (expected mismatch) at i=%d "        \
	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Same cross-check as SK_MEMCMP_MASK_VERIFY but for the
 * variable-length sk_memcmp_mask entry points, using the loop
 * index i as the byte count.
 */
#define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask at i=%d\n", i);                     \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
1043 
/*
 * Portable reference for the optimized sk_memcmp_mask* routines:
 * returns non-zero iff src1 and src2 differ in any bit position
 * that is set in byte_mask, over the first n bytes.
 */
static inline int
skywalk_memcmp_mask_ref(const uint8_t *src1, const uint8_t *src2,
    const uint8_t *byte_mask, size_t n)
{
	int diff = 0;
	size_t idx;

	for (idx = 0; idx < n; idx++) {
		diff |= (src1[idx] ^ src2[idx]) & byte_mask[idx];
	}
	return diff;
}
1054 
/*
 * Boot-time self-tests for the sk_memcmp_mask* primitives: cross-check
 * the fixed-length (16/32/48/64/80-byte) and variable-length variants,
 * both vectorized and scalar, against skywalk_memcmp_mask_ref() using
 * the header/flow-key mask tables below, and panic on any divergence.
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/* each table is SK_MASK_MAXLEN (80) bytes, zero-padded at the tail */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	_CASSERT(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* validate flow entry mask (3-tuple: adds ipver + src) */
	_CASSERT(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* validate flow entry mask (4-tuple: adds dport) */
	_CASSERT(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* validate flow entry mask (5-tuple: adds dst) */
	_CASSERT(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* validate IP flow mask (proto only) */
	_CASSERT(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	/* validate IP flow mask (adds ipver + src) */
	_CASSERT(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	/* validate IP flow mask (adds dst) */
	_CASSERT(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Walk a moving point of difference through the first
	 * SK_MASK_MAXLEN bytes: hdr1[i] is set before the checks
	 * (possible mismatch at i) and hdr2[i] after (match again).
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/* same moving-difference walk for the variable-length variant */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 with random data, mirror it into hdr2, mask all-ones */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * Force a single differing byte at position i and check that
	 * every fixed-length variant long enough to cover i reports a
	 * mismatch, the shorter ones a match; then mask that byte out
	 * and expect a match everywhere.  State is restored at the end
	 * of each iteration.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1420 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Reset the two target sections (s2 = bcopy reference, s3 = routine
 * under test) and announce test t; _s1/_s2/_s3 snapshot the section
 * pointers so SK_COPY_VERIFY can detect register clobbering.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Panic if the section pointers were clobbered or if the routine
 * under test (into s3) produced different bytes than bcopy (into s2).
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Like SK_COPY_PREPARE, but seed both targets from s1 so that the
 * zeroing routine under test must clear exactly what bzero clears.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1453 
/*
 * Boot-time self-tests: validate the sk_copy*/sk_zero* primitives
 * against bcopy/bzero over scratch space carved from sk_dump_buf,
 * run the memcmp-mask self tests, and statically pin the ktrace
 * code values to the ariadne plist.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target base on our stuff.
	 */
	_CASSERT(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	_CASSERT((SK_COPY_LEN % 16) == 0);
	_CASSERT((SK_DUMP_BUF_ALIGN % 16) == 0);
	_CASSERT(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target base on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	_CASSERT(SK_KTRACE_AON_IF_STATS == 0x8100004);

	_CASSERT(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	_CASSERT(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	_CASSERT(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	_CASSERT(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	_CASSERT(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	_CASSERT(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	_CASSERT(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	_CASSERT(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	_CASSERT(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	_CASSERT(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	_CASSERT(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	_CASSERT(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	_CASSERT(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	_CASSERT(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1632 #endif /* DEVELOPMENT || DEBUG */
1633