xref: /xnu-11215.41.3/bsd/skywalk/core/skywalk.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/signalvar.h>      /* for psignal_with_reason */
33 #include <sys/reason.h>
34 #include <sys/kern_memorystatus.h>
35 #if CONFIG_MACF
36 #include <security/mac_framework.h>
37 #endif /* CONFIG_MACF */
38 
39 #ifndef htole16
40 #if BYTE_ORDER == LITTLE_ENDIAN
41 #define htole16(x)      ((uint16_t)(x))
42 #else /* BYTE_ORDER != LITTLE_ENDIAN */
43 #define htole16(x)      bswap16((x))
44 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
45 #endif /* htole16 */
46 
47 LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
48 LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
49 LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);
50 
51 static void skywalk_fini(void);
52 static int sk_priv_chk(proc_t, kauth_cred_t, int);
53 
54 static int __sk_inited = 0;
55 #if (DEVELOPMENT || DEBUG)
56 size_t sk_copy_thres = SK_COPY_THRES;
57 uint64_t sk_verbose;
58 #endif /* DEVELOPMENT || DEBUG */
59 uint32_t sk_debug;
60 uint64_t sk_features =
61 #if SKYWALK
62     SK_FEATURE_SKYWALK |
63 #endif
64 #if DEVELOPMENT
65     SK_FEATURE_DEVELOPMENT |
66 #endif
67 #if DEBUG
68     SK_FEATURE_DEBUG |
69 #endif
70 #if CONFIG_NEXUS_FLOWSWITCH
71     SK_FEATURE_NEXUS_FLOWSWITCH |
72 #endif
73 #if CONFIG_NEXUS_MONITOR
74     SK_FEATURE_NEXUS_MONITOR |
75 #endif
76 #if CONFIG_NEXUS_NETIF
77     SK_FEATURE_NEXUS_NETIF |
78 #endif
79 #if CONFIG_NEXUS_USER_PIPE
80     SK_FEATURE_NEXUS_USER_PIPE |
81 #endif
82 #if CONFIG_NEXUS_KERNEL_PIPE
83     SK_FEATURE_NEXUS_KERNEL_PIPE |
84 #endif
85 #if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
86     SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
87 #endif
88 #if (DEVELOPMENT || DEBUG)
89     SK_FEATURE_DEV_OR_DEBUG |
90 #endif
91     0;
92 
93 uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */
94 
95 /* checksum offload is generic to any nexus (not specific to flowswitch) */
96 uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
97 uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */
98 
99 /* guard pages */
100 uint32_t sk_guard = 0;          /* guard pages (0: disable) */
101 #define SK_GUARD_MIN    1       /* minimum # of guard pages */
102 #define SK_GUARD_MAX    4       /* maximum # of guard pages */
103 uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
104 uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
105 
#if (DEVELOPMENT || DEBUG)
uint32_t sk_txring_sz = 0;      /* flowswitch */
uint32_t sk_rxring_sz = 0;      /* flowswitch */
uint32_t sk_net_txring_sz = 0;  /* netif adapter */
uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
#endif /* DEVELOPMENT || DEBUG */
113 
114 uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
115 uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
116 uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
117 uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
118 /*
119  * Configures the RX aggregation logic for TCP in flowswitch.
120  * A non-zero value enables the aggregation logic, with the maximum
121  * aggregation length (in bytes) limited to this value.
122  *
123  * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
124  * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
125  */
126 uint32_t sk_fsw_rx_agg_tcp = 16384;
127 
128 /*
129  * Forces the RX host path to use or not use aggregation, regardless of the
130  * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
131  */
132 uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;
133 
134 /*
135  * Configures the skywalk infrastructure for handling TCP TX aggregation.
136  * A non-zero value enables the support.
137  */
138 uint32_t sk_fsw_tx_agg_tcp = 1;
139 /*
140  * Configuration to limit the number of buffers for flowswitch VP channel.
141  */
142 uint32_t sk_fsw_max_bufs = 0;
143 /*
144  * GSO MTU for the channel path:
145  *   > 0: enable GSO and use value as the largest supported segment size
146  *  == 0: disable GSO
147  */
148 uint32_t sk_fsw_gso_mtu = 16 * 1024;
149 
150 /* list of interfaces that allow direct open from userspace */
151 #define SK_NETIF_DIRECT_MAX     8
152 char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
153 uint32_t sk_netif_direct_cnt = 0;
154 
155 uint16_t sk_tx_delay_qlen = 16;                 /* packets */
156 uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */
157 
158 #define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
159 #define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
160 uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
161     SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
162 uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
163     SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;
164 
165 /* Wi-Fi Access Point */
166 #define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
167 #define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
168 uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
169 uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;
170 
171 /* AWDL */
172 #define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
173 #define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
174 uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
175 uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;
176 
177 /* Wi-Fi Infrastructure */
178 #define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
179 #define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
180 uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
181 uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;
182 
183 #define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
184 #define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
185 uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
186     SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
187 uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
188     SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;
189 
190 #define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
191 int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;
192 
193 uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
194 uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
195 char sk_ll_prefix[IFNAMSIZ] = "llw";
196 uint32_t sk_rx_sync_packets = 1;
197 uint32_t sk_channel_buflet_alloc = 0;
198 
199 SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
200     0, "Skywalk parameters");
201 SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
202     0, "Skywalk statistics");
203 
204 SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
205     &sk_features, sizeof(sk_features), "-", "Skywalk features");
206 
#if (DEVELOPMENT || DEBUG)
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_verbose, "Skywalk verbose mode");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_debug, 0, "Skywalk debug mode");
SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_copy_thres, "Skywalk copy threshold");
/* when zero, sk_priv_chk() failures are logged but then ignored */
static int __priv_check = 1;
SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
    &__priv_check, 0, "Skywalk privilege check");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_opp_defunct, 0, "Defunct opportunistically");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_rx, 0, "Perform inbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_rx_sync_packets, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_rx_sync_packets, 0, "Enable RX sync packets");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
    "channel buflet allocation (enable/disable)");
#endif /* DEVELOPMENT || DEBUG */
229 
#if (DEVELOPMENT || DEBUG)
/* bitmask gating error-injection points; presumably consumed by the
 * _SK_INJECT_* machinery — TODO confirm against os_skywalk_private.h */
uint32_t sk_inject_error_rmask = 0x3;
SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
#endif /* DEVELOPMENT || DEBUG */
235 
236 #if (DEVELOPMENT || DEBUG)
237 static void skywalk_self_tests(void);
238 #endif /* (DEVELOPMENT || DEBUG) */
239 
240 #define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
241 SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);
242 
243 #define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
244 SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);
245 
246 #if (SK_LOG || DEVELOPMENT || DEBUG)
247 #define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
248 static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);
249 
250 static uint32_t sk_dump_buf_size;
251 static char *__sized_by(sk_dump_buf_size) sk_dump_buf;
252 #define SK_DUMP_BUF_SIZE        2048
253 #define SK_DUMP_BUF_ALIGN       16
254 #endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
255 
/*
 * Startup-time constructor for an skmem allocation tag: resolves the
 * spec's tag name into a kern_allocation_name_t and stores it at the
 * location the spec points to (see SKMEM_TAG_DEFINE()).
 */
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
262 
263 boolean_t
skywalk_netif_direct_enabled(void)264 skywalk_netif_direct_enabled(void)
265 {
266 	return sk_netif_direct_cnt > 0;
267 }
268 
269 boolean_t
skywalk_netif_direct_allowed(const char * ifname)270 skywalk_netif_direct_allowed(const char *ifname)
271 {
272 	uint32_t i;
273 
274 	for (i = 0; i < sk_netif_direct_cnt; i++) {
275 		if (strlcmp(sk_netif_direct[i], ifname, IFXNAMSIZ) == 0) {
276 			return TRUE;
277 		}
278 	}
279 	return FALSE;
280 }
281 
#if (DEVELOPMENT || DEBUG)
/*
 * Parses the "sk_netif_direct" boot-arg, a comma-separated list of
 * interface names (e.g. "en0,llw0"), into the sk_netif_direct[] table,
 * capped at SK_NETIF_DIRECT_MAX entries.  Tokens that are empty or too
 * long (>= IFXNAMSIZ) are silently skipped.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strbuflen(buf);
	/* scan one past the end so the final NUL terminates the last token */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place */
		buf[i] = '\0';
		iflen = i - curr;
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strbufcpy(sk_netif_direct[sk_netif_direct_cnt],
			    IFXNAMSIZ, buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;
	}
}
#endif /* DEVELOPMENT || DEBUG */
312 
/*
 * Tears down the Skywalk subsystem in reverse order of skywalk_init().
 * Must be called with the global SK_LOCK held; a no-op unless
 * skywalk_init() previously completed.
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the shared packet-dump scratch buffer */
		if (sk_dump_buf != NULL) {
			sk_free_data_sized_by(sk_dump_buf, sk_dump_buf_size);
			sk_dump_buf = NULL;
			sk_dump_buf_size = 0;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
339 
/*
 * One-time initialization of the Skywalk subsystem: parses tunable
 * overrides from boot-args (DEVELOPMENT/DEBUG only), sanity-clamps
 * them, then brings up the memory, nexus and channel layers under
 * SK_LOCK, followed by the namespace modules.  Returns 0 on success
 * or the error from nexus/channel initialization (in which case
 * skywalk_fini() has already rolled everything back).
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	_CASSERT(sizeof(kern_packet_t) == sizeof(uint64_t));
	_CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));

#if (DEVELOPMENT || DEBUG)
	/* all tunables below may be overridden via boot-args */
	PE_parse_boot_argn("sk_debug", &sk_debug, sizeof(sk_debug));
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): boot-arg name is "sk_ts_delay_timeout" ("ts", not
	 * "tx") while it feeds sk_tx_delay_timeout — looks historical;
	 * confirm before renaming, as existing boot-args depend on it.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_rx_sync_packets", &sk_rx_sync_packets,
	    sizeof(sk_rx_sync_packets));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp flow-advisory table size to [NX_FLOWADV_DEFAULT, NX_FLOWADV_MAX] */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* # of FO_FLOWADV_CHUNK-sized words covering sk_max_flows bits */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s): randomize if unspecified, else clamp */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s): same policy as the leading ones */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	__sk_inited = 1;

	/* bring up the core layers; order matters (mem -> nexus -> channel) */
	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
		sk_dump_buf_size = SK_DUMP_BUF_SIZE;
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
517 
518 /*
519  * csproc_get_platform_binary() returns non-zero if the process is platform
520  * code, which means that it is considered part of the Operating System.
521  * On iOS, that means it's contained in the trust cache or a loaded one.
522  * On macOS, everything signed by B&I is currently platform code, but the
523  * policy in general is subject to change.  Thus this is an approximate.
524  */
525 boolean_t
skywalk_check_platform_binary(proc_t p)526 skywalk_check_platform_binary(proc_t p)
527 {
528 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
529 }
530 
/*
 * Core privilege check: returns 0 if the credential holds the given
 * Skywalk privilege, else an errno (EPERM when cred is NULL).  On
 * DEVELOPMENT/DEBUG kernels a failure is logged with the entitlement
 * string, and is ignored entirely when the kern.skywalk.priv_check
 * sysctl (__priv_check) is zero.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		/* map the privilege to its entitlement name for logging */
		const char *pstr;

		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
599 
/*
 * Public wrapper around sk_priv_chk(); returns 0 if the credential
 * holds the given Skywalk privilege, else an errno.
 */
int
skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
{
	return sk_priv_chk(p, cred, priv);
}
605 
#if CONFIG_MACF
/*
 * MAC-framework hook: asks whether the process's credential may obtain
 * the given class of system information.  Takes and drops a reference
 * on the proc's credential around the check.
 */
int
skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
{
	kauth_cred_t cred;
	int err;

	cred = kauth_cred_proc_ref(p);
	err = mac_system_check_info(cred, info_type);
	kauth_cred_unref(&cred);

	return err;
}
#endif /* CONFIG_MACF */
618 
619 /*
620  * Scan thru the list of privileges needed before we allow someone
621  * to open a handle to the Nexus controller.  This should be done
622  * at nxctl_create() time, and additional privilege check specific
623  * to the operation (e.g. register, etc.) should be done afterwards.
624  */
625 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)626 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
627 {
628 	int ret = 0;
629 
630 	if (p == kernproc) {
631 		goto done;
632 	}
633 
634 	do {
635 		/*
636 		 * Check for observe-{stats,all} entitlements first
637 		 * before the rest, to account for nexus controller
638 		 * clients that don't need anything but statistics;
639 		 * it would help quiesce sandbox violation warnings.
640 		 */
641 		if ((ret = sk_priv_chk(p, cred,
642 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
643 			break;
644 		}
645 		if ((ret = sk_priv_chk(p, cred,
646 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
647 			break;
648 		}
649 		if ((ret = sk_priv_chk(p, cred,
650 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
651 			break;
652 		}
653 		if ((ret = sk_priv_chk(p, cred,
654 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
655 			break;
656 		}
657 		if ((ret = sk_priv_chk(p, cred,
658 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
659 			break;
660 		}
661 		if ((ret = sk_priv_chk(p, cred,
662 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
663 			break;
664 		}
665 		/* none set, so too bad */
666 		ret = EPERM;
667 	} while (0);
668 
669 #if (DEVELOPMENT || DEBUG)
670 	if (ret != 0) {
671 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
672 		    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ret);
673 	}
674 #endif /* !DEVELOPMENT && !DEBUG */
675 done:
676 	return ret;
677 }
678 
/*
 * Generates a 64-bit guard identifier for a channel or nexus file
 * descriptor: 16 random bits, 16 bits lifted from the object's UUID,
 * and a fixed little-endian signature ('CH'+'SK' for channels,
 * 'NX'+'SK' for nexus), so the guard is both unpredictable and
 * recognizable in crash reports.  The nonzero signature words also
 * guarantee the VERIFY below can never fire on the random part alone.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	read_random(&__u._u16[0], sizeof(uint16_t));
	/* fold in the first 16 bits of the UUID */
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	VERIFY(__u._u64 != 0);

	bzero(guard, sizeof(*guard));
	/* truncate or zero-pad into the caller's guardid_t */
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
700 
701 
/*
 * Formats a UUID into the caller-supplied uuid_string_t in upper-case
 * form and returns the same buffer, convenient for use directly as a
 * log format argument.
 */
extern char *
__counted_by(sizeof(uuid_string_t))
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return out;
}
709 
710 #if SK_LOG
711 /*
712  * packet-dump function, user-supplied or static buffer.
713  * The destination buffer must be at least 30+4*len
714  *
715  * @param p
716  *   buffer to be dumped.
717  * @param len
718  *   buffer's total length.
719  * @param dumplen
720  *   length to be dumped.
721  * @param dst
722  *   destination char buffer. sk_dump_buf would be used if dst is NULL.
723  * @param lim
724  *   destination char buffer max length. Not used if dst is NULL.
725  *
726  * -fbounds-safety: Note that all callers of this function pass NULL and 0 for
727  * dst and lim, respectively.
728  */
const char *
__counted_by(lim)
sk_dump(const char *label, const void *__sized_by(len) obj, int len, int dumplen,
    char *__counted_by(lim) dst, int lim)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	uint32_t size;
	char *__sized_by(size) o;        /* output position */

#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (dst == NULL) {
		/* no caller buffer: fall back to the shared static buffer */
		dst = sk_dump_buf;
		lim = SK_DUMP_BUF_SIZE;
	} else if (lim <= 0 || lim > len) {
		/* self-assignment keeps -fbounds-safety attribution intact */
		dst = dst;
		lim = len;  /* rdar://117789233 */
	}
	dumplen = MIN(len, dumplen);
	o = dst;
	size = lim;
	/* header line: label, source address, lengths */
	n = scnprintf(o, lim, "%s 0x%llx len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strbuflen(o, n);
	size -= n;
	/*
	 * hexdump routine.  NOTE(review): the per-line writes below are
	 * not individually bounds-checked against `size`; per the header
	 * comment, callers must provide at least 30 + 4*len bytes.
	 */
	for (i = 0; i < dumplen;) {
		n = scnprintf(o, size, "%5d: ", i);
		o += n;
		size -= n;
		memset(o, ' ', 48);
		i0 = i;
		/* hex column: 3 output chars per input byte, 16 per line */
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		/* ASCII column starts at offset 48 on the same line */
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
		size -= (j + 49);
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
782 
783 /*
784  * "Safe" variant of proc_name_address(), meant to be used only for logging.
785  */
786 const char *
sk_proc_name_address(struct proc * p)787 sk_proc_name_address(struct proc *p)
788 {
789 	if (p == PROC_NULL) {
790 		return "proc_null";
791 	}
792 
793 	return proc_name_address(p);
794 }
795 
796 /*
797  * "Safe" variant of proc_pid(), mean to be used only for logging.
798  */
799 int
sk_proc_pid(struct proc * p)800 sk_proc_pid(struct proc *p)
801 {
802 	if (p == PROC_NULL) {
803 		return -1;
804 	}
805 
806 	return proc_pid(p);
807 }
808 
/*
 * Formats the address of an AF_INET/AF_INET6 sockaddr into addr_str
 * and returns the resulting string.  For any other family the (empty)
 * caller buffer itself is returned.
 */
const char *
sk_sa_ntop(struct sockaddr *sa, char *__counted_by(addr_strlen)addr_str,
    size_t addr_strlen)
{
	const char *__null_terminated str = NULL;

	/* start with an empty string in case inet_ntop fails */
	addr_str[0] = '\0';

	switch (sa->sa_family) {
	case AF_INET:
		str = inet_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
		    addr_str, (socklen_t)addr_strlen);
		break;

	case AF_INET6:
		str = inet_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
		    addr_str, (socklen_t)addr_strlen);
		break;

	default:
		str = __unsafe_null_terminated_from_indexable(addr_str);
		break;
	}

	return str;
}
835 
836 const char *
sk_memstatus2str(uint32_t status)837 sk_memstatus2str(uint32_t status)
838 {
839 	const char *__null_terminated str = NULL;
840 
841 	switch (status) {
842 	case kMemorystatusInvalid:
843 		str = "kMemorystatusInvalid";
844 		break;
845 
846 	case kMemorystatusKilled:
847 		str = "kMemorystatusKilled";
848 		break;
849 
850 	case kMemorystatusKilledHiwat:
851 		str = "kMemorystatusKilledHiwat";
852 		break;
853 
854 	case kMemorystatusKilledVnodes:
855 		str = "kMemorystatusKilledVnodes";
856 		break;
857 
858 	case kMemorystatusKilledVMPageShortage:
859 		str = "kMemorystatusKilledVMPageShortage";
860 		break;
861 
862 	case kMemorystatusKilledProcThrashing:
863 		str = "kMemorystatusKilledProcThrashing";
864 		break;
865 
866 	case kMemorystatusKilledVMCompressorThrashing:
867 		str = "kMemorystatusKilledVMCompressorThrashing";
868 		break;
869 
870 	case kMemorystatusKilledVMCompressorSpaceShortage:
871 		str = "kMemorystatusKilledVMCompressorSpaceShortage";
872 		break;
873 
874 	case kMemorystatusKilledFCThrashing:
875 		str = "kMemorystatusKilledFCThrashing";
876 		break;
877 
878 	case kMemorystatusKilledPerProcessLimit:
879 		str = "kMemorystatusKilledPerProcessLimit";
880 		break;
881 
882 	case kMemorystatusKilledDiskSpaceShortage:
883 		str = "kMemorystatusKilledDiskSpaceShortage";
884 		break;
885 
886 	case kMemorystatusKilledIdleExit:
887 		str = "kMemorystatusKilledIdleExit";
888 		break;
889 
890 	case kMemorystatusKilledZoneMapExhaustion:
891 		str = "kMemorystatusKilledZoneMapExhaustion";
892 		break;
893 
894 	default:
895 		str = "unknown";
896 		break;
897 	}
898 
899 	return str;
900 }
901 #endif /* SK_LOG */
902 
903 bool
sk_sa_has_addr(struct sockaddr * sa)904 sk_sa_has_addr(struct sockaddr *sa)
905 {
906 	switch (sa->sa_family) {
907 	case AF_INET:
908 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
909 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
910 	case AF_INET6:
911 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
912 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
913 	default:
914 		return false;
915 	}
916 }
917 
918 bool
sk_sa_has_port(struct sockaddr * sa)919 sk_sa_has_port(struct sockaddr *sa)
920 {
921 	switch (sa->sa_family) {
922 	case AF_INET:
923 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
924 		return SIN(sa)->sin_port != 0;
925 	case AF_INET6:
926 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
927 		return SIN6(sa)->sin6_port != 0;
928 	default:
929 		return false;
930 	}
931 }
932 
933 /* returns port number in host byte order */
934 uint16_t
sk_sa_get_port(struct sockaddr * sa)935 sk_sa_get_port(struct sockaddr *sa)
936 {
937 	switch (sa->sa_family) {
938 	case AF_INET:
939 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
940 		return ntohs(SIN(sa)->sin_port);
941 	case AF_INET6:
942 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
943 		return ntohs(SIN6(sa)->sin6_port);
944 	default:
945 		VERIFY(0);
946 		/* NOTREACHED */
947 		__builtin_unreachable();
948 	}
949 }
950 
951 void
skywalk_kill_process(struct proc * p,uint64_t reason_code)952 skywalk_kill_process(struct proc *p, uint64_t reason_code)
953 {
954 	os_reason_t exit_reason = OS_REASON_NULL;
955 
956 	VERIFY(p != kernproc);
957 
958 	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
959 	if (exit_reason == OS_REASON_NULL) {
960 		SK_ERR("%s(%d) unable to allocate memory for crash reason "
961 		    "0x%llX", sk_proc_name_address(p), sk_proc_pid(p),
962 		    reason_code);
963 	} else {
964 		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
965 		SK_ERR("%s(%d) aborted for reason 0x%llX",
966 		    sk_proc_name_address(p), sk_proc_pid(p), reason_code);
967 	}
968 
969 	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
970 	    exit_reason);
971 }
972 
#if (DEVELOPMENT || DEBUG)
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Cross-check the optimized fixed-length masked compare (sk_memcmp_mask_<l>B
 * and its _scalar variant) against the byte-at-a-time reference
 * implementation skywalk_memcmp_mask_ref(), using mask table t##_m over the
 * first `lr' bytes.  Expects hdr1, hdr2 and loop index `i' in scope; panics
 * on any disagreement (XOR of the two zero/nonzero outcomes).
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	_CASSERT(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Assert that both the optimized and scalar fixed-length masked compares
 * report a match (zero) for hdr1 vs hdr2 under mask t##_m; panics otherwise.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Assert that both the optimized and scalar fixed-length masked compares
 * report a mismatch (nonzero) for hdr1 vs hdr2 under mask t##_m.
 */
#define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " match (expected mismatch) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar match (expected mismatch) at i=%d "        \
	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Cross-check the variable-length masked compares (sk_memcmp_mask /
 * sk_memcmp_mask_scalar) of length `i' against the reference implementation;
 * panics on disagreement.
 */
#define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask at i=%d\n", i);                     \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
1045 
1046 static inline int
skywalk_memcmp_mask_ref(const uint8_t * __sized_by (n)src1,const uint8_t * __sized_by (n)src2,const uint8_t * __sized_by (n)byte_mask,size_t n)1047 skywalk_memcmp_mask_ref(const uint8_t *__sized_by(n)src1,
1048     const uint8_t *__sized_by(n)src2, const uint8_t *__sized_by(n)byte_mask,
1049     size_t n)
1050 {
1051 	uint32_t result = 0;
1052 	for (size_t i = 0; i < n; i++) {
1053 		result |= (src1[i] ^ src2[i]) & byte_mask[i];
1054 	}
1055 	return result;
1056 }
1057 
/*
 * Boot-time self-tests for the masked memory-compare primitives.
 * Exercises the optimized (possibly SIMD) and scalar fixed-length variants
 * plus the variable-length variants against skywalk_memcmp_mask_ref(),
 * using a set of mask tables and pseudo-random data staged in sk_dump_buf.
 * Panics on the first inconsistency found.
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Mask tables below are each SK_MASK_MAXLEN (80) bytes; the names
	 * suggest the protocol header layouts they model (IPv4/IPv6/TCP/UDP
	 * and flow-key tuple masks) -- NOTE(review): layouts not verified
	 * here, only their behavior as compare masks matters to this test.
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	_CASSERT(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* validate flow entry mask (3-tuple = 2-tuple + ipver + src) */
	_CASSERT(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* validate flow entry mask (4-tuple = 3-tuple + dport) */
	_CASSERT(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* validate flow entry mask (5-tuple = 4-tuple + dst) */
	_CASSERT(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* validate IP flow mask (proto only) */
	_CASSERT(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	/* validate IP flow mask (+ ipver + src) */
	_CASSERT(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	/* validate IP flow mask (+ dst) */
	_CASSERT(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;
	uintptr_t diff;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Pass 1: walk a single differing byte through the first
	 * SK_MASK_MAXLEN positions (hdr1[i]=1 leads, hdr2[i]=1 catches up
	 * at the end of each iteration), cross-checking every fixed-length
	 * variant against the reference implementation.
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/*
	 * Pass 2: same walking-difference scheme over the full section
	 * length, exercising the variable-length compares with a random
	 * byte mask and length i.
	 */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hd2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * Pass 3: with hdr1 == hdr2 and an all-ones mask, force a single
	 * byte mismatch at position i, then verify that only the prefix
	 * lengths covering i report a mismatch; afterwards mask out byte i
	 * and verify everything matches again.  Original values are
	 * restored before the next iteration.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		/* pick a byte that differs from hdr1[i] and is non-zero */
		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		/* masking out the mismatching byte must yield a match */
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1425 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Set up for a copy test: zero both targets (s2 = bcopy reference,
 * s3 = routine under test) and snapshot the section pointers so
 * SK_COPY_VERIFY can detect input-register clobbering.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Check a copy test: the section pointers must be unchanged and the
 * tested routine's output (s3) must equal the bcopy reference (s2);
 * panics on failure, prints PASSED otherwise.
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	} else {                                                        \
	        kprintf("PASSED\n");                                    \
	}                                                               \
} while (0)

/*
 * Set up for a zero test: seed both targets with the same random
 * source data so the subsequent bzero/sk_zero_* calls are compared
 * over identical initial contents.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/* zero tests share the copy-test verification logic */
#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1458 
/*
 * Boot-time self-tests for the Skywalk copy/zero primitives
 * (sk_copy64_*/sk_copy32_*/sk_zero_*), each cross-checked against
 * bcopy()/bzero() as reference; also runs the memcmp-mask self-tests
 * and statically checks that the ktrace subclass codes stay in sync
 * with the ariadne plist.  Panics on any failure.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;
	uintptr_t diff;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target base on our stuff.
	 */
	_CASSERT(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	_CASSERT((SK_COPY_LEN % 16) == 0);
	_CASSERT((SK_DUMP_BUF_ALIGN % 16) == 0);
	_CASSERT(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target base on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	_CASSERT(SK_KTRACE_AON_IF_STATS == 0x8100004);

	_CASSERT(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	_CASSERT(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	_CASSERT(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	_CASSERT(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	_CASSERT(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	_CASSERT(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	_CASSERT(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	_CASSERT(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	_CASSERT(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	_CASSERT(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	_CASSERT(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	_CASSERT(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	_CASSERT(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	_CASSERT(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1639 #endif /* DEVELOPMENT || DEBUG */
1640