xref: /xnu-8019.80.24/bsd/skywalk/core/skywalk.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/signalvar.h>      /* for psignal_with_reason */
33 #include <sys/reason.h>
34 #include <sys/kern_memorystatus.h>
35 #if CONFIG_MACF
36 #include <security/mac_framework.h>
37 #endif /* CONFIG_MACF */
38 
39 #ifndef htole16
40 #if BYTE_ORDER == LITTLE_ENDIAN
41 #define htole16(x)      ((uint16_t)(x))
42 #else /* BYTE_ORDER != LITTLE_ENDIAN */
43 #define htole16(x)      bswap16((x))
44 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
45 #endif /* htole16 */
46 
47 LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
48 LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
49 LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);
50 
51 static void skywalk_fini(void);
52 static int sk_priv_chk(proc_t, kauth_cred_t, int);
53 
54 static int __sk_inited = 0;
55 #if (DEVELOPMENT || DEBUG)
56 size_t sk_copy_thres = SK_COPY_THRES;
57 uint64_t sk_verbose;
58 #endif /* DEVELOPMENT || DEBUG */
59 uint32_t sk_debug;
60 uint64_t sk_features =
61 #if SKYWALK
62     SK_FEATURE_SKYWALK |
63 #endif
64 #if DEVELOPMENT
65     SK_FEATURE_DEVELOPMENT |
66 #endif
67 #if DEBUG
68     SK_FEATURE_DEBUG |
69 #endif
70 #if CONFIG_NEXUS_FLOWSWITCH
71     SK_FEATURE_NEXUS_FLOWSWITCH |
72 #endif
73 #if CONFIG_NEXUS_MONITOR
74     SK_FEATURE_NEXUS_MONITOR |
75 #endif
76 #if CONFIG_NEXUS_NETIF
77     SK_FEATURE_NEXUS_NETIF |
78 #endif
79 #if CONFIG_NEXUS_USER_PIPE
80     SK_FEATURE_NEXUS_USER_PIPE |
81 #endif
82 #if CONFIG_NEXUS_KERNEL_PIPE
83     SK_FEATURE_NEXUS_KERNEL_PIPE |
84 #endif
85 #if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
86     SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
87 #endif
88 #if (DEVELOPMENT || DEBUG)
89     SK_FEATURE_DEV_OR_DEBUG |
90 #endif
91     0;
92 
93 uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */
94 
95 /* checksum offload is generic to any nexus (not specific to flowswitch) */
96 uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
97 uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */
98 
99 /* guard pages */
100 uint32_t sk_guard = 0;          /* guard pages (0: disable) */
101 #define SK_GUARD_MIN    1       /* minimum # of guard pages */
102 #define SK_GUARD_MAX    4       /* maximum # of guard pages */
103 uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
104 uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
105 
106 #if (DEVELOPMENT || DEBUG)
107 uint32_t sk_txring_sz = 0;      /* flowswitch */
108 uint32_t sk_rxring_sz = 0;      /* flowswitch */
109 uint32_t sk_net_txring_sz = 0;  /* netif adapter */
110 uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
111 uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
112 #endif /* DEVELOPMENT || DEBUG */
113 
114 uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
115 uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
116 uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
117 uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
118 /*
119  * Configures the RX aggregation logic for TCP in flowswitch.
120  * A non-zero value enables the aggregation logic, with the maximum
121  * aggregation length (in bytes) limited to this value.
122  */
123 uint32_t sk_fsw_rx_agg_tcp = 16384;
124 
125 /*
126  * Forces the RX host path to use or not use aggregation, regardless of the
127  * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
128  */
129 uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;
130 
131 /*
132  * Configures the skywalk infrastructure for handling TCP TX aggregation.
133  * A non-zero value enables the support.
134  */
135 uint32_t sk_fsw_tx_agg_tcp = 1;
136 /*
137  * Configuration to limit the number of buffers for flowswitch VP channel.
138  */
139 uint32_t sk_fsw_max_bufs = 0;
140 
141 /* list of interfaces that allow direct open from userspace */
142 #define SK_NETIF_DIRECT_MAX     8
143 char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
144 uint32_t sk_netif_direct_cnt = 0;
145 
146 uint16_t sk_tx_delay_qlen = 16;                 /* packets */
147 uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */
148 
149 #define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
150 #define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
151 uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
152     SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
153 uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
154     SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;
155 
156 /* Wi-Fi Access Point */
157 #define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
158 #define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
159 uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
160 uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;
161 
162 /* AWDL */
163 #define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
164 #define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
165 uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
166 uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;
167 
168 /* Wi-Fi Infrastructure */
169 #define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
170 #define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
171 uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
172 uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;
173 
174 #define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
175 #define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
176 uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
177     SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
178 uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
179     SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;
180 
181 #define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
182 int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;
183 
184 uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
185 uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
186 char sk_ll_prefix[IFNAMSIZ] = "llw";
187 uint32_t sk_rx_sync_packets = 1;
188 uint32_t sk_channel_buflet_alloc = 0;
189 
190 SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
191     0, "Skywalk parameters");
192 SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
193     0, "Skywalk statistics");
194 
195 SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
196     &sk_features, sizeof(sk_features), "-", "Skywalk features");
197 
198 #if (DEVELOPMENT || DEBUG)
199 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
200     &sk_verbose, "Skywalk verbose mode");
201 SYSCTL_UINT(_kern_skywalk, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
202     &sk_debug, 0, "Skywalk debug mode");
203 SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
204     &sk_copy_thres, "Skywalk copy threshold");
205 static int __priv_check = 1;
206 SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
207     &__priv_check, 0, "Skywalk privilege check");
208 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
209     &sk_opp_defunct, 0, "Defunct opportunistically");
210 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
211     &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
212 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
213     &sk_cksum_rx, 0, "Perform inbound checksum offload");
214 SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_rx_sync_packets, CTLFLAG_RW | CTLFLAG_LOCKED,
215     &sk_rx_sync_packets, 0, "Enable RX sync packets");
216 SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
217     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
218     "channel buflet allocation (enable/disable)");
219 #endif /* DEVELOPMENT || DEBUG */
220 
221 #if (DEVELOPMENT || DEBUG)
222 uint32_t sk_inject_error_rmask = 0x3;
223 SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
224     CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
225 #endif /* DEVELOPMENT || DEBUG */
226 
227 #if (DEVELOPMENT || DEBUG)
228 static void skywalk_self_tests(void);
229 #endif /* (DEVELOPMENT || DEBUG) */
230 
231 #define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
232 kern_allocation_name_t skmem_tag_sysctl_buf;
233 
234 #define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
235 kern_allocation_name_t skmem_tag_oid;
236 
237 #if (SK_LOG || DEVELOPMENT || DEBUG)
238 #define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
239 static kern_allocation_name_t skmem_tag_dump;
240 
241 static char *sk_dump_buf;
242 #define SK_DUMP_BUF_SIZE        2048
243 #define SK_DUMP_BUF_ALIGN       16
244 #endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
245 
246 boolean_t
skywalk_netif_direct_enabled(void)247 skywalk_netif_direct_enabled(void)
248 {
249 	return sk_netif_direct_cnt > 0;
250 }
251 
252 boolean_t
skywalk_netif_direct_allowed(const char * ifname)253 skywalk_netif_direct_allowed(const char *ifname)
254 {
255 	uint32_t i;
256 
257 	for (i = 0; i < sk_netif_direct_cnt; i++) {
258 		if (strncmp(ifname, sk_netif_direct[i], IFXNAMSIZ) == 0) {
259 			return TRUE;
260 		}
261 	}
262 	return FALSE;
263 }
264 
#if (DEVELOPMENT || DEBUG)
/*
 * Parse the "sk_netif_direct" boot-arg: a comma-separated list of
 * interface names allowed to be opened directly from userspace.
 * Up to SK_NETIF_DIRECT_MAX names are copied into sk_netif_direct[];
 * tokens that are empty or too long (>= IFXNAMSIZ) are skipped.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	/* nothing to do if the boot-arg was not supplied */
	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strlen(buf);
	/* scan one past the end so the final token (at NUL) is handled */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place and measure it */
		buf[i] = '\0';
		iflen = i - curr;
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strlcpy(sk_netif_direct[sk_netif_direct_cnt],
			    buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;	/* next token starts after the separator */
	}
}
#endif /* DEVELOPMENT || DEBUG */
295 
/*
 * Tear down the Skywalk subsystems in reverse order of initialization
 * and release the allocation-name tags.  Must be called with sk_lock
 * held; invoked from skywalk_init() on partial-bring-up failure.
 * Idempotent: a no-op unless __sk_inited is set.
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		/* reverse of init order: channel, nexus, then skmem */
		channel_fini();
		nexus_fini();
		skmem_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		if (skmem_tag_dump != NULL) {
			kern_allocation_name_release(skmem_tag_dump);
			skmem_tag_dump = NULL;
		}
		if (sk_dump_buf != NULL) {
			sk_free_data(sk_dump_buf, SK_DUMP_BUF_SIZE);
			sk_dump_buf = NULL;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		if (skmem_tag_sysctl_buf != NULL) {
			kern_allocation_name_release(skmem_tag_sysctl_buf);
			skmem_tag_sysctl_buf = NULL;
		}

		if (skmem_tag_oid != NULL) {
			kern_allocation_name_release(skmem_tag_oid);
			skmem_tag_oid = NULL;
		}

		__sk_inited = 0;
	}
}
334 
/*
 * One-time bring-up of the Skywalk subsystem.  Parses tunable
 * boot-args (DEVELOPMENT/DEBUG kernels only), clamps tunables to
 * their valid ranges, derives dependent parameters (flow-advisory
 * chunk count, guard page counts), then initializes the memory,
 * nexus and channel layers under sk_lock.  On success, allocation
 * tags are created and ancillary subsystems (netns, protons) start.
 *
 * Returns 0 on success, or the error from nexus_init()/channel_init()
 * after rolling back via skywalk_fini().
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	_CASSERT(sizeof(kern_packet_t) == sizeof(uint64_t));
	_CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));

#if (DEVELOPMENT || DEBUG)
	/* tunables are only overridable on DEVELOPMENT/DEBUG kernels */
	PE_parse_boot_argn("sk_debug", &sk_debug, sizeof(sk_debug));
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): the boot-arg name "sk_ts_delay_timeout" looks like
	 * a typo for "sk_tx_delay_timeout" (it fills sk_tx_delay_timeout).
	 * The existing string is kept because it is what users must pass
	 * today -- confirm before renaming.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_rx_sync_packets", &sk_rx_sync_packets,
	    sizeof(sk_rx_sync_packets));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp the maximum flow count into its valid range */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	/* clamp TX/RX mitigation modes */
	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* # of FO_FLOWADV_CHUNK-sized bitmap words covering sk_max_flows */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s): randomize if unset, else clamp */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s): same policy as above */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	__sk_inited = 1;

	/* bring up the core layers; unwind everything on any failure */
	SK_LOCK();
	cuckoo_hashtable_init();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
		ASSERT(skmem_tag_oid == NULL);
		skmem_tag_oid = kern_allocation_name_allocate(SKMEM_TAG_OID, 0);
		ASSERT(skmem_tag_oid != NULL);

		ASSERT(skmem_tag_sysctl_buf == NULL);
		skmem_tag_sysctl_buf =
		    kern_allocation_name_allocate(SKMEM_TAG_SYSCTL_BUF, 0);
		ASSERT(skmem_tag_sysctl_buf != NULL);

#if (SK_LOG || DEVELOPMENT || DEBUG)
		ASSERT(skmem_tag_dump == NULL);
		skmem_tag_dump =
		    kern_allocation_name_allocate(SKMEM_TAG_DUMP, 0);
		ASSERT(skmem_tag_dump != NULL);

		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
523 
524 /*
525  * csproc_get_platform_binary() returns non-zero if the process is platform
526  * code, which means that it is considered part of the Operating System.
527  * On iOS, that means it's contained in the trust cache or a loaded one.
528  * On macOS, everything signed by B&I is currently platform code, but the
529  * policy in general is subject to change.  Thus this is an approximate.
530  */
531 boolean_t
skywalk_check_platform_binary(proc_t p)532 skywalk_check_platform_binary(proc_t p)
533 {
534 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
535 }
536 
/*
 * Core Skywalk privilege check: returns 0 if `cred' holds `priv',
 * otherwise the error from priv_check_cred().  On DEVELOPMENT/DEBUG
 * kernels a failure is logged together with the matching entitlement
 * string, and failures are ignored entirely when the
 * kern.skywalk.priv_check sysctl (__priv_check) is cleared.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = priv_check_cred(cred, priv, 0);

#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		const char *pstr;

		/* map the privilege code to its entitlement string (logging only) */
		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
602 
603 int
skywalk_priv_check_cred(proc_t p,kauth_cred_t cred,int priv)604 skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
605 {
606 	return sk_priv_chk(p, cred, priv);
607 }
608 
609 int
skywalk_priv_check_proc_cred(proc_t p,int priv)610 skywalk_priv_check_proc_cred(proc_t p, int priv)
611 {
612 	int ret;
613 	kauth_cred_t cred = kauth_cred_proc_ref(p);
614 	ret = sk_priv_chk(p, cred, priv);
615 	kauth_cred_unref(&cred);
616 
617 	return ret;
618 }
619 
#if CONFIG_MACF
/*
 * MAC-framework system-info check performed with the process's
 * credential; takes and drops a credential reference around the call.
 */
int
skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
{
	kauth_cred_t cred;
	int err;

	cred = kauth_cred_proc_ref(p);
	err = mac_system_check_info(cred, info_type);
	kauth_cred_unref(&cred);
	return err;
}
#endif /* CONFIG_MACF */
632 
633 /*
634  * Scan thru the list of privileges needed before we allow someone
635  * to open a handle to the Nexus controller.  This should be done
636  * at nxctl_create() time, and additional privilege check specific
637  * to the operation (e.g. register, etc.) should be done afterwards.
638  */
639 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)640 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
641 {
642 	int ret = 0;
643 
644 	if (p == kernproc) {
645 		goto done;
646 	}
647 
648 	do {
649 		/*
650 		 * Check for observe-{stats,all} entitlements first
651 		 * before the rest, to account for nexus controller
652 		 * clients that don't need anything but statistics;
653 		 * it would help quiesce sandbox violation warnings.
654 		 */
655 		if ((ret = sk_priv_chk(p, cred,
656 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
657 			break;
658 		}
659 		if ((ret = sk_priv_chk(p, cred,
660 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
661 			break;
662 		}
663 		if ((ret = sk_priv_chk(p, cred,
664 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
665 			break;
666 		}
667 		if ((ret = sk_priv_chk(p, cred,
668 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
669 			break;
670 		}
671 		if ((ret = sk_priv_chk(p, cred,
672 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
673 			break;
674 		}
675 		if ((ret = sk_priv_chk(p, cred,
676 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
677 			break;
678 		}
679 		/* none set, so too bad */
680 		ret = EPERM;
681 	} while (0);
682 
683 #if (DEVELOPMENT || DEBUG)
684 	if (ret != 0) {
685 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
686 		    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ret);
687 	}
688 #endif /* !DEVELOPMENT && !DEBUG */
689 done:
690 	return ret;
691 }
692 
/*
 * Generate a guard ID for a channel ("CH") or nexus ("NX") file
 * descriptor guard.  The 64-bit value is composed of 16 random bits,
 * the first two bytes of the object's UUID, a CH/NX type signature,
 * and a fixed 'SK' signature -- unpredictable yet recognizable.
 * Only the first MIN(sizeof(*guard), 8) bytes are copied into *guard.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	/* word 0: random; word 1: leading two bytes of the UUID */
	read_random(&__u._u16[0], sizeof(uint16_t));
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	/* words 2-3: type signature and 'SK' marker, little-endian */
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	VERIFY(__u._u64 != 0);

	/* copy as much of the 64-bit value as the guard type can hold */
	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
714 
715 
/*
 * Render a UUID as an upper-case string into the caller's buffer
 * and return that buffer, for convenient use in logging expressions.
 */
extern const char *
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return (const char *)out;
}
722 
723 #if SK_LOG
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*dumplen bytes.
 *
 * @param label
 *   string printed in the dump header line.
 * @param obj
 *   buffer to be dumped.
 * @param len
 *   buffer's total length.
 * @param dumplen
 *   length to be dumped (clamped to len).
 * @param dst
 *   destination char buffer. sk_dump_buf would be used if dst is NULL.
 * @param lim
 *   destination char buffer max length. Not used if dst is NULL.
 *
 * @return pointer to the NUL-terminated dump text (dst or sk_dump_buf).
 */
const char *
sk_dump(const char *label, const void *obj, int len, int dumplen,
    char *dst, int lim)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	char *o;        /* output position */

/* high/low hex nibble and printable-ASCII helpers */
#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (lim <= 0 || lim > len) {
		lim = len;
	}
	if (dst == NULL) {
		dst = sk_dump_buf;
		lim = SK_DUMP_BUF_SIZE;
	}
	dumplen = MIN(len, dumplen);
	o = dst;
	n += scnprintf(o, lim, "%s 0x%llx len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strlen(o);
	/* hexdump routine: 16 bytes per row, hex columns then ASCII */
	for (i = 0; i < dumplen;) {
		n += scnprintf(o, lim - n, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);     /* blank the 16*3-char hex area */
		i0 = i;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		i = i0;                 /* rewind for the ASCII column */
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
787 
788 /*
789  * "Safe" variant of proc_name_address(), mean to be used only for logging.
790  */
791 const char *
sk_proc_name_address(struct proc * p)792 sk_proc_name_address(struct proc *p)
793 {
794 	if (p == PROC_NULL) {
795 		return "proc_null";
796 	}
797 
798 	return proc_name_address(p);
799 }
800 
801 /*
802  * "Safe" variant of proc_pid(), mean to be used only for logging.
803  */
804 int
sk_proc_pid(struct proc * p)805 sk_proc_pid(struct proc *p)
806 {
807 	if (p == PROC_NULL) {
808 		return -1;
809 	}
810 
811 	return proc_pid(p);
812 }
813 
814 const char *
sk_sa_ntop(struct sockaddr * sa,char * addr_str,size_t addr_strlen)815 sk_sa_ntop(struct sockaddr *sa, char *addr_str, size_t addr_strlen)
816 {
817 	const char *str = NULL;
818 
819 	addr_str[0] = '\0';
820 
821 	switch (sa->sa_family) {
822 	case AF_INET:
823 		str = inet_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
824 		    addr_str, (socklen_t)addr_strlen);
825 		break;
826 
827 	case AF_INET6:
828 		str = inet_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
829 		    addr_str, (socklen_t)addr_strlen);
830 		break;
831 
832 	default:
833 		str = addr_str;
834 		break;
835 	}
836 
837 	return str;
838 }
839 
840 const char *
sk_memstatus2str(uint32_t status)841 sk_memstatus2str(uint32_t status)
842 {
843 	const char *str = NULL;
844 
845 	switch (status) {
846 	case kMemorystatusInvalid:
847 		str = "kMemorystatusInvalid";
848 		break;
849 
850 	case kMemorystatusKilled:
851 		str = "kMemorystatusKilled";
852 		break;
853 
854 	case kMemorystatusKilledHiwat:
855 		str = "kMemorystatusKilledHiwat";
856 		break;
857 
858 	case kMemorystatusKilledVnodes:
859 		str = "kMemorystatusKilledVnodes";
860 		break;
861 
862 	case kMemorystatusKilledVMPageShortage:
863 		str = "kMemorystatusKilledVMPageShortage";
864 		break;
865 
866 	case kMemorystatusKilledProcThrashing:
867 		str = "kMemorystatusKilledProcThrashing";
868 		break;
869 
870 	case kMemorystatusKilledVMCompressorThrashing:
871 		str = "kMemorystatusKilledVMCompressorThrashing";
872 		break;
873 
874 	case kMemorystatusKilledVMCompressorSpaceShortage:
875 		str = "kMemorystatusKilledVMCompressorSpaceShortage";
876 		break;
877 
878 	case kMemorystatusKilledFCThrashing:
879 		str = "kMemorystatusKilledFCThrashing";
880 		break;
881 
882 	case kMemorystatusKilledPerProcessLimit:
883 		str = "kMemorystatusKilledPerProcessLimit";
884 		break;
885 
886 	case kMemorystatusKilledDiskSpaceShortage:
887 		str = "kMemorystatusKilledDiskSpaceShortage";
888 		break;
889 
890 	case kMemorystatusKilledIdleExit:
891 		str = "kMemorystatusKilledIdleExit";
892 		break;
893 
894 	case kMemorystatusKilledZoneMapExhaustion:
895 		str = "kMemorystatusKilledZoneMapExhaustion";
896 		break;
897 
898 	default:
899 		str = "unknown";
900 		break;
901 	}
902 
903 	return str;
904 }
905 #endif /* SK_LOG */
906 
907 bool
sk_sa_has_addr(struct sockaddr * sa)908 sk_sa_has_addr(struct sockaddr *sa)
909 {
910 	switch (sa->sa_family) {
911 	case AF_INET:
912 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
913 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
914 	case AF_INET6:
915 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
916 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
917 	default:
918 		return false;
919 	}
920 }
921 
922 bool
sk_sa_has_port(struct sockaddr * sa)923 sk_sa_has_port(struct sockaddr *sa)
924 {
925 	switch (sa->sa_family) {
926 	case AF_INET:
927 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
928 		return SIN(sa)->sin_port != 0;
929 	case AF_INET6:
930 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
931 		return SIN6(sa)->sin6_port != 0;
932 	default:
933 		return false;
934 	}
935 }
936 
937 /* returns port number in host byte order */
938 uint16_t
sk_sa_get_port(struct sockaddr * sa)939 sk_sa_get_port(struct sockaddr *sa)
940 {
941 	switch (sa->sa_family) {
942 	case AF_INET:
943 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
944 		return ntohs(SIN(sa)->sin_port);
945 	case AF_INET6:
946 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
947 		return ntohs(SIN6(sa)->sin6_port);
948 	default:
949 		VERIFY(0);
950 		/* NOTREACHED */
951 		__builtin_unreachable();
952 	}
953 }
954 
955 void
skywalk_kill_process(struct proc * p,uint64_t reason_code)956 skywalk_kill_process(struct proc *p, uint64_t reason_code)
957 {
958 	os_reason_t exit_reason = OS_REASON_NULL;
959 
960 	VERIFY(p != kernproc);
961 
962 	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
963 	if (exit_reason == OS_REASON_NULL) {
964 		SK_ERR("%s(%d) unable to allocate memory for crash reason "
965 		    "0x%llX", sk_proc_name_address(p), sk_proc_pid(p),
966 		    reason_code);
967 	} else {
968 		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
969 		SK_ERR("%s(%d) aborted for reason 0x%llX",
970 		    sk_proc_name_address(p), sk_proc_pid(p), reason_code);
971 	}
972 
973 	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
974 	    exit_reason);
975 }
976 
#if (DEVELOPMENT || DEBUG)
/* Geometry of the masked-compare self-test buffers carved from sk_dump_buf. */
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Cross-check the optimized l-byte masked compare (vector and scalar
 * flavors) against the byte-wise reference over lr bytes, using mask
 * t##_m.  The XOR of the two "!= 0" results detects when exactly one
 * side saw a mismatch; any disagreement panics.  Also statically
 * asserts that the mask table is exactly SK_MASK_MAXLEN bytes.
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	_CASSERT(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Verify that both the vector and scalar l-byte masked compares report
 * a match (result 0) for hdr1 vs hdr2 under mask t##_m; panic otherwise.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Verify that both the vector and scalar l-byte masked compares report
 * a mismatch (non-zero) for hdr1 vs hdr2 under mask t##_m; panic otherwise.
 */
#define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " match (expected mismatch) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar match (expected mismatch) at i=%d "        \
	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Cross-check the variable-length masked compares (vector and scalar)
 * against the byte-wise reference at length i, using mask t##_m.
 */
#define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask at i=%d\n", i);                     \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
1049 
/*
 * Reference (byte-at-a-time) masked memory compare used to validate the
 * optimized sk_memcmp_mask_* routines.  Returns zero iff src1 and src2
 * agree on every bit selected by byte_mask over the first n bytes;
 * otherwise returns a non-zero accumulation of the masked differences.
 */
static inline int
skywalk_memcmp_mask_ref(const uint8_t *src1, const uint8_t *src2,
    const uint8_t *byte_mask, size_t n)
{
	uint32_t diff = 0;
	size_t idx = 0;

	while (idx < n) {
		diff |= (uint32_t)((src1[idx] ^ src2[idx]) & byte_mask[idx]);
		idx++;
	}
	return diff;
}
1060 
/*
 * Boot-time self-test for the sk_memcmp_mask* masked-compare routines.
 * Every optimized variant (vector and scalar) is cross-checked against
 * the byte-wise reference implementation (skywalk_memcmp_mask_ref)
 * using a collection of fixed masks and constructed match/mismatch
 * byte patterns.  Any disagreement panics via the SK_MEMCMP_MASK_*
 * macros.  Also validates the layout of the fk_mask_* flow-key masks.
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Each mask table below is SK_MASK_MAXLEN (80) bytes long; the
	 * _CASSERT inside SK_MEMCMP_MASK_VERIFY enforces that at compile
	 * time.  Names suggest the protocol header each mask is modeled
	 * after (presumably; the test only cares about the bit patterns).
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	_CASSERT(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* validate flow entry mask (3-tuple: 2-tuple + ipver + src) */
	_CASSERT(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* validate flow entry mask (4-tuple: 3-tuple + dport) */
	_CASSERT(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* validate flow entry mask (5-tuple: 4-tuple + dst) */
	_CASSERT(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* validate IP flow mask (proto only) */
	_CASSERT(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	/* validate IP flow mask (+ ipver and src) */
	_CASSERT(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	/* validate IP flow mask (+ dst) */
	_CASSERT(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Walk a single-byte difference through the first 80 bytes:
	 * hdr1[i] is set to 1 before the checks (possible mismatch at
	 * offset i) and hdr2[i] is equalized afterwards.
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/* same moving-difference walk for the variable-length compares */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hdr2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * For each offset i: force hdr2[i] to differ (non-zero) and check
	 * that the fixed-length compare covering i reports a mismatch
	 * while all shorter lengths still match; then mask out byte i and
	 * check the covering length matches again.  Restore afterwards.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1426 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Reset the reference (s2) and test (s3) sections and announce test t.
 * Snapshot the section pointers so SK_COPY_VERIFY can detect register
 * clobbering by the routine under test.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Check that the section pointers were not clobbered and that the test
 * target (s3) matches the bcopy/bzero reference target (s2); panic on
 * any difference, otherwise report PASSED.
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	} else {                                                        \
	        kprintf("PASSED\n");                                    \
	}                                                               \
} while (0)

/*
 * Pre-fill both targets with the random source so that a zeroing
 * routine's effect (and only its effect) shows up in the comparison.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1459 
/*
 * Boot-time self-tests for the optimized sk_copy* and sk_zero* routines.
 * Each routine's output is compared against a bcopy()/bzero() reference
 * using sections carved out of sk_dump_buf; any difference panics.
 * Finishes by running the masked-memcmp self-tests and by statically
 * asserting that the packet-trace (ktrace) codes stay in sync with the
 * ariadne plist.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;       /* snapshots taken by SK_COPY_PREPARE */

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target based on our stuff.
	 */
	_CASSERT(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	_CASSERT((SK_COPY_LEN % 16) == 0);
	_CASSERT((SK_DUMP_BUF_ALIGN % 16) == 0);
	_CASSERT(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		s1 = (void *)P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((uintptr_t)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((uintptr_t)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (void *)((uintptr_t)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((uint32_t *)((uintptr_t)s1 + sizeof(uint32_t)),
	    (uint32_t *)((uintptr_t)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target based on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	_CASSERT(SK_KTRACE_AON_IF_STATS == 0x8100004);

	_CASSERT(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	_CASSERT(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	_CASSERT(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	_CASSERT(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	_CASSERT(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	_CASSERT(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	_CASSERT(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	_CASSERT(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	_CASSERT(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	_CASSERT(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	_CASSERT(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	_CASSERT(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	_CASSERT(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	_CASSERT(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1638 #endif /* DEVELOPMENT || DEBUG */
1639