/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the nexus,
 *   as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped into a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address space
 *   of that task, indicated by the span [ami_mapaddr, ami_mapaddr +
 *   ami_mapsize).  This is achieved by leveraging the mapper memory object
 *   ar_mapper that "stitches" the disjoint segments together.  Only
 *   user-mappable regions, i.e. those marked with SKR_MODE_MMAPOK, will be
 *   included in this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   other ones simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */
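
/*
 * Illustrative sketch (not part of this file's build): the typical
 * lifecycle of a nexus arena as used by the routines below.  The
 * adapter "na", region parameter array "srp" and process "p" are
 * assumed to be fully initialized by the caller; error handling is
 * elided.  The pools returned in rx_pp/tx_pp carry a reference that
 * the caller must eventually drop with pp_release().
 *
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	struct skmem_arena_mmap_info ami = {};
 *	int err;
 *
 *	struct skmem_arena *ar = skmem_arena_create_for_nexus(na, srp,
 *	    &tx_pp, &rx_pp, FALSE, FALSE, NULL, &err);
 *	err = skmem_arena_mmap(ar, p, &ami);	// span at ami.ami_mapaddr
 *	...
 *	skmem_arena_munmap(ar, &ami);		// undo the mapping
 *	skmem_arena_release(ar);		// drop the creator's reference
 */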

#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static void skmem_arena_init_common(struct skmem_arena *ar,
    skmem_arena_type_t type, size_t ar_zsize, const char *ar_str,
    const char *name);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head =
    TAILQ_HEAD_INITIALIZER(skmem_arena_head);

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static SKMEM_TYPE_DEFINE(ar_nexus_zone, struct skmem_arena_nexus);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static SKMEM_TYPE_DEFINE(ar_necp_zone, struct skmem_arena_necp);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static SKMEM_TYPE_DEFINE(ar_system_zone, struct skmem_arena_system);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);

static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *__null_terminated name = NULL;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	usd_type = tx ? SKMEM_REGION_TXAUSD : SKMEM_REGION_RXFUSD;
	ksd_type = tx ? SKMEM_REGION_TXAKSD : SKMEM_REGION_RXFKSD;
	if (tx) {
		cachep = &arn->arn_txaksd_cache;
	} else {
		cachep = &arn->arn_rxfksd_cache;
	}
	name = __unsafe_null_terminated_from_indexable(na->na_name);
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	name = tsnprintf(cname, sizeof(cname), "%s_ksd.%.*s",
	    tx ? "txa" : "rxf", (int)sizeof(na->na_name), na->na_name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(name, srp[ksd_type].srp_c_obj_size, 0,
	    NULL, NULL, NULL, NULL, ar->ar_regions[ksd_type],
	    SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrement the refcnt taken by skmem_region_mirror().
		 * This is not needed when skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrement the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
		    ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
		    PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
		    PP_BUF_REGION_DEF(tx_pp);
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(tx_pp);
		if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
		}
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}

static void
skmem_arena_init_common(struct skmem_arena *ar, skmem_arena_type_t type,
    size_t ar_zsize, const char *ar_str, const char *name)
{
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;
	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);
}

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena *ar;
	struct skmem_arena_nexus *__single arn;
	char cname[64];
	uint32_t i;
	const char *__null_terminated name =
	    __unsafe_null_terminated_from_indexable(na->na_name);
	uint32_t msize = 0;
	void *__sized_by(msize) maddr = NULL;

	*perr = 0;

	arn = zalloc_flags(ar_nexus_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &arn->arn_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_NEXUS, AR_NEXUS_SIZE,
	    "nexus", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	/* these regions have memtag enabled */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MEMTAG);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *skr = nxv->nxv_reg;

		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = skr;
		skmem_region_retain(skr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this assumes a kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * a user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		name = tsnprintf(cname, sizeof(cname), "schema.%.*s",
		    (int)sizeof(na->na_name), na->na_name);
		if ((arn->arn_schema_cache = skmem_cache_create(name,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL,
		    NULL, NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	name = tsnprintf(cname, sizeof(cname), "ring.%.*s",
	    (int)sizeof(na->na_name), na->na_name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(name,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL,
	    NULL, ar->ar_regions[SKMEM_REGION_RING],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *skr = ar->ar_regions[SKMEM_REGION_USTATS];
		void *obj;

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);

		if ((obj = skmem_region_alloc(skr, &maddr,
		    NULL, NULL, SKMEM_SLEEP, skr->skr_c_obj_size, &msize)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
		arn->arn_stats_obj = obj;
		arn->arn_stats_obj_size = skr->skr_c_obj_size;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *skr =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];
		void *obj;

		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);

		if ((obj = skmem_region_alloc(skr, &maddr,
		    NULL, NULL, SKMEM_SLEEP, skr->skr_c_obj_size, &msize)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
		/* XXX -fbounds-safety: should get the count elsewhere */
		arn->arn_flowadv_obj = obj;
		arn->arn_flowadv_entries = sk_max_flows;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);      /* for caller */
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);      /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}
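
/*
 * Illustrative sketch: a driver that must keep the kernel slot
 * descriptor (KSD) regions alive across a defunct window brackets
 * that usage with the no-idle count, e.g.
 *
 *	skmem_arena_nexus_sd_set_noidle(arn, 1);	// pin KSD regions
 *	...						// use descriptors
 *	skmem_arena_nexus_sd_set_noidle(arn, -1);	// allow teardown
 *
 * skmem_arena_nexus_teardown() below skips the slot-descriptor
 * teardown in the defunct context until skmem_arena_nexus_sd_idle()
 * reports TRUE.
 */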

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj_size = 0;
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));

		/* XXX -fbounds-safety */
		void *obj = __unsafe_forge_bidi_indexable(void *,
		    arn->arn_flowadv_obj, skr->skr_c_obj_size);
		skmem_region_free(skr, obj, NULL);
		arn->arn_flowadv_obj = NULL;
		arn->arn_flowadv_entries = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *__single arc;
	struct skmem_arena *ar;
	char cname[64];
	const char *__null_terminated cache_name = NULL;

	*perr = 0;

	arc = zalloc_flags(ar_necp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &arc->arc_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_NECP, AR_NECP_SIZE,
	    "necp", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);

	/*
	 * These must be the stats regions, and ustats must be
	 * user-mappable; don't assert for SKMEM_REGION_CR_MONOLITHIC
	 * here, as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	cache_name = tsnprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cache_name,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL,
	    NULL, ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ars = zalloc_flags(ar_system_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &ars->ars_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_SYSTEM, AR_SYSTEM_SIZE,
	    "system", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Teardown (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		zfree(ar_nexus_zone, ar);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		zfree(ar_necp_zone, ar);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		zfree(ar_system_zone, ar);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
1447 
1448 /*
1449  * Retain an arena.
1450  */
1451 __attribute__((always_inline))
1452 static inline void
1453 skmem_arena_retain_locked(struct skmem_arena *ar)
1454 {
1455 	AR_LOCK_ASSERT_HELD(ar);
1456 	ar->ar_refcnt++;
1457 	ASSERT(ar->ar_refcnt != 0);
1458 }
1459 
1460 void
1461 skmem_arena_retain(struct skmem_arena *ar)
1462 {
1463 	AR_LOCK(ar);
1464 	skmem_arena_retain_locked(ar);
1465 	AR_UNLOCK(ar);
1466 }
1467 
1468 /*
1469  * Release (and potentially destroy) an arena.
1470  */
1471 __attribute__((always_inline))
1472 static inline boolean_t
1473 skmem_arena_release_locked(struct skmem_arena *ar)
1474 {
1475 	boolean_t lastref = FALSE;
1476 
1477 	AR_LOCK_ASSERT_HELD(ar);
1478 	ASSERT(ar->ar_refcnt != 0);
1479 	if (--ar->ar_refcnt == 0) {
1480 		skmem_arena_destroy(ar);
1481 		lastref = TRUE;
1482 	}
1485 
1486 	return lastref;
1487 }
1488 
1489 boolean_t
1490 skmem_arena_release(struct skmem_arena *ar)
1491 {
1492 	boolean_t lastref;
1493 
1494 	AR_LOCK(ar);
1495 	/* unlock only if this isn't the last reference */
1496 	if (!(lastref = skmem_arena_release_locked(ar))) {
1497 		AR_UNLOCK(ar);
1498 	}
1499 
1500 	return lastref;
1501 }
1502 
1503 /*
1504  * Map an arena to the task's address space.
1505  */
1506 int
1507 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1508     struct skmem_arena_mmap_info *ami)
1509 {
1510 	struct task *__single task = proc_task(p);
1511 	IOReturn ioerr;
1512 	int err = 0;
1513 
1514 	ASSERT(task != kernel_task && task != TASK_NULL);
1515 	ASSERT(ami->ami_arena == NULL);
1516 	ASSERT(ami->ami_mapref == NULL);
1517 	ASSERT(ami->ami_maptask == TASK_NULL);
1518 	ASSERT(!ami->ami_redirect);
1519 
1520 	AR_LOCK(ar);
1521 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1522 		err = ENODEV;
1523 		goto failed;
1524 	}
1525 
1526 	ASSERT(ar->ar_ar != NULL);
1527 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1528 		err = ENOMEM;
1529 		goto failed;
1530 	}
1531 
1532 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1533 	    &ami->ami_mapsize);
1534 	VERIFY(ioerr == kIOReturnSuccess);
1535 
1536 	ami->ami_arena = ar;
1537 	skmem_arena_retain_locked(ar);
1538 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1539 
1540 	ami->ami_maptask = task;
1541 	ar->ar_mapcnt++;
1542 	if (ar->ar_mapcnt == 1) {
1543 		ar->ar_mapsize = ami->ami_mapsize;
1544 	}
1545 
1546 	ASSERT(ami->ami_mapref != NULL);
1547 	ASSERT(ami->ami_arena == ar);
1548 	AR_UNLOCK(ar);
1549 
1550 	return 0;
1551 
1552 failed:
1553 	AR_UNLOCK(ar);
1554 	skmem_arena_munmap(ar, ami);
1555 	VERIFY(err != 0);
1556 
1557 	return err;
1558 }
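
/*
 * Editor's sketch (hypothetical caller, assuming a zero-initialized,
 * caller-owned mmap_info): the failure path above already calls
 * skmem_arena_munmap(), so the caller only unmaps on success.
 *
 *	struct skmem_arena_mmap_info ami;
 *	int err;
 *
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) != 0) {
 *		return err;	// ami has been cleaned up already
 *	}
 *	// the mapping spans [ami.ami_mapaddr, ami.ami_mapaddr +
 *	// ami.ami_mapsize) in the task's address space
 *	...
 *	skmem_arena_munmap(ar, &ami);
 */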
1559 
1560 /*
1561  * Remove arena's memory mapping from task's address space (common code).
1562  * Returns true if caller needs to perform a deferred defunct.
1563  */
1564 static boolean_t
1565 skmem_arena_munmap_common(struct skmem_arena *ar,
1566     struct skmem_arena_mmap_info *ami)
1567 {
1568 	boolean_t need_defunct = FALSE;
1569 
1570 	AR_LOCK(ar);
1571 	if (ami->ami_mapref != NULL) {
1572 		IOSKMapperDestroy(ami->ami_mapref);
1573 		ami->ami_mapref = NULL;
1574 
1575 		VERIFY(ar->ar_mapcnt != 0);
1576 		ar->ar_mapcnt--;
1577 		if (ar->ar_mapcnt == 0) {
1578 			ar->ar_mapsize = 0;
1579 		}
1580 
1581 		VERIFY(ami->ami_arena == ar);
1582 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1583 		    ami_link);
1584 
1585 		/*
1586 		 * We expect the caller to ensure that an extra reference
1587 		 * is held on the arena, in addition to the one in mmap_info.
1588 		 */
1589 		VERIFY(ar->ar_refcnt > 1);
1590 		(void) skmem_arena_release_locked(ar);
1591 		ami->ami_arena = NULL;
1592 
1593 		if (ami->ami_redirect) {
1594 			/*
1595 			 * This mapper has been redirected; decrement
1596 			 * the redirect count associated with it.
1597 			 */
1598 			VERIFY(ar->ar_maprdrcnt != 0);
1599 			ar->ar_maprdrcnt--;
1600 		} else if (ar->ar_maprdrcnt != 0 &&
1601 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1602 			/*
1603 			 * There are other mappers for this arena that have
1604 			 * all been redirected, but the arena wasn't marked
1605 			 * inactive by skmem_arena_mredirect() last time since
1606 			 * this particular mapper that we just destroyed
1607 			 * was using it.  Now that it's gone, finish the
1608 			 * postponed work once we return to the caller.
1609 			 */
1610 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1611 			ar->ar_flags &= ~ARF_ACTIVE;
1612 			need_defunct = TRUE;
1613 		}
1614 	}
1615 	ASSERT(ami->ami_mapref == NULL);
1616 	ASSERT(ami->ami_arena == NULL);
1617 
1618 	ami->ami_maptask = TASK_NULL;
1619 	ami->ami_mapaddr = 0;
1620 	ami->ami_mapsize = 0;
1621 	ami->ami_redirect = FALSE;
1622 
1623 	AR_UNLOCK(ar);
1624 
1625 	return need_defunct;
1626 }
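
/*
 * Editor's note on the refcount invariant verified in
 * skmem_arena_munmap_common() above: the mapping itself holds one arena
 * reference (taken in skmem_arena_mmap()), so a caller must pair the
 * unmap with a reference of its own, e.g.:
 *
 *	skmem_arena_retain(ar);		// caller's own reference
 *	...
 *	skmem_arena_munmap(ar, &ami);	// drops the mmap_info reference
 *	(void) skmem_arena_release(ar);	// may be the final release
 */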
1627 
1628 /*
1629  * Remove arena's memory mapping from task's address space (channel version).
1630  * Will perform a deferred defunct if needed.
1631  */
1632 void
1633 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1634 {
1635 	SK_LOCK_ASSERT_HELD();
1636 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1637 
1638 	/*
1639 	 * If this unmap is on a channel that was holding the last
1640 	 * active reference on the arena, and there are other
1641 	 * defunct channels pointing to that arena, perform the
1642 	 * actual arena defunct now.
1643 	 */
1644 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1645 		struct kern_nexus *nx = ch->ch_nexus;
1646 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1647 
1648 		/*
1649 		 * Similar to kern_channel_defunct(), where we let the
1650 		 * domain provider complete the defunct.  At this point
1651 		 * both sk_lock and the channel locks are held, and so
1652 		 * we indicate that to the callee.
1653 		 */
1654 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1655 		    nx, ch, TRUE);
1656 	}
1657 }
1658 
1659 /*
1660  * Remove arena's memory mapping from task's address space (generic).
1661  * This routine should only be called on non-channel related arenas.
1662  */
1663 void
1664 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1665 {
1666 	(void) skmem_arena_munmap_common(ar, ami);
1667 }
1668 
1669 /*
1670  * Redirect eligible memory regions in the task's memory map so that
1671  * they get overwritten and backed with anonymous (zero-filled) pages.
1672  */
1673 int
1674 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1675     struct proc *p, boolean_t *need_defunct)
1676 {
1677 #pragma unused(p)
1678 	int err = 0;
1679 
1680 	*need_defunct = FALSE;
1681 
1682 	AR_LOCK(ar);
1683 	ASSERT(ar->ar_ar != NULL);
1684 	if (ami->ami_redirect) {
1685 		err = EALREADY;
1686 	} else if (ami->ami_mapref == NULL) {
1687 		err = ENXIO;
1688 	} else {
1689 		VERIFY(ar->ar_mapcnt != 0);
1690 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1691 		VERIFY(ami->ami_arena == ar);
1692 		/*
1693 		 * This effectively overwrites the mappings for all
1694 		 * redirectable memory regions (i.e. those without the
1695 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1696 		 * protection flags.  Accesses to these regions will be
1697 		 * redirected to anonymous, zero-filled pages.
1698 		 */
1699 		IOSKMapperRedirect(ami->ami_mapref);
1700 		ami->ami_redirect = TRUE;
1701 
1702 		/*
1703 		 * Mark the arena as inactive if all mapper instances are
1704 		 * redirected; otherwise, we do this later during unmap.
1705 		 * Once inactive, the arena will not allow further mmap,
1706 		 * and it is ready to be defunct later.
1707 		 */
1708 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1709 			ar->ar_flags &= ~ARF_ACTIVE;
1710 			*need_defunct = TRUE;
1711 		}
1712 	}
1713 	AR_UNLOCK(ar);
1714 
1715 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1716 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1717 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1718 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1719 	    *need_defunct, err);
1720 
1721 	return err;
1722 }
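
/*
 * Editor's sketch (hypothetical caller): the redirect-then-defunct
 * sequence.  need_defunct is set only once the last active mapper has
 * been redirected, at which point the arena is no longer ARF_ACTIVE
 * and skmem_arena_defunct() below will not return EBUSY:
 *
 *	boolean_t need_defunct;
 *	int err;
 *
 *	err = skmem_arena_mredirect(ar, ami, p, &need_defunct);
 *	if (err == 0 && need_defunct) {
 *		(void) skmem_arena_defunct(ar);
 *	}
 */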
1723 
1724 /*
1725  * Defunct an arena.
1726  */
1727 int
1728 skmem_arena_defunct(struct skmem_arena *ar)
1729 {
1730 	AR_LOCK(ar);
1731 
1732 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b", ar->ar_name,
1733 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1734 
1735 	if (ar->ar_flags & ARF_DEFUNCT) {
1736 		AR_UNLOCK(ar);
1737 		return EALREADY;
1738 	} else if (ar->ar_flags & ARF_ACTIVE) {
1739 		AR_UNLOCK(ar);
1740 		return EBUSY;
1741 	}
1742 
1743 	/* purge the caches now */
1744 	skmem_arena_reap_locked(ar, TRUE);
1745 
1746 	/* teardown eligible memory regions and associated resources */
1747 	skmem_arena_teardown(ar, TRUE);
1748 
1749 	ar->ar_flags |= ARF_DEFUNCT;
1750 
1751 	AR_UNLOCK(ar);
1752 
1753 	return 0;
1754 }
1755 
1756 /*
1757  * Retrieve total and in-use memory statistics of regions in the arena.
1758  */
1759 void
1760 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1761     uint64_t *mem_inuse)
1762 {
1763 	uint32_t i;
1764 
1765 	if (mem_total != NULL) {
1766 		*mem_total = 0;
1767 	}
1768 	if (mem_inuse != NULL) {
1769 		*mem_inuse = 0;
1770 	}
1771 
1772 	AR_LOCK(ar);
1773 	for (i = 0; i < SKMEM_REGIONS; i++) {
1774 		if (ar->ar_regions[i] == NULL) {
1775 			continue;
1776 		}
1777 
1778 		if (mem_total != NULL) {
1779 			*mem_total += AR_MEM_TOTAL(ar, i);
1780 		}
1781 		if (mem_inuse != NULL) {
1782 			*mem_inuse += AR_MEM_INUSE(ar, i);
1783 		}
1784 	}
1785 	AR_UNLOCK(ar);
1786 }
1787 
1788 /*
1789  * Retrieve the offset of a particular region (identified by its ID)
1790  * from the base of the arena.
1791  */
1792 mach_vm_offset_t
1793 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1794 {
1795 	mach_vm_offset_t offset = 0;
1796 	uint32_t i;
1797 
1798 	ASSERT(id < SKMEM_REGIONS);
1799 
1800 	AR_LOCK(ar);
1801 	for (i = 0; i < id; i++) {
1802 		if (ar->ar_regions[i] == NULL) {
1803 			continue;
1804 		}
1805 
1806 		offset += ar->ar_regions[i]->skr_size;
1807 	}
1808 	AR_UNLOCK(ar);
1809 
1810 	return offset;
1811 }
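
/*
 * Editor's example (hypothetical sizes): the offset of a region is the
 * sum of the sizes of all populated regions preceding it.  With
 * skr_size 0x4000 at slot 0, slot 1 empty, and 0x2000 at slot 2, a
 * query for id == 3 yields:
 *
 *	offset = 0x4000 + 0x2000 = 0x6000
 */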
1812 
1813 static void
1814 skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
1815 {
1816 	if (pp->pp_kmd_cache != NULL) {
1817 		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1818 	}
1819 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1820 		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
1821 	}
1822 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1823 		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
1824 	}
1825 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1826 		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
1827 	}
1828 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1829 		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
1830 	}
1831 }
1832 
1833 /*
1834  * Reap all of the configured caches in the arena, so that any excess amount
1835  * outside of their working sets gets released to their respective backing
1836  * regions.  If purging is specified, we empty the caches' working sets,
1837  * including everything that's cached at the CPU layer.
1838  */
1839 static void
1840 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1841 {
1842 	struct skmem_arena_nexus *arn;
1843 	struct skmem_arena_necp *arc;
1844 	struct kern_pbufpool *pp;
1845 
1846 	AR_LOCK_ASSERT_HELD(ar);
1847 
1848 	switch (ar->ar_type) {
1849 	case SKMEM_ARENA_TYPE_NEXUS:
1850 		arn = (struct skmem_arena_nexus *)ar;
1851 		if (arn->arn_schema_cache != NULL) {
1852 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1853 		}
1854 		if (arn->arn_ring_cache != NULL) {
1855 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1856 		}
1857 		if ((pp = arn->arn_rx_pp) != NULL) {
1858 			skmem_reap_pbufpool_caches(pp, purge);
1859 		}
1860 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1861 			skmem_reap_pbufpool_caches(pp, purge);
1862 		}
1863 		break;
1864 
1865 	case SKMEM_ARENA_TYPE_NECP:
1866 		arc = (struct skmem_arena_necp *)ar;
1867 		if (arc->arc_kstats_cache != NULL) {
1868 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1869 		}
1870 		break;
1871 
1872 	case SKMEM_ARENA_TYPE_SYSTEM:
1873 		break;
1874 	}
1875 }
1876 
1877 void
1878 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1879 {
1880 	AR_LOCK(ar);
1881 	skmem_arena_reap_locked(ar, purge);
1882 	AR_UNLOCK(ar);
1883 }
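
/*
 * Editor's note: the purge flag selects how aggressive the reap is,
 * per the block comment above skmem_arena_reap_locked():
 *
 *	skmem_arena_reap(ar, FALSE);	// trim excess beyond working sets
 *	skmem_arena_reap(ar, TRUE);	// also empty working sets and the
 *					// CPU-layer caches (defunct path)
 */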
1884 
1885 #if SK_LOG
1886 SK_LOG_ATTRIBUTE
1887 static void
1888 skmem_arena_create_region_log(struct skmem_arena *ar)
1889 {
1890 	char label[32];
1891 	int i;
1892 
1893 	switch (ar->ar_type) {
1894 	case SKMEM_ARENA_TYPE_NEXUS:
1895 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1896 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1897 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1898 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1899 		break;
1900 
1901 	case SKMEM_ARENA_TYPE_NECP:
1902 	case SKMEM_ARENA_TYPE_SYSTEM:
1903 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1904 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1905 		break;
1906 	}
1907 
1908 	for (i = 0; i < SKMEM_REGIONS; i++) {
1909 		if (ar->ar_regions[i] == NULL) {
1910 			continue;
1911 		}
1912 
1913 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1914 		    skmem_region_id2name(i));
1915 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1916 		    "o:[%4u x %6u -> %4u x %6u]", label,
1917 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1918 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1919 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1920 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1921 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1922 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1923 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1924 	}
1925 }
1926 #endif /* SK_LOG */
1927 
1928 static size_t
1929 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *__sized_by(len) out,
1930     size_t len)
1931 {
1932 	size_t actual_space = sizeof(struct sk_stats_arena);
1933 	struct sk_stats_arena *__single sar;
1934 	struct skmem_arena_mmap_info *ami = NULL;
1935 	pid_t proc_pid;
1936 	int i;
1937 
1938 	if (out == NULL || len < actual_space) {
1939 		goto done;
1940 	}
1941 	sar = out;
1942 
1943 	AR_LOCK(ar);
1944 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1945 	    "%s", ar->ar_name);
1946 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1947 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1948 	i = 0;
1949 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1950 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1951 			struct kern_channel *__single ch;
1952 			ch = __unsafe_forge_single(struct kern_channel *,
1953 			    container_of(ami, struct kern_channel, ch_mmap));
1954 			proc_pid = ch->ch_pid;
1955 		} else {
1956 			ASSERT((ami->ami_arena->ar_type ==
1957 			    SKMEM_ARENA_TYPE_NECP) ||
1958 			    (ami->ami_arena->ar_type ==
1959 			    SKMEM_ARENA_TYPE_SYSTEM));
1960 			proc_pid =
1961 			    necp_client_get_proc_pid_from_arena_info(ami);
1962 		}
1963 		sar->sar_mapped_pids[i++] = proc_pid;
1964 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1965 			break;
1966 		}
1967 	}
1968 
1969 	for (i = 0; i < SKMEM_REGIONS; i++) {
1970 		struct skmem_region *skr = ar->ar_regions[i];
1971 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1972 
1973 		if (skr == NULL) {
1974 			uuid_clear(*sreg_uuid);
1975 			continue;
1976 		}
1977 
1978 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1979 	}
1980 	AR_UNLOCK(ar);
1981 
1982 done:
1983 	return actual_space;
1984 }
1985 
1986 static int
1987 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1988 {
1989 #pragma unused(arg1, arg2, oidp)
1990 	struct skmem_arena *ar;
1991 	size_t actual_space;
1992 	size_t buffer_space;
1993 	size_t allocated_space = 0;
1994 	caddr_t __sized_by(allocated_space) buffer = NULL;
1995 	caddr_t scan;
1996 	int error = 0;
1997 
1998 	if (!kauth_cred_issuser(kauth_cred_get())) {
1999 		return EPERM;
2000 	}
2001 
2002 	net_update_uptime();
2003 	buffer_space = req->oldlen;
2004 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
2005 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
2006 			buffer_space = SK_SYSCTL_ALLOC_MAX;
2007 		}
2008 		caddr_t temp;
2009 		temp = sk_alloc_data(buffer_space, Z_WAITOK, skmem_tag_arena_mib);
2010 		if (__improbable(temp == NULL)) {
2011 			return ENOBUFS;
2012 		}
2013 		buffer = temp;
2014 		allocated_space = buffer_space;
2015 	} else if (req->oldptr == USER_ADDR_NULL) {
2016 		buffer_space = 0;
2017 	}
2018 	actual_space = 0;
2019 	scan = buffer;
2020 
2021 	SKMEM_ARENA_LOCK();
2022 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
2023 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
2024 		if (scan != NULL) {
2025 			if (buffer_space < size) {
2026 				/* supplied buffer too small, stop copying */
2027 				error = ENOMEM;
2028 				break;
2029 			}
2030 			scan += size;
2031 			buffer_space -= size;
2032 		}
2033 		actual_space += size;
2034 	}
2035 	SKMEM_ARENA_UNLOCK();
2036 
2037 	if (actual_space != 0) {
2038 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2039 		if (out_error != 0) {
2040 			error = out_error;
2041 		}
2042 	}
2043 	if (buffer != NULL) {
2044 		sk_free_data_sized_by(buffer, allocated_space);
2045 	}
2046 
2047 	return error;
2048 }
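
/*
 * Editor's sketch (userland, hypothetical; the MIB name below is
 * assumed for illustration and not confirmed by this file): the handler
 * above follows the usual two-pass sysctl protocol -- probe with a NULL
 * buffer for the required size, then fetch:
 *
 *	size_t len = 0;
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		struct sk_stats_arena *buf = malloc(len);
 *		if (buf != NULL && sysctlbyname("kern.skywalk.stats.arena",
 *		    buf, &len, NULL, 0) == 0) {
 *			// len / sizeof(*buf) arenas reported
 *		}
 *		free(buf);
 *	}
 */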
2049