/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It consists
 *   of a collection of memory regions that are usable by the nexus, as well
 *   as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address space
 *   of that task, indicated by the span [ami_mapaddr, ami_mapaddr + ami_mapsize).
 *   This is achieved by leveraging the mapper memory object ar_mapper that
 *   "stitches" the disjoint segments together.  Only user-mappable regions,
 *   i.e. those marked with SKR_MODE_MMAPOK, will be included in this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   other ones simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */
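
/*
 * Illustrative sketch (not part of the build): the typical lifecycle of a
 * nexus arena as described above.  Error handling is elided, and the
 * surrounding nexus setup is assumed to have produced the adapter `na',
 * region parameters `srp', process `p' and mmap info `ami'.
 *
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	struct skmem_arena *ar;
 *	int err;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, FALSE, NULL, &err);        // create regions + caches
 *	err = skmem_arena_mmap(ar, p, ami);   // map into the user task
 *	...
 *	skmem_arena_munmap(ar, ami);          // unmap from the task
 *	skmem_arena_release(ar);              // drop the caller's reference
 */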

#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
    const char *);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head = TAILQ_HEAD_INITIALIZER(skmem_arena_head);

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static ZONE_DEFINE(ar_nexus_zone, SKMEM_ZONE_PREFIX ".mem.arena.nexus",
    AR_NEXUS_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static ZONE_DEFINE(ar_necp_zone, SKMEM_ZONE_PREFIX ".mem.arena.necp",
    AR_NECP_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static ZONE_DEFINE(ar_system_zone, SKMEM_ZONE_PREFIX ".mem.arena.system",
    AR_SYSTEM_SIZE, ZC_ZFREE_CLEARMEM);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);

static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *name = na->na_name;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	usd_type = tx ? SKMEM_REGION_TXAUSD : SKMEM_REGION_RXFUSD;
	ksd_type = tx ? SKMEM_REGION_TXAKSD : SKMEM_REGION_RXFKSD;
	if (tx) {
		cachep = &arn->arn_txaksd_cache;
	} else {
		cachep = &arn->arn_rxfksd_cache;
	}
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(cname,
	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrement the refcnt incremented by skmem_region_mirror().
		 * This is not needed in case skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrements the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, &srp[SKMEM_REGION_BUF],
		    &srp[SKMEM_REGION_KMD], &srp[SKMEM_REGION_UMD],
		    &srp[SKMEM_REGION_KBFT], &srp[SKMEM_REGION_UBFT], NULL,
		    NULL, NULL, NULL, NULL, ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_BUF] = rx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_RXBUF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF] = NULL;
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_RXBUF] = rx_pp->pp_buf_region;
		skmem_region_retain(tx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_TXBUF] = tx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}
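
/*
 * Illustrative sketch (not part of the build): supplying an external,
 * kernel-only packet pool shared between RX and TX.  The pool's regions
 * are retained by the arena in skmem_arena_pp_setup() above, so the
 * caller keeps its own reference to the pool:
 *
 *	struct kern_pbufpool *pp = ...;   // pre-created, kernel-only pool
 *	struct kern_pbufpool *rx = pp, *tx = pp;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx, &rx,
 *	    FALSE, TRUE, NULL, &err);
 *	// arn_mode now has AR_NEXUS_MODE_EXTERNAL_PPOOL set, and each
 *	// pool region referenced by the arena holds an extra retain count.
 */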

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena_nexus *arn;
	struct skmem_arena *ar;
	char cname[64];
	uint32_t i;
	const char *name = na->na_name;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
	arn = (struct skmem_arena_nexus *)ar;

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *svr = nxv->nxv_reg;

		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(svr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
		skmem_region_retain(svr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this is assuming kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
		if ((arn->arn_schema_cache = skmem_cache_create(cname,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(cname,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *str =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];

		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}
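
/*
 * Illustrative sketch (not part of the build): a nexus owner (e.g. a
 * driver) that holds onto kernel slot descriptors brackets that use with
 * the no-idle count, which skmem_arena_nexus_teardown() consults before
 * tearing down the KSD regions in the defunct path:
 *
 *	skmem_arena_nexus_sd_set_noidle(arn, 1);    // KSD now in use
 *	...                                         // touch slot descriptors
 *	skmem_arena_nexus_sd_set_noidle(arn, -1);   // KSD idle again
 *
 *	if (skmem_arena_nexus_sd_idle(arn)) {
 *		// safe to tear down the slot descriptor regions
 *	}
 */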

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
		arn->arn_flowadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}
	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *arc;
	struct skmem_arena *ar;
	char cname[64];

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
	arc = (struct skmem_arena_necp *)ar;

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}
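
/*
 * Illustrative sketch (not part of the build): an NECP client creates a
 * mirrored ustats/kstats pair and allocates per-flow stats objects from
 * the kstats cache.  The name and variables below are placeholders.
 *
 *	struct skmem_region_params srp_u, srp_k;   // pre-configured
 *	struct skmem_arena *ar;
 *	struct skmem_arena_necp *arc;
 *	void *obj;
 *	int err;
 *
 *	ar = skmem_arena_create_for_necp("fd", &srp_u, &srp_k, &err);
 *	arc = skmem_arena_necp(ar);
 *	obj = skmem_cache_alloc(arc->arc_kstats_cache, SKMEM_SLEEP);
 */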

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
	ars = (struct skmem_arena_system *)ar;

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}
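
/*
 * Illustrative sketch (not part of the build): consumers of the system
 * arena read the shared sysctls object through the accessors above:
 *
 *	struct skmem_arena *ar;
 *	int err;
 *
 *	ar = skmem_arena_create_for_system("sys", &err);
 *	void *obj = skmem_arena_system_sysctls_obj_addr(ar);
 *	size_t objsize = skmem_arena_system_sysctls_obj_size(ar);
 */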

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Teardown (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static struct skmem_arena *
skmem_arena_alloc(skmem_arena_type_t type, const char *name)
{
	const char *ar_str = NULL;
	struct zone *ar_zone = NULL;
	struct skmem_arena *ar;
	size_t ar_zsize = 0;

	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		ar_zone = ar_nexus_zone;
		ar_zsize = AR_NEXUS_SIZE;
		ar_str = "nexus";
		break;

	case SKMEM_ARENA_TYPE_NECP:
		ar_zone = ar_necp_zone;
		ar_zsize = AR_NECP_SIZE;
		ar_str = "necp";
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		ar_zone = ar_system_zone;
		ar_zsize = AR_SYSTEM_SIZE;
		ar_str = "system";
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ar = zalloc_flags(ar_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;
	ar->ar_zone = ar_zone;

	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);

	return ar;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	zfree(ar->ar_zone, ar);
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}

/*
 * Release (and potentially destroy) an arena.
 */
__attribute__((always_inline))
static inline boolean_t
skmem_arena_release_locked(struct skmem_arena *ar)
{
	boolean_t lastref = FALSE;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_refcnt != 0);
	if (--ar->ar_refcnt == 0) {
		skmem_arena_destroy(ar);
		lastref = TRUE;
	} else {
		lastref = FALSE;
	}

	return lastref;
}

boolean_t
skmem_arena_release(struct skmem_arena *ar)
{
	boolean_t lastref;

	AR_LOCK(ar);
	/* unlock only if this isn't the last reference */
	if (!(lastref = skmem_arena_release_locked(ar))) {
		AR_UNLOCK(ar);
	}

	return lastref;
}
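
/*
 * Illustrative sketch (not part of the build): the last-reference
 * semantics of skmem_arena_release().  When it returns TRUE the arena
 * has been destroyed (and its lock freed), so the caller must not touch
 * `ar' afterwards:
 *
 *	skmem_arena_retain(ar);            // +1, arena stays alive
 *	...
 *	if (skmem_arena_release(ar)) {
 *		// last reference dropped; `ar' is gone, do not unlock
 *	}
 */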

/*
 * Map an arena to the task's address space.
 */
int
skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1467     struct skmem_arena_mmap_info *ami)
1468 {
1469 	task_t task = proc_task(p);
1470 	IOReturn ioerr;
1471 	int err = 0;
1472 
1473 	ASSERT(task != kernel_task && task != TASK_NULL);
1474 	ASSERT(ami->ami_arena == NULL);
1475 	ASSERT(ami->ami_mapref == NULL);
1476 	ASSERT(ami->ami_maptask == TASK_NULL);
1477 	ASSERT(!ami->ami_redirect);
1478 
1479 	AR_LOCK(ar);
1480 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1481 		err = ENODEV;
1482 		goto failed;
1483 	}
1484 
1485 	ASSERT(ar->ar_ar != NULL);
1486 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1487 		err = ENOMEM;
1488 		goto failed;
1489 	}
1490 
1491 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1492 	    &ami->ami_mapsize);
1493 	VERIFY(ioerr == kIOReturnSuccess);
1494 
1495 	ami->ami_arena = ar;
1496 	skmem_arena_retain_locked(ar);
1497 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1498 
1499 	ami->ami_maptask = task;
1500 	ar->ar_mapcnt++;
1501 	if (ar->ar_mapcnt == 1) {
1502 		ar->ar_mapsize = ami->ami_mapsize;
1503 	}
1504 
1505 	ASSERT(ami->ami_mapref != NULL);
1506 	ASSERT(ami->ami_arena == ar);
1507 	AR_UNLOCK(ar);
1508 
1509 	return 0;
1510 
1511 failed:
1512 	AR_UNLOCK(ar);
1513 	skmem_arena_munmap(ar, ami);
1514 	VERIFY(err != 0);
1515 
1516 	return err;
1517 }
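
/*
 * Usage sketch (illustrative; assumes a zero-initialized mmap_info
 * embedded in some client structure).  A successful call leaves the
 * [ami_mapaddr, ami_mapaddr + ami_mapsize) span mapped in the task;
 * the pairing unmap for non-channel arenas is the generic
 * skmem_arena_munmap().  Note that the failure path above already
 * calls skmem_arena_munmap() internally, so the caller only unmaps
 * after a successful map:
 *
 *	struct skmem_arena_mmap_info ami;
 *
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) == 0) {
 *		... expose ami.ami_mapaddr / ami.ami_mapsize to the task ...
 *		skmem_arena_munmap(ar, &ami);
 *	}
 */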

/*
 * Remove arena's memory mapping from task's address space (common code).
 * Returns true if caller needs to perform a deferred defunct.
 */
static boolean_t
skmem_arena_munmap_common(struct skmem_arena *ar,
    struct skmem_arena_mmap_info *ami)
{
	boolean_t need_defunct = FALSE;

	AR_LOCK(ar);
	if (ami->ami_mapref != NULL) {
		IOSKMapperDestroy(ami->ami_mapref);
		ami->ami_mapref = NULL;

		VERIFY(ar->ar_mapcnt != 0);
		ar->ar_mapcnt--;
		if (ar->ar_mapcnt == 0) {
			ar->ar_mapsize = 0;
		}

		VERIFY(ami->ami_arena == ar);
		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
		    ami_link);

		/*
		 * We expect the caller to hold an extra reference on the
		 * arena, in addition to the one in the mmap_info.
		 */
		VERIFY(ar->ar_refcnt > 1);
		(void) skmem_arena_release_locked(ar);
		ami->ami_arena = NULL;

		if (ami->ami_redirect) {
			/*
			 * This mapper has been redirected; decrement
			 * the redirect count associated with it.
			 */
			VERIFY(ar->ar_maprdrcnt != 0);
			ar->ar_maprdrcnt--;
		} else if (ar->ar_maprdrcnt != 0 &&
		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
			/*
			 * There are other mappers for this arena that have
			 * all been redirected, but the arena wasn't marked
			 * inactive by skmem_arena_mredirect() last time
			 * since this particular mapper that we just
			 * destroyed was still using it.  Now that it's
			 * gone, finish the postponed work below once we
			 * return to the caller.
			 */
			ASSERT(ar->ar_flags & ARF_ACTIVE);
			ar->ar_flags &= ~ARF_ACTIVE;
			need_defunct = TRUE;
		}
	}
	ASSERT(ami->ami_mapref == NULL);
	ASSERT(ami->ami_arena == NULL);

	ami->ami_maptask = TASK_NULL;
	ami->ami_mapaddr = 0;
	ami->ami_mapsize = 0;
	ami->ami_redirect = FALSE;

	AR_UNLOCK(ar);

	return need_defunct;
}

/*
 * Remove arena's memory mapping from task's address space (channel version).
 * Will perform a deferred defunct if needed.
 */
void
skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
{
	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * If this channel was holding the last active reference on the
	 * arena, and there are other defunct channels pointing to that
	 * arena, perform the actual arena defunct now.
	 */
	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
		struct kern_nexus *nx = ch->ch_nexus;
		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);

		/*
		 * Similar to kern_channel_defunct(), where we let the
		 * domain provider complete the defunct.  At this point
		 * both sk_lock and the channel locks are held, and so
		 * we indicate that to the callee.
		 */
		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
		    nx, ch, TRUE);
	}
}

/*
 * Remove arena's memory mapping from task's address space (generic).
 * This routine should only be called on non-channel-related arenas.
 */
void
skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
{
	(void) skmem_arena_munmap_common(ar, ami);
}

/*
 * Redirect eligible memory regions in the task's memory map so that
 * they get overwritten and backed with anonymous (zero-filled) pages.
 */
int
skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
    struct proc *p, boolean_t *need_defunct)
{
#pragma unused(p)
	int err = 0;

	*need_defunct = FALSE;

	AR_LOCK(ar);
	ASSERT(ar->ar_ar != NULL);
	if (ami->ami_redirect) {
		err = EALREADY;
	} else if (ami->ami_mapref == NULL) {
		err = ENXIO;
	} else {
		VERIFY(ar->ar_mapcnt != 0);
		ASSERT(ar->ar_flags & ARF_ACTIVE);
		VERIFY(ami->ami_arena == ar);
		/*
		 * This effectively overwrites the mappings for all
		 * redirectable memory regions (i.e. those without the
		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
		 * protection flags.  Accesses to these regions will be
		 * redirected to anonymous, zero-filled pages.
		 */
		IOSKMapperRedirect(ami->ami_mapref);
		ami->ami_redirect = TRUE;

		/*
		 * Mark the arena as inactive if all mapper instances are
		 * redirected; otherwise, we do this later during unmap.
		 * Once inactive, the arena will not allow further mmap,
		 * and it is ready to be defunct later.
		 */
		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
			ar->ar_flags &= ~ARF_ACTIVE;
			*need_defunct = TRUE;
		}
	}
	AR_UNLOCK(ar);

	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
	    *need_defunct, err);

	return err;
}
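
/*
 * Lifecycle sketch (illustrative; actual callers drive this through
 * the nexus defunct machinery rather than calling it directly).
 * Redirection is the first half of defuncting a mapped arena: once
 * every mapper instance has been redirected, the arena is marked
 * inactive and the actual teardown may proceed:
 *
 *	boolean_t need_defunct;
 *	int err;
 *
 *	err = skmem_arena_mredirect(ar, ami, p, &need_defunct);
 *	if (err == 0 && need_defunct) {
 *		... quiesce remaining users, then ...
 *		(void) skmem_arena_defunct(ar);
 *	}
 */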

/*
 * Defunct an arena.
 */
int
skmem_arena_defunct(struct skmem_arena *ar)
{
	AR_LOCK(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b", ar->ar_name,
	    SK_KVA(ar), ar->ar_flags, ARF_BITS);

	if (ar->ar_flags & ARF_DEFUNCT) {
		AR_UNLOCK(ar);
		return EALREADY;
	} else if (ar->ar_flags & ARF_ACTIVE) {
		AR_UNLOCK(ar);
		return EBUSY;
	}

	/* purge the caches now */
	skmem_arena_reap_locked(ar, TRUE);

	/* teardown eligible memory regions and associated resources */
	skmem_arena_teardown(ar, TRUE);

	ar->ar_flags |= ARF_DEFUNCT;

	AR_UNLOCK(ar);

	return 0;
}

/*
 * Retrieve total and in-use memory statistics of regions in the arena.
 */
void
skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
    uint64_t *mem_inuse)
{
	uint32_t i;

	if (mem_total != NULL) {
		*mem_total = 0;
	}
	if (mem_inuse != NULL) {
		*mem_inuse = 0;
	}

	AR_LOCK(ar);
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		if (mem_total != NULL) {
			*mem_total += AR_MEM_TOTAL(ar, i);
		}
		if (mem_inuse != NULL) {
			*mem_inuse += AR_MEM_INUSE(ar, i);
		}
	}
	AR_UNLOCK(ar);
}
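
/*
 * Usage sketch (illustrative): either output pointer may be NULL when
 * only one of the two aggregates is of interest.
 *
 *	uint64_t total, inuse;
 *
 *	skmem_arena_get_stats(ar, &total, &inuse);
 *	skmem_arena_get_stats(ar, NULL, &inuse);
 */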

/*
 * Retrieve the offset of a particular region (identified by its ID)
 * from the base of the arena.
 */
mach_vm_offset_t
skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
{
	mach_vm_offset_t offset = 0;
	uint32_t i;

	ASSERT(id < SKMEM_REGIONS);

	AR_LOCK(ar);
	for (i = 0; i < id; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		offset += ar->ar_regions[i]->skr_size;
	}
	AR_UNLOCK(ar);

	return offset;
}
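
/*
 * Worked example (hypothetical sizes): regions occupy the mapped span
 * in ascending ID order, so a region's offset is the sum of the sizes
 * of the populated regions preceding it.  If regions 0, 1 and 3 exist
 * with sizes 0x4000, 0x8000 and 0x2000 respectively, then:
 *
 *	skmem_arena_get_region_offset(ar, 3) == 0x4000 + 0x8000
 *	                                     == 0xc000
 *
 * since the absent region 2 contributes nothing to the span.
 */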

/*
 * Reap all of the configured caches in the arena, so that any excess
 * amount outside of their working sets gets released to their respective
 * backing regions.  If purging is specified, we empty the caches' working
 * sets, including everything that's cached at the CPU layer.
 */
static void
skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
{
	struct skmem_arena_nexus *arn;
	struct skmem_arena_necp *arc;
	struct kern_pbufpool *pp;

	AR_LOCK_ASSERT_HELD(ar);

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		arn = (struct skmem_arena_nexus *)ar;
		if (arn->arn_schema_cache != NULL) {
			skmem_cache_reap_now(arn->arn_schema_cache, purge);
		}
		if (arn->arn_ring_cache != NULL) {
			skmem_cache_reap_now(arn->arn_ring_cache, purge);
		}
		if ((pp = arn->arn_rx_pp) != NULL) {
			if (pp->pp_kmd_cache != NULL) {
				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
			}
			if (pp->pp_buf_cache != NULL) {
				skmem_cache_reap_now(pp->pp_buf_cache, purge);
			}
			if (pp->pp_kbft_cache != NULL) {
				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
			}
		}
		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
			if (pp->pp_kmd_cache != NULL) {
				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
			}
			if (pp->pp_buf_cache != NULL) {
				skmem_cache_reap_now(pp->pp_buf_cache, purge);
			}
			if (pp->pp_kbft_cache != NULL) {
				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
			}
		}
		break;

	case SKMEM_ARENA_TYPE_NECP:
		arc = (struct skmem_arena_necp *)ar;
		if (arc->arc_kstats_cache != NULL) {
			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
		}
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		break;
	}
}

void
skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
{
	AR_LOCK(ar);
	skmem_arena_reap_locked(ar, purge);
	AR_UNLOCK(ar);
}
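
/*
 * Usage sketch (illustrative): a gentle reap trims only the excess
 * outside each cache's working set, while a purge also empties the
 * working sets and the per-CPU layer; the latter is what the defunct
 * path above uses.
 *
 *	skmem_arena_reap(ar, FALSE);	(memory pressure: trim excess)
 *	skmem_arena_reap(ar, TRUE);	(teardown: drop everything cached)
 */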

#if SK_LOG
SK_LOG_ATTRIBUTE
static void
skmem_arena_create_region_log(struct skmem_arena *ar)
{
	char label[32];
	int i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
		break;

	case SKMEM_ARENA_TYPE_NECP:
	case SKMEM_ARENA_TYPE_SYSTEM:
		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
		break;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		(void) snprintf(label, sizeof(label), "REGION_%s:",
		    skmem_region_id2name(i));
		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
		    "o:[%4u x %6u -> %4u x %6u]", label,
		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
		    (uint32_t)AR_MEM_SEGCNT(ar, i),
		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
	}
}
#endif /* SK_LOG */

static size_t
skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
{
	size_t actual_space = sizeof(struct sk_stats_arena);
	struct sk_stats_arena *sar = out;
	struct skmem_arena_mmap_info *ami = NULL;
	pid_t proc_pid;
	int i;

	if (out == NULL || len < actual_space) {
		goto done;
	}

	AR_LOCK(ar);
	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
	    "%s", ar->ar_name);
	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
	i = 0;
	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
			struct kern_channel *ch;
			ch = container_of(ami, struct kern_channel, ch_mmap);
			proc_pid = ch->ch_pid;
		} else {
			ASSERT((ami->ami_arena->ar_type ==
			    SKMEM_ARENA_TYPE_NECP) ||
			    (ami->ami_arena->ar_type ==
			    SKMEM_ARENA_TYPE_SYSTEM));
			proc_pid =
			    necp_client_get_proc_pid_from_arena_info(ami);
		}
		sar->sar_mapped_pids[i++] = proc_pid;
		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
			break;
		}
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];

		if (skr == NULL) {
			uuid_clear(*sreg_uuid);
			continue;
		}

		uuid_copy(*sreg_uuid, skr->skr_uuid);
	}
	AR_UNLOCK(ar);

done:
	return actual_space;
}

static int
skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	struct skmem_arena *ar;
	size_t actual_space;
	size_t buffer_space;
	size_t allocated_space;
	caddr_t buffer = NULL;
	caddr_t scan;
	int error = 0;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SKMEM_ARENA_LOCK();
	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SKMEM_ARENA_UNLOCK();

	if (actual_space != 0) {
		int out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
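
/*
 * Userspace sketch (illustrative; the sysctl node name below is an
 * assumption, as the OID registration lives elsewhere).  Readers
 * follow the usual two-step pattern: probe with a NULL buffer to
 * learn the required size, then fetch into an allocation of that
 * size, getting back an array of struct sk_stats_arena:
 *
 *	size_t len = 0;
 *
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		void *buf = malloc(len);
 *		if (buf != NULL &&
 *		    sysctlbyname("kern.skywalk.stats.arena", buf, &len,
 *		    NULL, 0) == 0) {
 *			... len / sizeof(struct sk_stats_arena) entries ...
 *		}
 *		free(buf);
 *	}
 */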