/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the nexus,
 *   as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address space
 *   of that task, indicated by the [ami_mapaddr, ami_mapaddr + ami_mapsize)
 *   span.  This is achieved by leveraging the mapper memory object ar_mapper
 *   that "stitches" the disjoint segments together.  Only user-mappable
 *   regions, i.e. those marked with SKR_MODE_MMAPOK, will be included in
 *   this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   other ones simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_TYPE_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */
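
/*
 * Illustrative lifecycle sketch (not part of the build; the helper name
 * below is hypothetical, and `na', `srp' and `p' are assumed to have been
 * prepared by the caller).  It shows the create/mmap/munmap/release
 * sequence described above; error handling is abbreviated.
 */
#if 0
static int
skmem_arena_lifecycle_sketch(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct proc *p)
{
	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
	struct skmem_arena_mmap_info ami;
	struct skmem_arena *ar;
	int err;

	/* create the arena along with its regions, caches and pools */
	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp, 0,
	    NULL, &err);
	if (ar == NULL) {
		return err;
	}

	/* stitch the user-mappable regions into the task's address space */
	bzero(&ami, sizeof(ami));
	err = skmem_arena_mmap(ar, p, &ami);
	if (err == 0) {
		/* ... arena is mapped and usable; later, unmap it ... */
		skmem_arena_munmap(ar, &ami);
	}

	/* drop the pool and arena references acquired on create */
	pp_release(rx_pp);
	pp_release(tx_pp);
	(void) skmem_arena_release(ar);
	return err;
}
#endif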

#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
    const char *);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head = TAILQ_HEAD_INITIALIZER(skmem_arena_head);

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static SKMEM_TYPE_DEFINE(ar_nexus_zone, struct skmem_arena_nexus);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static SKMEM_TYPE_DEFINE(ar_necp_zone, struct skmem_arena_necp);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static SKMEM_TYPE_DEFINE(ar_system_zone, struct skmem_arena_system);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);

static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *name = na->na_name;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	if (tx) {
		usd_type = SKMEM_REGION_TXAUSD;
		ksd_type = SKMEM_REGION_TXAKSD;
		cachep = &arn->arn_txaksd_cache;
	} else {
		usd_type = SKMEM_REGION_RXFUSD;
		ksd_type = SKMEM_REGION_RXFKSD;
		cachep = &arn->arn_rxfksd_cache;
	}
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(cname,
	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrements the refcnt incremented by skmem_region_mirror();
		 * this is not needed in case skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrements the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}
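
/*
 * A condensed sketch (not compiled) of the mirroring pattern above: the
 * user-visible USD region shadows the kernel KSD region, and
 * skmem_region_mirror() takes an extra reference on the mirror, which is
 * why the failure path releases `usd_skr' twice.
 */
#if 0
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	usd_skr = skmem_region_create(name, &srp[usd_type], NULL, NULL, NULL);
	skmem_region_mirror(ksd_skr, usd_skr);	/* usd_skr gains a refcnt */
	*cachep = skmem_cache_create(cname, srp[ksd_type].srp_c_obj_size,
	    0, NULL, NULL, NULL, NULL, ksd_skr, SKMEM_CR_NOMAGAZINES);
	/* on success, skmem_cache_destroy() later releases the mirror ref */
#endif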

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    uint32_t flags)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	boolean_t kernel_only = (flags & SKMEM_PP_FLAG_KERNEL_ONLY) != 0;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (flags & SKMEM_PP_FLAG_TRUNCATED_BUF) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (flags & SKMEM_PP_FLAG_KERNEL_ONLY) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		if (flags & SKMEM_PP_FLAG_RAW_BFLT) {
			ppcreatef |= PPCREATEF_RAW_BFLT;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
		    ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
		    PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
		    PP_BUF_REGION_DEF(tx_pp);
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(tx_pp);
		if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
		}
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}
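
/*
 * Call shapes accepted by skmem_arena_pp_setup() (sketch only; the
 * `drv_*' pools are hypothetical).  A split RX/TX pool is only legal
 * for kernel-only arenas:
 */
#if 0
	/* internal pool: created, retained and shared for RX and TX */
	skmem_arena_pp_setup(ar, srp, name, NULL, NULL, pp_flags);

	/* one external pool shared for RX and TX */
	skmem_arena_pp_setup(ar, srp, name, drv_pp, drv_pp, pp_flags);

	/* split external pools; requires SKMEM_PP_FLAG_KERNEL_ONLY */
	skmem_arena_pp_setup(ar, srp, name, drv_rx_pp, drv_tx_pp,
	    pp_flags | SKMEM_PP_FLAG_KERNEL_ONLY);
#endif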

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, uint32_t pp_flags,
    struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena_nexus *arn;
	struct skmem_arena *ar;
	char cname[64];
	uint32_t i;
	const char *name = na->na_name;
	boolean_t kernel_only = (pp_flags & SKMEM_PP_FLAG_KERNEL_ONLY) != 0;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
	arn = (struct skmem_arena_nexus *)ar;

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), pp_flags)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *svr = nxv->nxv_reg;

		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(svr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
		skmem_region_retain(svr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this assumes a kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * a user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
		if ((arn->arn_schema_cache = skmem_cache_create(cname,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(cname,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *str =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];

		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}
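
/*
 * Sketch of the in/out packet pool contract (not compiled): passing
 * pointers to NULL pool handles asks the arena to create the pool(s)
 * and return them retained; the caller then owns one reference on each
 * returned handle and must release both when done.
 */
#if 0
	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
	int err;

	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
	    SKMEM_PP_FLAG_KERNEL_ONLY, NULL, &err);
	/*
	 * For an internally-created pool, rx_pp == tx_pp here (one pool
	 * shared for both directions), retained once for each pointer.
	 */
#endif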

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}
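
/*
 * Driver-side pairing sketch (hypothetical caller, not compiled): the
 * noidle count pins the kernel slot descriptor regions so that a defunct
 * cannot tear them down mid-I/O; skmem_arena_nexus_teardown() below
 * consults arn_ksd_nodefunct before releasing them.
 */
#if 0
	skmem_arena_nexus_sd_set_noidle(arn, 1);	/* pin KSD regions */
	/* ... operate on kernel slot descriptors ... */
	skmem_arena_nexus_sd_set_noidle(arn, -1);	/* unpin */

	if (skmem_arena_nexus_sd_idle(arn)) {
		/* no outstanding pins; SD regions may be torn down */
	}
#endif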

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
		arn->arn_flowadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *arc;
	struct skmem_arena *ar;
	char cname[64];

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
	arc = (struct skmem_arena_necp *)ar;

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}
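
/*
 * Usage sketch (not compiled; `srp_ustats' and `srp_kstats' are assumed
 * to have been initialized per the asserts above): per-flow kernel stats
 * objects come from the kstats cache and are mirrored read-only to user
 * space through the ustats region.
 */
#if 0
	struct skmem_arena *ar;
	struct skmem_arena_necp *arc;
	void *kstats;
	int err;

	ar = skmem_arena_create_for_necp("fd", &srp_ustats, &srp_kstats,
	    &err);
	if (ar != NULL && (arc = skmem_arena_necp(ar)) != NULL) {
		kstats = skmem_cache_alloc(arc->arc_kstats_cache, SKMEM_SLEEP);
		/* ... update stats; user task sees them via the mirror ... */
		skmem_cache_free(arc->arc_kstats_cache, kstats);
		(void) skmem_arena_release(ar);
	}
#endif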

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
	ars = (struct skmem_arena_system *)ar;

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}
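
/*
 * Usage sketch (not compiled): the system arena simply wraps the global
 * sysctls region, so callers can locate the singleton shared object and
 * its size for export to user tasks.
 */
#if 0
	struct skmem_arena *ar;
	int err;

	ar = skmem_arena_create_for_system("global", &err);
	if (ar != NULL) {
		void *obj = skmem_arena_system_sysctls_obj_addr(ar);
		size_t objsize = skmem_arena_system_sysctls_obj_size(ar);
		/* ... map/export obj[0..objsize) ... */
		(void) skmem_arena_release(ar);
	}
#endif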

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Teardown (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static inline struct kalloc_type_view *
skmem_arena_zone(skmem_arena_type_t type)
{
	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		return ar_nexus_zone;

	case SKMEM_ARENA_TYPE_NECP:
		return ar_necp_zone;

	case SKMEM_ARENA_TYPE_SYSTEM:
		return ar_system_zone;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static struct skmem_arena *
skmem_arena_alloc(skmem_arena_type_t type, const char *name)
{
	const char *ar_str = NULL;
	struct skmem_arena *ar;
	size_t ar_zsize = 0;

	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		ar_zsize = AR_NEXUS_SIZE;
		ar_str = "nexus";
		break;

	case SKMEM_ARENA_TYPE_NECP:
		ar_zsize = AR_NECP_SIZE;
		ar_str = "necp";
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		ar_zsize = AR_SYSTEM_SIZE;
		ar_str = "system";
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ar = zalloc_flags(skmem_arena_zone(type), Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;

	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);

	return ar;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	zfree(skmem_arena_zone(ar->ar_type), ar);
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}
1464 
1465 /*
1466  * Release (and potentially destroy) an arena.
1467  */
1468 __attribute__((always_inline))
1469 static inline boolean_t
skmem_arena_release_locked(struct skmem_arena * ar)1470 skmem_arena_release_locked(struct skmem_arena *ar)
1471 {
1472 	boolean_t lastref = FALSE;
1473 
1474 	AR_LOCK_ASSERT_HELD(ar);
1475 	ASSERT(ar->ar_refcnt != 0);
1476 	if (--ar->ar_refcnt == 0) {
1477 		skmem_arena_destroy(ar);
1478 		lastref = TRUE;
1479 	} else {
1480 		lastref = FALSE;
1481 	}
1482 
1483 	return lastref;
1484 }
1485 
1486 boolean_t
1487 skmem_arena_release(struct skmem_arena *ar)
1488 {
1489 	boolean_t lastref;
1490 
1491 	AR_LOCK(ar);
1492 	/* unlock only if this isn't the last reference */
1493 	if (!(lastref = skmem_arena_release_locked(ar))) {
1494 		AR_UNLOCK(ar);
1495 	}
1496 
1497 	return lastref;
1498 }
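
/*
 * Usage sketch (hypothetical caller, not part of this file): every
 * skmem_arena_retain() is paired with one skmem_arena_release(); a
 * TRUE return from the latter means the last reference was dropped
 * and the arena has already been destroyed by the callee, so the
 * pointer must not be dereferenced afterwards:
 *
 *	skmem_arena_retain(ar);
 *	...
 *	if (skmem_arena_release(ar)) {
 *		ar = NULL;	// torn down; no unlock needed on this path
 *	}
 */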
1499 
1500 /*
1501  * Map an arena to the task's address space.
1502  */
1503 int
1504 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1505     struct skmem_arena_mmap_info *ami)
1506 {
1507 	task_t task = proc_task(p);
1508 	IOReturn ioerr;
1509 	int err = 0;
1510 
1511 	ASSERT(task != kernel_task && task != TASK_NULL);
1512 	ASSERT(ami->ami_arena == NULL);
1513 	ASSERT(ami->ami_mapref == NULL);
1514 	ASSERT(ami->ami_maptask == TASK_NULL);
1515 	ASSERT(!ami->ami_redirect);
1516 
1517 	AR_LOCK(ar);
1518 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1519 		err = ENODEV;
1520 		goto failed;
1521 	}
1522 
1523 	ASSERT(ar->ar_ar != NULL);
1524 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1525 		err = ENOMEM;
1526 		goto failed;
1527 	}
1528 
1529 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1530 	    &ami->ami_mapsize);
1531 	VERIFY(ioerr == kIOReturnSuccess);
1532 
1533 	ami->ami_arena = ar;
1534 	skmem_arena_retain_locked(ar);
1535 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1536 
1537 	ami->ami_maptask = task;
1538 	ar->ar_mapcnt++;
1539 	if (ar->ar_mapcnt == 1) {
1540 		ar->ar_mapsize = ami->ami_mapsize;
1541 	}
1542 
1543 	ASSERT(ami->ami_mapref != NULL);
1544 	ASSERT(ami->ami_arena == ar);
1545 	AR_UNLOCK(ar);
1546 
1547 	return 0;
1548 
1549 failed:
1550 	AR_UNLOCK(ar);
1551 	skmem_arena_munmap(ar, ami);
1552 	VERIFY(err != 0);
1553 
1554 	return err;
1555 }
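
/*
 * Mapping sketch (hypothetical caller, assuming a zero-initialized
 * mmap_info as required by the ASSERTs above):
 *
 *	struct skmem_arena_mmap_info ami = {};
 *	int err = skmem_arena_mmap(ar, p, &ami);
 *	if (err == 0) {
 *		// [ami.ami_mapaddr, ami.ami_mapaddr + ami.ami_mapsize)
 *		// now spans the arena in the task's address space;
 *		// undo later with skmem_arena_munmap(ar, &ami)
 *	}
 */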
1556 
1557 /*
1558  * Remove arena's memory mapping from task's address space (common code).
1559  * Returns true if caller needs to perform a deferred defunct.
1560  */
1561 static boolean_t
1562 skmem_arena_munmap_common(struct skmem_arena *ar,
1563     struct skmem_arena_mmap_info *ami)
1564 {
1565 	boolean_t need_defunct = FALSE;
1566 
1567 	AR_LOCK(ar);
1568 	if (ami->ami_mapref != NULL) {
1569 		IOSKMapperDestroy(ami->ami_mapref);
1570 		ami->ami_mapref = NULL;
1571 
1572 		VERIFY(ar->ar_mapcnt != 0);
1573 		ar->ar_mapcnt--;
1574 		if (ar->ar_mapcnt == 0) {
1575 			ar->ar_mapsize = 0;
1576 		}
1577 
1578 		VERIFY(ami->ami_arena == ar);
1579 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1580 		    ami_link);
1581 
1582 		/*
1583 		 * We expect the caller to hold an extra reference on
1584 		 * the arena, in addition to the one in mmap_info.
1585 		 */
1586 		VERIFY(ar->ar_refcnt > 1);
1587 		(void) skmem_arena_release_locked(ar);
1588 		ami->ami_arena = NULL;
1589 
1590 		if (ami->ami_redirect) {
1591 			/*
1592 			 * This mapper has been redirected; decrement
1593 			 * the redirect count associated with it.
1594 			 */
1595 			VERIFY(ar->ar_maprdrcnt != 0);
1596 			ar->ar_maprdrcnt--;
1597 		} else if (ar->ar_maprdrcnt != 0 &&
1598 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1599 			/*
1600 			 * There are other mappers for this arena that have
1601 			 * all been redirected, but the arena wasn't marked
1602 			 * inactive by skmem_arena_mredirect() at the time
1603 			 * because this particular mapper, just destroyed,
1604 			 * was still using it.  Now that it's gone, finish
1605 			 * the postponed work once we return to the caller.
1606 			 */
1607 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1608 			ar->ar_flags &= ~ARF_ACTIVE;
1609 			need_defunct = TRUE;
1610 		}
1611 	}
1612 	ASSERT(ami->ami_mapref == NULL);
1613 	ASSERT(ami->ami_arena == NULL);
1614 
1615 	ami->ami_maptask = TASK_NULL;
1616 	ami->ami_mapaddr = 0;
1617 	ami->ami_mapsize = 0;
1618 	ami->ami_redirect = FALSE;
1619 
1620 	AR_UNLOCK(ar);
1621 
1622 	return need_defunct;
1623 }
1624 
1625 /*
1626  * Remove arena's memory mapping from task's address space (channel version).
1627  * Will perform a deferred defunct if needed.
1628  */
1629 void
1630 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1631 {
1632 	SK_LOCK_ASSERT_HELD();
1633 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1634 
1635 	/*
1636 	 * If this unmap is for a channel that was holding the last
1637 	 * active reference on the arena, and there are other defunct
1638 	 * channels still pointing to that arena, perform the actual
1639 	 * arena defunct now.
1640 	 */
1641 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1642 		struct kern_nexus *nx = ch->ch_nexus;
1643 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1644 
1645 		/*
1646 		 * Similar to kern_channel_defunct(), where we let the
1647 		 * domain provider complete the defunct.  At this point
1648 		 * both sk_lock and the channel locks are held, and so
1649 		 * we indicate that to the callee.
1650 		 */
1651 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1652 		    nx, ch, TRUE);
1653 	}
1654 }
1655 
1656 /*
1657  * Remove arena's memory mapping from task's address space (generic).
1658  * This routine should only be called on non-channel related arenas.
1659  */
1660 void
1661 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1662 {
1663 	(void) skmem_arena_munmap_common(ar, ami);
1664 }
1665 
1666 /*
1667  * Redirect eligible memory regions in the task's memory map so that
1668  * they get overwritten and backed with anonymous (zero-filled) pages.
1669  */
1670 int
1671 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1672     struct proc *p, boolean_t *need_defunct)
1673 {
1674 #pragma unused(p)
1675 	int err = 0;
1676 
1677 	*need_defunct = FALSE;
1678 
1679 	AR_LOCK(ar);
1680 	ASSERT(ar->ar_ar != NULL);
1681 	if (ami->ami_redirect) {
1682 		err = EALREADY;
1683 	} else if (ami->ami_mapref == NULL) {
1684 		err = ENXIO;
1685 	} else {
1686 		VERIFY(ar->ar_mapcnt != 0);
1687 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1688 		VERIFY(ami->ami_arena == ar);
1689 		/*
1690 		 * This effectively overwrites the mappings for all
1691 		 * redirectable memory regions (i.e. those without the
1692 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1693 		 * protection flags.  Accesses to these regions will be
1694 		 * redirected to anonymous, zero-filled pages.
1695 		 */
1696 		IOSKMapperRedirect(ami->ami_mapref);
1697 		ami->ami_redirect = TRUE;
1698 
1699 		/*
1700 		 * Mark the arena as inactive if all mapper instances are
1701 		 * redirected; otherwise, we do this later during unmap.
1702 		 * Once inactive, the arena will not allow further mmap,
1703 		 * and it is ready to be defunct later.
1704 		 */
1705 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1706 			ar->ar_flags &= ~ARF_ACTIVE;
1707 			*need_defunct = TRUE;
1708 		}
1709 	}
1710 	AR_UNLOCK(ar);
1711 
1712 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1713 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1714 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1715 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1716 	    *need_defunct, err);
1717 
1718 	return err;
1719 }
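
/*
 * Sequencing sketch (hypothetical caller): once the last active mapper
 * has been redirected, *need_defunct is set, the arena is no longer
 * ARF_ACTIVE, and skmem_arena_defunct() below can complete:
 *
 *	boolean_t need_defunct;
 *	if (skmem_arena_mredirect(ar, ami, p, &need_defunct) == 0 &&
 *	    need_defunct) {
 *		(void) skmem_arena_defunct(ar);	// returns 0 once inactive
 *	}
 */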
1720 
1721 /*
1722  * Defunct an arena.
1723  */
1724 int
1725 skmem_arena_defunct(struct skmem_arena *ar)
1726 {
1727 	AR_LOCK(ar);
1728 
1729 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b", ar->ar_name,
1730 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1731 
1732 	if (ar->ar_flags & ARF_DEFUNCT) {
1733 		AR_UNLOCK(ar);
1734 		return EALREADY;
1735 	} else if (ar->ar_flags & ARF_ACTIVE) {
1736 		AR_UNLOCK(ar);
1737 		return EBUSY;
1738 	}
1739 
1740 	/* purge the caches now */
1741 	skmem_arena_reap_locked(ar, TRUE);
1742 
1743 	/* teardown eligible memory regions and associated resources */
1744 	skmem_arena_teardown(ar, TRUE);
1745 
1746 	ar->ar_flags |= ARF_DEFUNCT;
1747 
1748 	AR_UNLOCK(ar);
1749 
1750 	return 0;
1751 }
1752 
1753 /*
1754  * Retrieve total and in-use memory statistics of regions in the arena.
1755  */
1756 void
1757 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1758     uint64_t *mem_inuse)
1759 {
1760 	uint32_t i;
1761 
1762 	if (mem_total != NULL) {
1763 		*mem_total = 0;
1764 	}
1765 	if (mem_inuse != NULL) {
1766 		*mem_inuse = 0;
1767 	}
1768 
1769 	AR_LOCK(ar);
1770 	for (i = 0; i < SKMEM_REGIONS; i++) {
1771 		if (ar->ar_regions[i] == NULL) {
1772 			continue;
1773 		}
1774 
1775 		if (mem_total != NULL) {
1776 			*mem_total += AR_MEM_TOTAL(ar, i);
1777 		}
1778 		if (mem_inuse != NULL) {
1779 			*mem_inuse += AR_MEM_INUSE(ar, i);
1780 		}
1781 	}
1782 	AR_UNLOCK(ar);
1783 }
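
/*
 * Usage sketch (illustrative): both counters are byte totals summed
 * over the populated regions; either pointer may be NULL if only one
 * value is wanted:
 *
 *	uint64_t total, inuse;
 *	skmem_arena_get_stats(ar, &total, &inuse);
 *	// e.g. report (inuse * 100) / total as percent utilization,
 *	// guarding against total == 0
 */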
1784 
1785 /*
1786  * Retrieve the offset of a particular region (identified by its ID)
1787  * from the base of the arena.
1788  */
1789 mach_vm_offset_t
1790 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1791 {
1792 	mach_vm_offset_t offset = 0;
1793 	uint32_t i;
1794 
1795 	ASSERT(id < SKMEM_REGIONS);
1796 
1797 	AR_LOCK(ar);
1798 	for (i = 0; i < id; i++) {
1799 		if (ar->ar_regions[i] == NULL) {
1800 			continue;
1801 		}
1802 
1803 		offset += ar->ar_regions[i]->skr_size;
1804 	}
1805 	AR_UNLOCK(ar);
1806 
1807 	return offset;
1808 }
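
/*
 * Worked example (hypothetical sizes): if only regions 0 and 1 are
 * populated, each 64 KB in size, the offset of region ID 2 is
 * 64 KB + 64 KB = 128 KB from the arena base; absent regions
 * contribute nothing.  This reflects the layout assumption that
 * populated regions are placed contiguously in ascending
 * region-ID order.
 */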
1809 
1810 static void
1811 skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
1812 {
1813 	if (pp->pp_kmd_cache != NULL) {
1814 		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1815 	}
1816 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1817 		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
1818 	}
1819 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1820 		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
1821 	}
1822 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1823 		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
1824 	}
1825 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1826 		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
1827 	}
1828 	if (pp->pp_raw_kbft_cache != NULL) {
1829 		skmem_cache_reap_now(pp->pp_raw_kbft_cache, purge);
1830 	}
1831 }
1832 
1833 /*
1834  * Reap all of the configured caches in the arena, so that any excess
1835  * outside of their working sets gets released to their respective backing
1836  * regions.  If purging is specified, we empty the caches' working sets,
1837  * including everything that's cached at the CPU layer.
1838  */
1839 static void
1840 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1841 {
1842 	struct skmem_arena_nexus *arn;
1843 	struct skmem_arena_necp *arc;
1844 	struct kern_pbufpool *pp;
1845 
1846 	AR_LOCK_ASSERT_HELD(ar);
1847 
1848 	switch (ar->ar_type) {
1849 	case SKMEM_ARENA_TYPE_NEXUS:
1850 		arn = (struct skmem_arena_nexus *)ar;
1851 		if (arn->arn_schema_cache != NULL) {
1852 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1853 		}
1854 		if (arn->arn_ring_cache != NULL) {
1855 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1856 		}
1857 		if ((pp = arn->arn_rx_pp) != NULL) {
1858 			skmem_reap_pbufpool_caches(pp, purge);
1859 		}
1860 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1861 			skmem_reap_pbufpool_caches(pp, purge);
1862 		}
1863 		break;
1864 
1865 	case SKMEM_ARENA_TYPE_NECP:
1866 		arc = (struct skmem_arena_necp *)ar;
1867 		if (arc->arc_kstats_cache != NULL) {
1868 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1869 		}
1870 		break;
1871 
1872 	case SKMEM_ARENA_TYPE_SYSTEM:
1873 		break;
1874 	}
1875 }
1876 
1877 void
1878 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1879 {
1880 	AR_LOCK(ar);
1881 	skmem_arena_reap_locked(ar, purge);
1882 	AR_UNLOCK(ar);
1883 }
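
/*
 * Usage sketch (illustrative): the two reap modes differ only in how
 * much cached state is given back:
 *
 *	skmem_arena_reap(ar, FALSE);	// trim excess beyond working sets
 *	skmem_arena_reap(ar, TRUE);	// also purge working sets and
 *					// CPU-layer caches (defunct path)
 */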
1884 
1885 #if SK_LOG
1886 SK_LOG_ATTRIBUTE
1887 static void
1888 skmem_arena_create_region_log(struct skmem_arena *ar)
1889 {
1890 	char label[32];
1891 	int i;
1892 
1893 	switch (ar->ar_type) {
1894 	case SKMEM_ARENA_TYPE_NEXUS:
1895 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1896 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1897 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1898 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1899 		break;
1900 
1901 	case SKMEM_ARENA_TYPE_NECP:
1902 	case SKMEM_ARENA_TYPE_SYSTEM:
1903 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1904 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1905 		break;
1906 	}
1907 
1908 	for (i = 0; i < SKMEM_REGIONS; i++) {
1909 		if (ar->ar_regions[i] == NULL) {
1910 			continue;
1911 		}
1912 
1913 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1914 		    skmem_region_id2name(i));
1915 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1916 		    "o:[%4u x %6u -> %4u x %6u]", label,
1917 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1918 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1919 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1920 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1921 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1922 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1923 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1924 	}
1925 }
1926 #endif /* SK_LOG */
1927 
1928 static size_t
1929 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
1930 {
1931 	size_t actual_space = sizeof(struct sk_stats_arena);
1932 	struct sk_stats_arena *sar = out;
1933 	struct skmem_arena_mmap_info *ami = NULL;
1934 	pid_t proc_pid;
1935 	int i;
1936 
1937 	if (out == NULL || len < actual_space) {
1938 		goto done;
1939 	}
1940 
1941 	AR_LOCK(ar);
1942 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1943 	    "%s", ar->ar_name);
1944 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1945 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1946 	i = 0;
1947 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1948 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1949 			struct kern_channel *ch;
1950 			ch = container_of(ami, struct kern_channel, ch_mmap);
1951 			proc_pid = ch->ch_pid;
1952 		} else {
1953 			ASSERT((ami->ami_arena->ar_type ==
1954 			    SKMEM_ARENA_TYPE_NECP) ||
1955 			    (ami->ami_arena->ar_type ==
1956 			    SKMEM_ARENA_TYPE_SYSTEM));
1957 			proc_pid =
1958 			    necp_client_get_proc_pid_from_arena_info(ami);
1959 		}
1960 		sar->sar_mapped_pids[i++] = proc_pid;
1961 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1962 			break;
1963 		}
1964 	}
1965 
1966 	for (i = 0; i < SKMEM_REGIONS; i++) {
1967 		struct skmem_region *skr = ar->ar_regions[i];
1968 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1969 
1970 		if (skr == NULL) {
1971 			uuid_clear(*sreg_uuid);
1972 			continue;
1973 		}
1974 
1975 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1976 	}
1977 	AR_UNLOCK(ar);
1978 
1979 done:
1980 	return actual_space;
1981 }
1982 
1983 static int
1984 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1985 {
1986 #pragma unused(arg1, arg2, oidp)
1987 	struct skmem_arena *ar;
1988 	size_t actual_space;
1989 	size_t buffer_space;
1990 	size_t allocated_space;
1991 	caddr_t buffer = NULL;
1992 	caddr_t scan;
1993 	int error = 0;
1994 
1995 	if (!kauth_cred_issuser(kauth_cred_get())) {
1996 		return EPERM;
1997 	}
1998 
1999 	net_update_uptime();
2000 	buffer_space = req->oldlen;
2001 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
2002 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
2003 			buffer_space = SK_SYSCTL_ALLOC_MAX;
2004 		}
2005 		allocated_space = buffer_space;
2006 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
2007 		if (__improbable(buffer == NULL)) {
2008 			return ENOBUFS;
2009 		}
2010 	} else if (req->oldptr == USER_ADDR_NULL) {
2011 		buffer_space = 0;
2012 	}
2013 	actual_space = 0;
2014 	scan = buffer;
2015 
2016 	SKMEM_ARENA_LOCK();
2017 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
2018 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
2019 		if (scan != NULL) {
2020 			if (buffer_space < size) {
2021 				/* supplied buffer too small, stop copying */
2022 				error = ENOMEM;
2023 				break;
2024 			}
2025 			scan += size;
2026 			buffer_space -= size;
2027 		}
2028 		actual_space += size;
2029 	}
2030 	SKMEM_ARENA_UNLOCK();
2031 
2032 	if (actual_space != 0) {
2033 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2034 		if (out_error != 0) {
2035 			error = out_error;
2036 		}
2037 	}
2038 	if (buffer != NULL) {
2039 		sk_free_data(buffer, allocated_space);
2040 	}
2041 
2042 	return error;
2043 }
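
/*
 * Userland sketch (hypothetical: the MIB name below is assumed for
 * illustration and is defined where this handler is registered;
 * assumes <sys/sysctl.h> and <stdlib.h>).  The handler follows the
 * usual two-pass sysctl protocol, reporting only the required size
 * when no buffer is supplied:
 *
 *	size_t len = 0;
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		struct sk_stats_arena *sar = malloc(len);
 *		if (sar != NULL && sysctlbyname("kern.skywalk.stats.arena",
 *		    sar, &len, NULL, 0) == 0) {
 *			// len / sizeof(*sar) records are now in sar[]
 *		}
 *	}
 */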
2044