1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /* BEGIN CSTYLED */
30 /*
31  * SKMEM_ARENA_TYPE_NEXUS:
32  *
33  *   This arena represents the memory subsystem of a nexus adapter.  It consists
34  *   of a collection of memory regions that are usable by the nexus, as well
35  *   as the various caches for objects in those regions.
36  *
37  *       (1 per nexus adapter)
38  *     +=======================+
39  *     |      skmem_arena      |
40  *     +-----------------------+              (backing regions)
41  *     |     ar_regions[0]     |           +=======================+
42  *     :          ...          : ------->> |     skmem_region      |===+
43  *     |     ar_regions[n]     |           +=======================+   |===+
44  *     +=======================+               +=======================+   |
45  *     |     arn_{caches,pp}   | ---+              +=======================+
46  *     +-----------------------+    |
47  *     |     arn_stats_obj     |    |
48  *     |     arn_flowadv_obj   |    |         (cache frontends)
49  *     |     arn_nexusadv_obj  |    |      +=======================+
50  *     +-----------------------+    +--->> |     skmem_cache       |===+
51  *                                         +=======================+   |===+
52  *                                             +=======================+   |
53  *                                                 +=======================+
54  *
55  *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
56  *   may be external to the nexus adapter, e.g. created by the driver or an
57  *   external entity.  If not supplied, we create these regions along with
58  *   the packet buffer pool ourselves.  The rest of the regions (unrelated
59  *   to the packet buffer pool) are unique to the arena and are allocated at
60  *   arena creation time.
61  *
62  *   An arena may be mapped to a user task/process as many times as needed.
63  *   The result of each mapping is a contiguous range within the address space
64  *   of that task, indicated by the [ami_mapaddr, ami_mapaddr + ami_mapsize) span.
65  *   This is achieved by leveraging the mapper memory object ar_mapper that
66  *   "stitches" the disjoint segments together.  Only user-mappable regions,
67  *   i.e. those marked with SKR_MODE_MMAPOK, will be included in this span.
68  *
69  *   Nexus adapters that are eligible for defunct will trigger the arena to
70  *   undergo memory redirection for all regions except those that are marked
71  *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
72  *   to the adapter are defunct.  Upon completion, those redirected regions
73  *   will be torn down in order to reduce their memory footprints.  When this
74  *   happens the adapter and its arena are no longer active or in service.
75  *
76  *   The arena exposes caches for allocating and freeing most region objects.
77  *   These slab-allocator based caches act as front-ends to the regions; only
78  *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
79  *   other ones simply utilize skmem_cache for slab-based allocations.
80  *
81  *   Certain regions contain singleton objects that are simple enough to not
82  *   require the slab allocator, such as the ones used for statistics and flow
83  *   advisories.  Because of this, we directly allocate from those regions
84  *   and store the objects in the arena.
85  *
86  * SKMEM_ARENA_TYPE_NECP:
87  *
88  *   This arena represents the memory subsystem of an NECP file descriptor
89  *   object.  It consists of a memory region for per-flow statistics, as well
90  *   as a cache front-end for that region.
91  *
92  * SKMEM_ARENA_TYPE_SYSTEM:
93  *
94  *   This arena represents general, system-wide objects.  It currently
95  *   consists of the sysctls region that's created once at init time.
96  */
97 /* END CSTYLED */
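
/*
 * Illustrative arena lifecycle sketch (not compiled; the nexus adapter
 * "na", its region parameters "srp[]" and the client process "p" are
 * assumed to have been prepared by the caller, and a pp_flags of 0
 * assumes a user-mappable packet pool):
 *
 *	int err = 0;
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	struct skmem_arena_mmap_info ami;
 *	struct skmem_arena *ar;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    0, NULL, &err);
 *	if (ar == NULL)
 *		return err;
 *
 *	bzero(&ami, sizeof(ami));
 *	err = skmem_arena_mmap(ar, p, &ami);
 *	...
 *	skmem_arena_munmap(ar, &ami);
 *	pp_release(rx_pp);
 *	pp_release(tx_pp);
 *	skmem_arena_release(ar);
 *
 * skmem_arena_create_for_nexus() returns with a reference held for the
 * caller and, because the rx_pp/tx_pp pointers above are non-NULL but
 * point to NULL pools, it also returns the packet pools retained; hence
 * the pp_release() and skmem_arena_release() calls at the end.
 */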
98 
99 #include <skywalk/os_skywalk_private.h>
100 #include <net/necp.h>
101 
102 static void skmem_arena_destroy(struct skmem_arena *);
103 static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
104 static int skmem_arena_create_finalize(struct skmem_arena *);
105 static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
106 static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
107 static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
108 static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
109     const char *);
110 static void skmem_arena_free(struct skmem_arena *);
111 static void skmem_arena_retain_locked(struct skmem_arena *);
112 static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
113 static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
114     struct skmem_arena_mmap_info *);
115 #if SK_LOG
116 static void skmem_arena_create_region_log(struct skmem_arena *);
117 #endif /* SK_LOG */
118 static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;
119 
120 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
121     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
122     0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
123     "Skywalk arena statistics");
124 
125 static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
126 static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);
127 
128 static TAILQ_HEAD(, skmem_arena) skmem_arena_head = TAILQ_HEAD_INITIALIZER(skmem_arena_head);
129 
130 #define SKMEM_ARENA_LOCK()                      \
131 	lck_mtx_lock(&skmem_arena_lock)
132 #define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
133 	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
134 #define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
135 	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
136 #define SKMEM_ARENA_UNLOCK()                    \
137 	lck_mtx_unlock(&skmem_arena_lock)
138 
139 #define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
140 static ZONE_DEFINE(ar_nexus_zone, SKMEM_ZONE_PREFIX ".mem.arena.nexus",
141     AR_NEXUS_SIZE, ZC_ZFREE_CLEARMEM);
142 
143 #define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
144 static ZONE_DEFINE(ar_necp_zone, SKMEM_ZONE_PREFIX ".mem.arena.necp",
145     AR_NECP_SIZE, ZC_ZFREE_CLEARMEM);
146 
147 #define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
148 static ZONE_DEFINE(ar_system_zone, SKMEM_ZONE_PREFIX ".mem.arena.system",
149     AR_SYSTEM_SIZE, ZC_ZFREE_CLEARMEM);
150 
151 #define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
152 static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);
153 
154 static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
155 static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
156 static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);
157 
158 SK_NO_INLINE_ATTRIBUTE
159 static int
160 skmem_arena_sd_setup(const struct nexus_adapter *na,
161     struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
162     boolean_t kernel_only, boolean_t tx)
163 {
164 	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
165 	struct skmem_cache **cachep;
166 	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
167 	const char *name = na->na_name;
168 	char cname[64];
169 	skmem_region_id_t usd_type, ksd_type;
170 	int err = 0;
171 
174 	if (tx) {
175 		usd_type = SKMEM_REGION_TXAUSD;
176 		ksd_type = SKMEM_REGION_TXAKSD;
177 		cachep = &arn->arn_txaksd_cache;
178 	} else {
179 		usd_type = SKMEM_REGION_RXFUSD;
180 		ksd_type = SKMEM_REGION_RXFKSD;
181 		cachep = &arn->arn_rxfksd_cache;
182 	}
183 	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
184 	if (ksd_skr == NULL) {
185 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
186 		    "create %s region", ar->ar_name, SK_KVA(ar),
187 		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
188 		err = ENOMEM;
189 		goto failed;
190 	}
191 	ar->ar_regions[ksd_type] = ksd_skr;
192 	if (!kernel_only) {
193 		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
194 		    NULL, NULL);
195 		if (usd_skr == NULL) {
196 			err = ENOMEM;
197 			goto failed;
198 		}
199 		ar->ar_regions[usd_type] = usd_skr;
200 		skmem_region_mirror(ksd_skr, usd_skr);
201 	}
202 	snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
203 	ASSERT(ar->ar_regions[ksd_type] != NULL);
204 	*cachep = skmem_cache_create(cname,
205 	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
206 	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
207 	if (*cachep == NULL) {
208 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
209 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
210 		err = ENOMEM;
211 		goto failed;
212 	}
213 	return 0;
214 
215 failed:
216 	if (ksd_skr != NULL) {
217 		skmem_region_release(ksd_skr);
218 		ar->ar_regions[ksd_type] = NULL;
219 	}
220 	if (usd_skr != NULL) {
221 		/*
222 		 * decrements refcnt incremented by skmem_region_mirror()
223 		 * this is not needed in case skmem_cache_create() succeeds
224 		 * because skmem_cache_destroy() does the release.
225 		 */
226 		skmem_region_release(usd_skr);
227 
228 		/* decrements the region's own refcnt */
229 		skmem_region_release(usd_skr);
230 		ar->ar_regions[usd_type] = NULL;
231 	}
232 	return err;
233 }
234 
235 SK_NO_INLINE_ATTRIBUTE
236 static void
237 skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
238 {
239 	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
240 	struct skmem_cache **cachep;
241 	struct skmem_region **ksd_rp, **usd_rp;
242 
243 	if (tx) {
244 		cachep = &arn->arn_txaksd_cache;
245 		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
246 		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
247 	} else {
248 		cachep = &arn->arn_rxfksd_cache;
249 		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
250 		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
251 	}
252 	if (*cachep != NULL) {
253 		skmem_cache_destroy(*cachep);
254 		*cachep = NULL;
255 	}
256 	if (*usd_rp != NULL) {
257 		skmem_region_release(*usd_rp);
258 		*usd_rp = NULL;
259 	}
260 	if (*ksd_rp != NULL) {
261 		skmem_region_release(*ksd_rp);
262 		*ksd_rp = NULL;
263 	}
264 }
265 
266 static bool
267 skmem_arena_pp_setup(struct skmem_arena *ar,
268     struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
269     struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
270     uint32_t flags)
271 {
272 	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
273 	boolean_t kernel_only = (flags & SKMEM_PP_FLAG_KERNEL_ONLY) != 0;
274 	if (rx_pp == NULL && tx_pp == NULL) {
275 		uint32_t ppcreatef = 0;
276 		if (flags & SKMEM_PP_FLAG_TRUNCATED_BUF) {
277 			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
278 		}
279 		if (flags & SKMEM_PP_FLAG_KERNEL_ONLY) {
280 			ppcreatef |= PPCREATEF_KERNEL_ONLY;
281 		}
282 		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
283 			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
284 		}
285 		if (flags & SKMEM_PP_FLAG_RAW_BFLT) {
286 			ppcreatef |= PPCREATEF_RAW_BFLT;
287 		}
288 		/* callee retains pp upon success */
289 		rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
290 		    ppcreatef);
291 		if (rx_pp == NULL) {
292 			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
293 			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
294 			return false;
295 		}
296 		pp_retain(rx_pp);
297 		tx_pp = rx_pp;
298 	} else {
299 		if (rx_pp == NULL) {
300 			rx_pp = tx_pp;
301 		} else if (tx_pp == NULL) {
302 			tx_pp = rx_pp;
303 		}
304 
305 		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
306 		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
307 		ASSERT(!(!kernel_only &&
308 		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
309 		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
310 		pp_retain(rx_pp);
311 		pp_retain(tx_pp);
312 	}
313 
314 	arn->arn_rx_pp = rx_pp;
315 	arn->arn_tx_pp = tx_pp;
316 	if (rx_pp == tx_pp) {
317 		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
318 		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
319 			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
320 		}
321 		ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
322 		ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
323 		    PP_BUF_REGION_LARGE(rx_pp);
324 		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
325 		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
326 		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
327 		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
328 		skmem_region_retain(rx_pp->pp_kmd_region);
329 		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
330 		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
331 		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
332 		if (rx_pp->pp_kbft_region != NULL) {
333 			skmem_region_retain(rx_pp->pp_kbft_region);
334 			ar->ar_regions[SKMEM_REGION_KBFT] =
335 			    rx_pp->pp_kbft_region;
336 		}
337 		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
338 		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
339 	} else {
340 		ASSERT(kernel_only); /* split userspace pools not supported */
341 		ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
342 		ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
343 		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
344 		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
345 		    PP_BUF_REGION_DEF(rx_pp);
346 		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
347 		    PP_BUF_REGION_LARGE(rx_pp);
348 		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
349 			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
350 		}
351 		skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
352 		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
353 		    PP_BUF_REGION_DEF(tx_pp);
354 		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
355 		    PP_BUF_REGION_LARGE(tx_pp);
356 		if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
357 			skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
358 		}
359 		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
360 		skmem_region_retain(rx_pp->pp_kmd_region);
361 		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
362 		skmem_region_retain(tx_pp->pp_kmd_region);
363 		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
364 		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
365 		if (rx_pp->pp_kbft_region != NULL) {
366 			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
367 			skmem_region_retain(rx_pp->pp_kbft_region);
368 			ar->ar_regions[SKMEM_REGION_RXKBFT] =
369 			    rx_pp->pp_kbft_region;
370 		}
371 		if (tx_pp->pp_kbft_region != NULL) {
372 			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
373 			skmem_region_retain(tx_pp->pp_kbft_region);
374 			ar->ar_regions[SKMEM_REGION_TXKBFT] =
375 			    tx_pp->pp_kbft_region;
376 		}
377 	}
378 
379 	if (kernel_only) {
380 		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
381 			ASSERT(PP_KERNEL_ONLY(rx_pp));
382 			ASSERT(PP_KERNEL_ONLY(tx_pp));
383 			ASSERT(rx_pp->pp_umd_region == NULL);
384 			ASSERT(tx_pp->pp_umd_region == NULL);
385 			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
386 			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
387 			ASSERT(rx_pp->pp_ubft_region == NULL);
388 			ASSERT(tx_pp->pp_ubft_region == NULL);
389 			if (rx_pp->pp_kbft_region != NULL) {
390 				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
391 				    NULL);
392 			}
393 			if (tx_pp->pp_kbft_region != NULL) {
394 				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
395 				    NULL);
396 			}
397 		}
398 	} else {
399 		ASSERT(rx_pp == tx_pp);
400 		ASSERT(!PP_KERNEL_ONLY(rx_pp));
401 		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
402 		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
403 		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
404 		skmem_region_retain(rx_pp->pp_umd_region);
405 		if (rx_pp->pp_kbft_region != NULL) {
406 			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
407 			ASSERT(rx_pp->pp_ubft_region != NULL);
408 			ASSERT(rx_pp->pp_ubft_region->skr_mode &
409 			    SKR_MODE_MIRRORED);
410 			ar->ar_regions[SKMEM_REGION_UBFT] =
411 			    rx_pp->pp_ubft_region;
412 			skmem_region_retain(rx_pp->pp_ubft_region);
413 		}
414 	}
415 
416 	arn->arn_md_type = rx_pp->pp_md_type;
417 	arn->arn_md_subtype = rx_pp->pp_md_subtype;
418 	return true;
419 }
420 
421 /*
422  * Create a nexus adapter arena.
423  */
424 struct skmem_arena *
425 skmem_arena_create_for_nexus(const struct nexus_adapter *na,
426     struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
427     struct kern_pbufpool **rx_pp, uint32_t pp_flags,
428     struct kern_nexus_advisory *nxv, int *perr)
429 {
430 #define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
431 	struct skmem_arena_nexus *arn;
432 	struct skmem_arena *ar;
433 	char cname[64];
434 	uint32_t i;
435 	const char *name = na->na_name;
436 	boolean_t kernel_only = (pp_flags & SKMEM_PP_FLAG_KERNEL_ONLY) != 0;
437 
438 	*perr = 0;
439 
440 	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
441 	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
442 	arn = (struct skmem_arena_nexus *)ar;
443 
444 	/* these regions must not be readable/writeable */
445 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
446 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);
447 
448 	/* these regions must be read-only */
449 	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
450 	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
451 	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
452 	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
453 		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
454 		    SKMEM_REGION_CR_UREADONLY);
455 		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
456 		    SKMEM_REGION_CR_UREADONLY);
457 	} else {
458 		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
459 		    SKMEM_REGION_CR_UREADONLY));
460 		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
461 		    SKMEM_REGION_CR_UREADONLY));
462 	}
463 
464 	/* these regions must be user-mappable */
465 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
466 	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
467 	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
468 	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
469 	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
470 	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
471 	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
472 	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
473 	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
474 	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
475 	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
476 	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
477 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);
478 
479 	/* these must not be user-mappable */
480 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
481 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
482 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
483 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
484 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
485 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
486 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
487 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
488 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));
489 
490 	/* these regions must be shareable */
491 	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
492 	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
493 	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
494 	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
495 	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
496 	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
497 
498 	/* these regions must not be shareable */
499 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
500 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
501 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
502 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
503 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
504 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
505 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
506 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
507 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
508 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
509 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
510 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
511 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
512 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
513 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
514 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
515 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
516 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
517 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
518 	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));
519 
520 	/* these must stay active */
521 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
522 	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
523 	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);
524 
525 	/* no kstats for nexus */
526 	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);
527 
528 	AR_LOCK(ar);
529 	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
530 	    (tx_pp ? *tx_pp : NULL), pp_flags)) {
531 		goto failed;
532 	}
533 
534 	if (nxv != NULL && nxv->nxv_reg != NULL) {
535 		struct skmem_region *svr = nxv->nxv_reg;
536 
537 		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
538 		ASSERT(svr->skr_seg_max_cnt == 1);
539 		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
540 		skmem_region_retain(svr);
541 
542 		ASSERT(nxv->nxv_adv != NULL);
543 		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
544 			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
545 			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
546 		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
547 			VERIFY(nxv->netif_nxv_adv->nna_version ==
548 			    NX_NETIF_ADVISORY_CURRENT_VERSION);
549 		} else {
550 			panic_plain("%s: invalid advisory type %d",
551 			    __func__, nxv->nxv_adv_type);
552 			/* NOTREACHED */
553 		}
554 		arn->arn_nexusadv_obj = nxv->nxv_adv;
555 	} else {
556 		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
557 		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
558 	}
559 
560 	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
561 		goto failed;
562 	}
563 
564 	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
565 		goto failed;
566 	}
567 
568 	for (i = 0; i < SKMEM_REGIONS; i++) {
569 		/* skip if already created */
570 		if (ar->ar_regions[i] != NULL) {
571 			continue;
572 		}
573 
574 		/* skip external regions from packet pool */
575 		if (skmem_region_for_pp(i)) {
576 			continue;
577 		}
578 
579 		/* skip slot descriptor regions */
580 		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
581 		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
582 			continue;
583 		}
584 
585 		/* skip if region is configured to be empty */
586 		if (srp[i].srp_c_obj_cnt == 0) {
587 			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
588 			    i == SKMEM_REGION_USTATS ||
589 			    i == SKMEM_REGION_KSTATS ||
590 			    i == SKMEM_REGION_INTRINSIC ||
591 			    i == SKMEM_REGION_FLOWADV ||
592 			    i == SKMEM_REGION_NEXUSADV ||
593 			    i == SKMEM_REGION_SYSCTLS ||
594 			    i == SKMEM_REGION_GUARD_TAIL);
595 			continue;
596 		}
597 
598 		ASSERT(srp[i].srp_id == i);
599 
600 		/*
601 		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
602 		 * that this assumes a kernel-only arena is always used
603 		 * for kernel-only nexus adapters (never directly by a
604 		 * user process).
605 		 *
606 		 * XXX [email protected] - see comments in kern_pbufpool_create().
607 		 * We need to revisit this logic for "direct channel" access,
608 		 * perhaps via a separate adapter flag.
609 		 */
610 		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
611 		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
612 		    i == SKMEM_REGION_GUARD_TAIL)) {
613 			continue;
614 		}
615 
616 		/* not for nexus, or for us to create here */
617 		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
618 		ASSERT(i != SKMEM_REGION_NEXUSADV);
619 		ASSERT(i != SKMEM_REGION_SYSCTLS);
620 		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
621 		ASSERT(i != SKMEM_REGION_KSTATS);
622 		ASSERT(i != SKMEM_REGION_INTRINSIC);
623 
624 		/* otherwise create it */
625 		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
626 		    NULL, NULL, NULL)) == NULL) {
627 			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
628 			    "create %s region", ar->ar_name, SK_KVA(ar),
629 			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
630 			goto failed;
631 		}
632 	}
633 
634 	/* create skmem_cache for schema (without magazines) */
635 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
636 	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
637 		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
638 		if ((arn->arn_schema_cache = skmem_cache_create(cname,
639 		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
640 		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
641 		    SKMEM_CR_NOMAGAZINES)) == NULL) {
642 			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
643 			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
644 			    cname);
645 			goto failed;
646 		}
647 	}
648 
649 	/* create skmem_cache for rings (without magazines) */
650 	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
651 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
652 	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
653 	    (arn->arn_ring_cache = skmem_cache_create(cname,
654 	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
655 	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
656 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
657 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
658 		goto failed;
659 	}
660 
661 	/*
662 	 * If the stats region is present, allocate a single object directly
663 	 * from the region; we don't need to create an skmem_cache for this,
664 	 * as the object is allocated (and freed) only once.
665 	 */
666 	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
667 		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];
668 
669 		/* no kstats for nexus */
670 		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
671 		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
672 		ASSERT(str->skr_seg_max_cnt == 1);
673 
674 		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
675 		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
676 			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
677 			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
678 			    ARF_BITS);
679 			goto failed;
680 		}
681 	}
682 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
683 
684 	/*
685 	 * If the flowadv region is present, allocate a single object directly
686 	 * from the region; we don't need to create an skmem_cache for this,
687 	 * as the object is allocated (and freed) only once.
688 	 */
689 	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
690 		struct skmem_region *str =
691 		    ar->ar_regions[SKMEM_REGION_FLOWADV];
692 
693 		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
694 		ASSERT(str->skr_seg_max_cnt == 1);
695 
696 		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
697 		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
698 			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
699 			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
700 			    ARF_BITS);
701 			goto failed;
702 		}
703 	}
704 
705 	if (skmem_arena_create_finalize(ar) != 0) {
706 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
707 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
708 		goto failed;
709 	}
710 
711 	++ar->ar_refcnt;        /* for caller */
712 	AR_UNLOCK(ar);
713 
714 	SKMEM_ARENA_LOCK();
715 	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
716 	SKMEM_ARENA_UNLOCK();
717 
718 	/* caller didn't give us one, but would like us to return it? */
719 	if (rx_pp != NULL && *rx_pp == NULL) {
720 		*rx_pp = arn->arn_rx_pp;
721 		pp_retain(*rx_pp);
722 	}
723 	if (tx_pp != NULL && *tx_pp == NULL) {
724 		*tx_pp = arn->arn_tx_pp;
725 		pp_retain(*tx_pp);  /* for caller */
726 	}
727 
728 #if SK_LOG
729 	if (__improbable(sk_verbose != 0)) {
730 		skmem_arena_create_region_log(ar);
731 	}
732 #endif /* SK_LOG */
733 
734 	return ar;
735 
736 failed:
737 	AR_LOCK_ASSERT_HELD(ar);
738 	skmem_arena_destroy(ar);
739 	*perr = ENOMEM;
740 
741 	return NULL;
742 #undef SRP_CFLAGS
743 }
744 
745 void
746 skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
747 {
748 	struct skmem_arena *ar = &arn->arn_cmn;
749 
750 	AR_LOCK(ar);
751 	arn->arn_ksd_nodefunct += cnt;
752 	VERIFY(arn->arn_ksd_nodefunct >= 0);
753 	AR_UNLOCK(ar);
754 }
755 
756 boolean_t
757 skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
758 {
759 	struct skmem_arena *ar = &arn->arn_cmn;
760 	boolean_t idle;
761 
762 	AR_LOCK(ar);
763 	VERIFY(arn->arn_ksd_nodefunct >= 0);
764 	idle = (arn->arn_ksd_nodefunct == 0);
765 	AR_UNLOCK(ar);
766 
767 	return idle;
768 }
769 
770 static void
771 skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
772 {
773 	struct skmem_arena *ar = &arn->arn_cmn;
774 	struct skmem_region *skr;
775 	int i;
776 
777 	AR_LOCK_ASSERT_HELD(ar);
778 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
779 
780 	/* these should never be set for nexus arena */
781 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
782 	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
783 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
784 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
785 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
786 
787 	if (arn->arn_stats_obj != NULL) {
788 		skr = ar->ar_regions[SKMEM_REGION_USTATS];
789 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
790 		skmem_region_free(skr, arn->arn_stats_obj, NULL);
791 		arn->arn_stats_obj = NULL;
792 		skmem_region_release(skr);
793 		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
794 	}
795 	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
796 	ASSERT(arn->arn_stats_obj == NULL);
797 
798 	if (arn->arn_flowadv_obj != NULL) {
799 		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
800 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
801 		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
802 		arn->arn_flowadv_obj = NULL;
803 		skmem_region_release(skr);
804 		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
805 	}
806 	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
807 	ASSERT(arn->arn_flowadv_obj == NULL);
808 
809 	if (arn->arn_nexusadv_obj != NULL) {
810 		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
811 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
812 		/* we didn't allocate this, so just nullify it */
813 		arn->arn_nexusadv_obj = NULL;
814 		skmem_region_release(skr);
815 		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
816 	}
817 	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
818 	ASSERT(arn->arn_nexusadv_obj == NULL);
819 
820 	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
821 	if (arn->arn_rx_pp != NULL) {
822 		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
823 			skmem_region_id_t reg = skmem_pp_region_ids[i];
824 			skr = ar->ar_regions[reg];
825 			if (skr != NULL) {
826 				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
827 				skmem_region_release(skr);
828 				ar->ar_regions[reg] = NULL;
829 			}
830 		}
831 		pp_release(arn->arn_rx_pp);
832 		pp_release(arn->arn_tx_pp);
833 		arn->arn_rx_pp = NULL;
834 		arn->arn_tx_pp = NULL;
835 	}
836 	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
837 		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
838 	}
839 	ASSERT(arn->arn_rx_pp == NULL);
840 	ASSERT(arn->arn_tx_pp == NULL);
841 
842 	if (arn->arn_ring_cache != NULL) {
843 		skr = ar->ar_regions[SKMEM_REGION_RING];
844 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
845 		skmem_cache_destroy(arn->arn_ring_cache);
846 		arn->arn_ring_cache = NULL;
847 		skmem_region_release(skr);
848 		ar->ar_regions[SKMEM_REGION_RING] = NULL;
849 	}
850 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
851 	ASSERT(arn->arn_ring_cache == NULL);
852 
853 	/*
854 	 * Stop here if we're in the defunct context, and we're asked
855 	 * to keep the slot descriptor regions alive as they are still
856 	 * being referred to by the nexus owner (driver).
857 	 */
858 	if (defunct && arn->arn_ksd_nodefunct != 0) {
859 		ASSERT(arn->arn_ksd_nodefunct > 0);
860 		return;
861 	}
862 
863 	ASSERT(arn->arn_ksd_nodefunct == 0);
864 	skmem_arena_sd_teardown(ar, TRUE);
865 	skmem_arena_sd_teardown(ar, FALSE);
866 
867 	/* stop here if we're in the defunct context */
868 	if (defunct) {
869 		return;
870 	}
871 	if (arn->arn_schema_cache != NULL) {
872 		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
873 		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
874 		skmem_cache_destroy(arn->arn_schema_cache);
875 		arn->arn_schema_cache = NULL;
876 		skmem_region_release(skr);
877 		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
878 	}
879 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
880 	ASSERT(arn->arn_schema_cache == NULL);
881 
882 	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
883 		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
884 		skmem_region_release(skr);
885 		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
886 	}
887 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
888 	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
889 		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
890 		skmem_region_release(skr);
891 		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
892 	}
893 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
894 }
895 
896 /*
897  * Create an NECP arena.
898  */
899 struct skmem_arena *
900 skmem_arena_create_for_necp(const char *name,
901     struct skmem_region_params *srp_ustats,
902     struct skmem_region_params *srp_kstats, int *perr)
903 {
904 	struct skmem_arena_necp *arc;
905 	struct skmem_arena *ar;
906 	char cname[64];
907 
908 	*perr = 0;
909 
910 	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
911 	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
912 	arc = (struct skmem_arena_necp *)ar;
913 
914 	/*
915 	 * Must be stats region, and must be user-mappable;
916 	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
917 	 * as the client might want multi-segment mode.
918 	 */
919 	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
920 	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
921 	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
922 	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
923 	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
924 	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
925 	ASSERT(srp_ustats->srp_c_obj_size != 0);
926 	ASSERT(srp_kstats->srp_c_obj_size != 0);
927 	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
928 	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
929 	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
930 	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
931 	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
932 	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);
933 
934 	AR_LOCK(ar);
935 
936 	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
937 	    srp_ustats, NULL, NULL, NULL)) == NULL) {
938 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
939 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
940 		    srp_ustats->srp_name);
941 		goto failed;
942 	}
943 
944 	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
945 	    srp_kstats, NULL, NULL, NULL)) == NULL) {
946 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
947 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
948 		    srp_kstats->srp_name);
949 		goto failed;
950 	}
951 
952 	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
953 	    ar->ar_regions[SKMEM_REGION_USTATS]);
954 
955 	/* create skmem_cache for kernel stats (without magazines) */
956 	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
957 	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
958 	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
959 	    ar->ar_regions[SKMEM_REGION_KSTATS],
960 	    SKMEM_CR_NOMAGAZINES)) == NULL) {
961 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
962 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
963 		goto failed;
964 	}
965 
966 	if (skmem_arena_create_finalize(ar) != 0) {
967 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
968 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
969 		goto failed;
970 	}
971 
972 	/*
973 	 * These must never be configured for NECP arena.
974 	 *
975 	 * XXX: In theory we can add guard pages to this arena,
976 	 * but for now leave that as an exercise for the future.
977 	 */
978 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
979 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
980 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
981 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
982 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
983 	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
984 	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
985 	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
986 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
987 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
988 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
989 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
990 	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
991 		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
992 	}
993 
994 	/* these must be configured for NECP arena */
995 	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
996 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);
997 
998 	++ar->ar_refcnt;        /* for caller */
999 	AR_UNLOCK(ar);
1000 
1001 	SKMEM_ARENA_LOCK();
1002 	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
1003 	SKMEM_ARENA_UNLOCK();
1004 
1005 #if SK_LOG
1006 	if (__improbable(sk_verbose != 0)) {
1007 		skmem_arena_create_region_log(ar);
1008 	}
1009 #endif /* SK_LOG */
1010 
1011 	return ar;
1012 
1013 failed:
1014 	AR_LOCK_ASSERT_HELD(ar);
1015 	skmem_arena_destroy(ar);
1016 	*perr = ENOMEM;
1017 
1018 	return NULL;
1019 }
1020 
1021 static void
1022 skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
1023 {
1024 #pragma unused(defunct)
1025 	struct skmem_arena *ar = &arc->arc_cmn;
1026 	struct skmem_region *skr;
1027 
1028 	AR_LOCK_ASSERT_HELD(ar);
1029 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);
1030 
1031 	/* these must never be configured for NECP arena */
1032 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1033 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1034 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1035 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1036 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1037 	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1038 	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1039 	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
1040 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1041 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1042 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1043 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1044 	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1045 		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1046 	}
1047 
1048 	if (arc->arc_kstats_cache != NULL) {
1049 		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
1050 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
1051 		skmem_cache_destroy(arc->arc_kstats_cache);
1052 		arc->arc_kstats_cache = NULL;
1053 		skmem_region_release(skr);
1054 		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;
1055 
1056 		skr = ar->ar_regions[SKMEM_REGION_USTATS];
1057 		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
1058 		skmem_region_release(skr);
1059 		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
1060 	}
1061 	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1062 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1063 	ASSERT(arc->arc_kstats_cache == NULL);
1064 }
1065 
1066 /*
1067  * Given an arena, return its NECP variant (if applicable).
1068  */
1069 struct skmem_arena_necp *
1070 skmem_arena_necp(struct skmem_arena *ar)
1071 {
1072 	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
1073 		return NULL;
1074 	}
1075 
1076 	return (struct skmem_arena_necp *)ar;
1077 }
1078 
1079 /*
1080  * Create a System arena.
1081  */
1082 struct skmem_arena *
1083 skmem_arena_create_for_system(const char *name, int *perr)
1084 {
1085 	struct skmem_region *skrsys;
1086 	struct skmem_arena_system *ars;
1087 	struct skmem_arena *ar;
1088 
1089 	*perr = 0;
1090 
1091 	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
1092 	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
1093 	ars = (struct skmem_arena_system *)ar;
1094 
1095 	AR_LOCK(ar);
1096 	/* retain system-wide sysctls region */
1097 	skrsys = skmem_get_sysctls_region();
1098 	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
1099 	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
1100 	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
1101 	    SKR_MODE_SHAREOK)) ==
1102 	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
1103 	    SKR_MODE_MONOLITHIC));
1104 	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
1105 	skmem_region_retain(skrsys);
1106 
1107 	/* object is valid as long as the sysctls region is retained */
1108 	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
1109 	ASSERT(ars->ars_sysctls_obj != NULL);
1110 	ASSERT(ars->ars_sysctls_objsize != 0);
1111 
1112 	if (skmem_arena_create_finalize(ar) != 0) {
1113 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
1114 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
1115 		goto failed;
1116 	}
1117 
1118 	/*
1119 	 * These must never be configured for system arena.
1120 	 *
1121 	 * XXX: In theory we can add guard pages to this arena,
1122 	 * but for now leave that as an exercise for the future.
1123 	 */
1124 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1125 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1126 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1127 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1128 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1129 	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1130 	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1131 	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1132 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1133 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1134 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1135 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1136 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1137 	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1138 		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1139 	}
1140 
1141 	/* these must be configured for system arena */
1142 	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);
1143 
1144 	++ar->ar_refcnt;        /* for caller */
1145 	AR_UNLOCK(ar);
1146 
1147 	SKMEM_ARENA_LOCK();
1148 	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
1149 	SKMEM_ARENA_UNLOCK();
1150 
1151 #if SK_LOG
1152 	if (__improbable(sk_verbose != 0)) {
1153 		skmem_arena_create_region_log(ar);
1154 	}
1155 #endif /* SK_LOG */
1156 
1157 	return ar;
1158 
1159 failed:
1160 	AR_LOCK_ASSERT_HELD(ar);
1161 	skmem_arena_destroy(ar);
1162 	*perr = ENOMEM;
1163 
1164 	return NULL;
1165 }
1166 
1167 static void
1168 skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
1169 {
1170 	struct skmem_arena *ar = &ars->ars_cmn;
1171 	struct skmem_region *skr;
1172 
1173 	AR_LOCK_ASSERT_HELD(ar);
1174 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1175 
1176 	/* these must never be configured for system arena */
1177 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
1178 	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
1179 	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
1180 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
1181 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
1182 	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1183 	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1184 	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1185 	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
1186 	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
1187 	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
1188 	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
1189 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1190 	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
1191 		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
1192 	}
1193 
1194 	/* nothing to do here for now during defunct, just return */
1195 	if (defunct) {
1196 		return;
1197 	}
1198 
1199 	if (ars->ars_sysctls_obj != NULL) {
1200 		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
1201 		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
1202 		/* we didn't allocate this, so don't free it */
1203 		ars->ars_sysctls_obj = NULL;
1204 		ars->ars_sysctls_objsize = 0;
1205 		skmem_region_release(skr);
1206 		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
1207 	}
1208 	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
1209 	ASSERT(ars->ars_sysctls_obj == NULL);
1210 	ASSERT(ars->ars_sysctls_objsize == 0);
1211 }
1212 
1213 /*
1214  * Given an arena, return its System variant (if applicable).
1215  */
1216 struct skmem_arena_system *
1217 skmem_arena_system(struct skmem_arena *ar)
1218 {
1219 	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
1220 		return NULL;
1221 	}
1222 
1223 	return (struct skmem_arena_system *)ar;
1224 }
1225 
1226 void *
1227 skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
1228 {
1229 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1230 	return skmem_arena_system(ar)->ars_sysctls_obj;
1231 }
1232 
1233 size_t
1234 skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
1235 {
1236 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
1237 	return skmem_arena_system(ar)->ars_sysctls_objsize;
1238 }
1239 
1240 /*
1241  * Destroy an arena.
1242  */
1243 static void
1244 skmem_arena_destroy(struct skmem_arena *ar)
1245 {
1246 	AR_LOCK_ASSERT_HELD(ar);
1247 
1248 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
1249 	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
1250 
1251 	ASSERT(ar->ar_refcnt == 0);
1252 	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
1253 		AR_UNLOCK(ar);
1254 		SKMEM_ARENA_LOCK();
1255 		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
1256 		SKMEM_ARENA_UNLOCK();
1257 		AR_LOCK(ar);
1258 		ASSERT(ar->ar_refcnt == 0);
1259 	}
1260 
1261 	/* teardown all remaining memory regions and associated resources */
1262 	skmem_arena_teardown(ar, FALSE);
1263 
1264 	if (ar->ar_ar != NULL) {
1265 		IOSKArenaDestroy(ar->ar_ar);
1266 		ar->ar_ar = NULL;
1267 	}
1268 
1269 	if (ar->ar_flags & ARF_ACTIVE) {
1270 		ar->ar_flags &= ~ARF_ACTIVE;
1271 	}
1272 
1273 	AR_UNLOCK(ar);
1274 
1275 	skmem_arena_free(ar);
1276 }
1277 
1278 /*
1279  * Tear down (or defunct) an arena.
1280  */
1281 static void
1282 skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
1283 {
1284 	uint32_t i;
1285 
1286 	switch (ar->ar_type) {
1287 	case SKMEM_ARENA_TYPE_NEXUS:
1288 		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
1289 		    defunct);
1290 		break;
1291 
1292 	case SKMEM_ARENA_TYPE_NECP:
1293 		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
1294 		    defunct);
1295 		break;
1296 
1297 	case SKMEM_ARENA_TYPE_SYSTEM:
1298 		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
1299 		    defunct);
1300 		break;
1301 
1302 	default:
1303 		VERIFY(0);
1304 		/* NOTREACHED */
1305 		__builtin_unreachable();
1306 	}
1307 
1308 	/* stop here if we're in the defunct context */
1309 	if (defunct) {
1310 		return;
1311 	}
1312 
1313 	/* take care of any remaining ones */
1314 	for (i = 0; i < SKMEM_REGIONS; i++) {
1315 		if (ar->ar_regions[i] == NULL) {
1316 			continue;
1317 		}
1318 
1319 		skmem_region_release(ar->ar_regions[i]);
1320 		ar->ar_regions[i] = NULL;
1321 	}
1322 }
1323 
1324 static int
1325 skmem_arena_create_finalize(struct skmem_arena *ar)
1326 {
1327 	IOSKRegionRef reg[SKMEM_REGIONS];
1328 	uint32_t i, regcnt = 0;
1329 	int err = 0;
1330 
1331 	AR_LOCK_ASSERT_HELD(ar);
1332 
1333 	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
1334 
1335 	/*
1336 	 * Prepare an array of regions that can be mapped to the user task;
1337 	 * exclude regions that aren't eligible for user task mapping.
1338 	 */
1339 	bzero(&reg, sizeof(reg));
1340 	for (i = 0; i < SKMEM_REGIONS; i++) {
1341 		struct skmem_region *skr = ar->ar_regions[i];
1342 		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
1343 			continue;
1344 		}
1345 
1346 		ASSERT(skr->skr_reg != NULL);
1347 		reg[regcnt++] = skr->skr_reg;
1348 	}
1349 	ASSERT(regcnt != 0);
1350 
1351 	/*
1352 	 * Create backing IOSKArena handle.
1353 	 */
1354 	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
1355 	if (ar->ar_ar == NULL) {
1356 		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
1357 		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
1358 		    ar->ar_flags, ARF_BITS, regcnt);
1359 		err = ENOMEM;
1360 		goto failed;
1361 	}
1362 
1363 	ar->ar_flags |= ARF_ACTIVE;
1364 
1365 failed:
1366 	return err;
1367 }
1368 
1369 static struct skmem_arena *
1370 skmem_arena_alloc(skmem_arena_type_t type, const char *name)
1371 {
1372 	const char *ar_str = NULL;
1373 	struct zone *ar_zone = NULL;
1374 	struct skmem_arena *ar;
1375 	size_t ar_zsize = 0;
1376 
1377 	switch (type) {
1378 	case SKMEM_ARENA_TYPE_NEXUS:
1379 		ar_zone = ar_nexus_zone;
1380 		ar_zsize = AR_NEXUS_SIZE;
1381 		ar_str = "nexus";
1382 		break;
1383 
1384 	case SKMEM_ARENA_TYPE_NECP:
1385 		ar_zone = ar_necp_zone;
1386 		ar_zsize = AR_NECP_SIZE;
1387 		ar_str = "necp";
1388 		break;
1389 
1390 	case SKMEM_ARENA_TYPE_SYSTEM:
1391 		ar_zone = ar_system_zone;
1392 		ar_zsize = AR_SYSTEM_SIZE;
1393 		ar_str = "system";
1394 		break;
1395 
1396 	default:
1397 		VERIFY(0);
1398 		/* NOTREACHED */
1399 		__builtin_unreachable();
1400 	}
1401 
1402 	ar = zalloc_flags(ar_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1403 	ar->ar_type = type;
1404 	ar->ar_zsize = ar_zsize;
1405 	ar->ar_zone = ar_zone;
1406 
1407 	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
1408 	    LCK_ATTR_NULL);
1409 	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
1410 	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);
1411 
1412 	return ar;
1413 }
1414 
1415 static void
1416 skmem_arena_free(struct skmem_arena *ar)
1417 {
1418 #if DEBUG || DEVELOPMENT
1419 	ASSERT(ar->ar_refcnt == 0);
1420 	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
1421 	ASSERT(ar->ar_ar == NULL);
1422 	ASSERT(ar->ar_mapcnt == 0);
1423 	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
1424 	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
1425 		ASSERT(ar->ar_regions[i] == NULL);
1426 	}
1427 #endif /* DEBUG || DEVELOPMENT */
1428 
1429 	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
1430 	zfree(ar->ar_zone, ar);
1431 }
1432 
1433 /*
1434  * Retain an arena.
1435  */
1436 __attribute__((always_inline))
1437 static inline void
1438 skmem_arena_retain_locked(struct skmem_arena *ar)
1439 {
1440 	AR_LOCK_ASSERT_HELD(ar);
1441 	ar->ar_refcnt++;
1442 	ASSERT(ar->ar_refcnt != 0);
1443 }
1444 
1445 void
1446 skmem_arena_retain(struct skmem_arena *ar)
1447 {
1448 	AR_LOCK(ar);
1449 	skmem_arena_retain_locked(ar);
1450 	AR_UNLOCK(ar);
1451 }
1452 
1453 /*
1454  * Release (and potentially destroy) an arena.
1455  */
1456 __attribute__((always_inline))
1457 static inline boolean_t
1458 skmem_arena_release_locked(struct skmem_arena *ar)
1459 {
1460 	boolean_t lastref = FALSE;
1461 
1462 	AR_LOCK_ASSERT_HELD(ar);
1463 	ASSERT(ar->ar_refcnt != 0);
1464 	if (--ar->ar_refcnt == 0) {
1465 		skmem_arena_destroy(ar);
1466 		lastref = TRUE;
1467 	} else {
1468 		lastref = FALSE;
1469 	}
1470 
1471 	return lastref;
1472 }
1473 
1474 boolean_t
1475 skmem_arena_release(struct skmem_arena *ar)
1476 {
1477 	boolean_t lastref;
1478 
1479 	AR_LOCK(ar);
1480 	/* unlock only if this isn't the last reference */
1481 	if (!(lastref = skmem_arena_release_locked(ar))) {
1482 		AR_UNLOCK(ar);
1483 	}
1484 
1485 	return lastref;
1486 }
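/*
 * Usage sketch (illustrative only; `example_holder' and its fields are
 * hypothetical, not part of this file): a component that caches an
 * arena pointer takes its own reference, and on release the boolean
 * result reports whether the last reference was just dropped, in which
 * case the arena has already been destroyed and must not be touched.
 */
#if 0
struct example_holder {
	struct skmem_arena	*eh_arena;
};

static void
example_holder_attach(struct example_holder *eh, struct skmem_arena *ar)
{
	/* take our own reference before publishing the pointer */
	skmem_arena_retain(ar);
	eh->eh_arena = ar;
}

static void
example_holder_detach(struct example_holder *eh)
{
	/* if this was the last reference, the arena is already gone */
	(void) skmem_arena_release(eh->eh_arena);
	eh->eh_arena = NULL;
}
#endif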
1487 
1488 /*
1489  * Map an arena to the task's address space.
1490  */
1491 int
1492 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1493     struct skmem_arena_mmap_info *ami)
1494 {
1495 	task_t task = proc_task(p);
1496 	IOReturn ioerr;
1497 	int err = 0;
1498 
1499 	ASSERT(task != kernel_task && task != TASK_NULL);
1500 	ASSERT(ami->ami_arena == NULL);
1501 	ASSERT(ami->ami_mapref == NULL);
1502 	ASSERT(ami->ami_maptask == TASK_NULL);
1503 	ASSERT(!ami->ami_redirect);
1504 
1505 	AR_LOCK(ar);
1506 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1507 		err = ENODEV;
1508 		goto failed;
1509 	}
1510 
1511 	ASSERT(ar->ar_ar != NULL);
1512 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1513 		err = ENOMEM;
1514 		goto failed;
1515 	}
1516 
1517 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1518 	    &ami->ami_mapsize);
1519 	VERIFY(ioerr == kIOReturnSuccess);
1520 
1521 	ami->ami_arena = ar;
1522 	skmem_arena_retain_locked(ar);
1523 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1524 
1525 	ami->ami_maptask = task;
1526 	ar->ar_mapcnt++;
1527 	if (ar->ar_mapcnt == 1) {
1528 		ar->ar_mapsize = ami->ami_mapsize;
1529 	}
1530 
1531 	ASSERT(ami->ami_mapref != NULL);
1532 	ASSERT(ami->ami_arena == ar);
1533 	AR_UNLOCK(ar);
1534 
1535 	return 0;
1536 
1537 failed:
1538 	AR_UNLOCK(ar);
1539 	skmem_arena_munmap(ar, ami);
1540 	VERIFY(err != 0);
1541 
1542 	return err;
1543 }
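/*
 * Usage sketch (illustrative only; the caller is hypothetical): map an
 * arena for a process.  The mmap_info must start out zeroed, per the
 * ASSERTs above; on failure skmem_arena_mmap() has already cleaned up
 * after itself via skmem_arena_munmap(), so the caller simply
 * propagates the error.  A successful mapping is later undone with
 * skmem_arena_munmap() (or the channel variant below).
 */
#if 0
static int
example_map_arena(struct skmem_arena *ar, struct proc *p,
    struct skmem_arena_mmap_info *ami)
{
	int err;

	bzero(ami, sizeof(*ami));
	if ((err = skmem_arena_mmap(ar, p, ami)) != 0) {
		return err;
	}
	/*
	 * The arena is now visible in the task's address space at
	 * [ami->ami_mapaddr, ami->ami_mapaddr + ami->ami_mapsize).
	 */
	return 0;
}
#endif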
1544 
1545 /*
1546  * Remove arena's memory mapping from task's address space (common code).
1547  * Returns true if caller needs to perform a deferred defunct.
1548  */
1549 static boolean_t
1550 skmem_arena_munmap_common(struct skmem_arena *ar,
1551     struct skmem_arena_mmap_info *ami)
1552 {
1553 	boolean_t need_defunct = FALSE;
1554 
1555 	AR_LOCK(ar);
1556 	if (ami->ami_mapref != NULL) {
1557 		IOSKMapperDestroy(ami->ami_mapref);
1558 		ami->ami_mapref = NULL;
1559 
1560 		VERIFY(ar->ar_mapcnt != 0);
1561 		ar->ar_mapcnt--;
1562 		if (ar->ar_mapcnt == 0) {
1563 			ar->ar_mapsize = 0;
1564 		}
1565 
1566 		VERIFY(ami->ami_arena == ar);
1567 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1568 		    ami_link);
1569 
1570 		/*
1571 		 * We expect the caller to hold an extra reference on the
1572 		 * arena, in addition to the one in mmap_info.
1573 		 */
1574 		VERIFY(ar->ar_refcnt > 1);
1575 		(void) skmem_arena_release_locked(ar);
1576 		ami->ami_arena = NULL;
1577 
1578 		if (ami->ami_redirect) {
1579 			/*
1580 			 * This mapper has been redirected; decrement
1581 			 * the redirect count associated with it.
1582 			 */
1583 			VERIFY(ar->ar_maprdrcnt != 0);
1584 			ar->ar_maprdrcnt--;
1585 		} else if (ar->ar_maprdrcnt != 0 &&
1586 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1587 			/*
1588 			 * There are other mappers for this arena that have
1589 			 * all been redirected, but the arena wasn't marked
1590 			 * inactive by skmem_arena_mredirect() last time since
1591 			 * this particular mapper, which we just destroyed,
1592 			 * was still using it.  Now that it's gone, finish the
1593 			 * postponed work once we return to the caller.
1594 			 */
1595 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1596 			ar->ar_flags &= ~ARF_ACTIVE;
1597 			need_defunct = TRUE;
1598 		}
1599 	}
1600 	ASSERT(ami->ami_mapref == NULL);
1601 	ASSERT(ami->ami_arena == NULL);
1602 
1603 	ami->ami_maptask = TASK_NULL;
1604 	ami->ami_mapaddr = 0;
1605 	ami->ami_mapsize = 0;
1606 	ami->ami_redirect = FALSE;
1607 
1608 	AR_UNLOCK(ar);
1609 
1610 	return need_defunct;
1611 }
1612 
1613 /*
1614  * Remove arena's memory mapping from task's address space (channel version).
1615  * Will perform a deferred defunct if needed.
1616  */
1617 void
1618 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1619 {
1620 	SK_LOCK_ASSERT_HELD();
1621 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1622 
1623 	/*
1624 	 * If this channel was holding the last active reference
1625 	 * count on the arena, and there are other defunct channels
1626 	 * pointing to that arena, perform the actual arena
1627 	 * defunct now.
1628 	 */
1629 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1630 		struct kern_nexus *nx = ch->ch_nexus;
1631 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1632 
1633 		/*
1634 		 * Similar to kern_channel_defunct(), where we let the
1635 		 * domain provider complete the defunct.  At this point
1636 		 * both sk_lock and the channel locks are held, and so
1637 		 * we indicate that to the callee.
1638 		 */
1639 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1640 		    nx, ch, TRUE);
1641 	}
1642 }
1643 
1644 /*
1645  * Remove arena's memory mapping from task's address space (generic).
1646  * This routine should only be called on non-channel related arenas.
1647  */
1648 void
1649 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1650 {
1651 	(void) skmem_arena_munmap_common(ar, ami);
1652 }
1653 
1654 /*
1655  * Redirect eligible memory regions in the task's memory map so that
1656  * they get overwritten and backed with anonymous (zero-filled) pages.
1657  */
1658 int
1659 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1660     struct proc *p, boolean_t *need_defunct)
1661 {
1662 #pragma unused(p)
1663 	int err = 0;
1664 
1665 	*need_defunct = FALSE;
1666 
1667 	AR_LOCK(ar);
1668 	ASSERT(ar->ar_ar != NULL);
1669 	if (ami->ami_redirect) {
1670 		err = EALREADY;
1671 	} else if (ami->ami_mapref == NULL) {
1672 		err = ENXIO;
1673 	} else {
1674 		VERIFY(ar->ar_mapcnt != 0);
1675 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1676 		VERIFY(ami->ami_arena == ar);
1677 		/*
1678 		 * This effectively overwrites the mappings for all
1679 		 * redirectable memory regions (i.e. those without the
1680 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1681 		 * protection flags.  Accesses to these regions will be
1682 		 * redirected to anonymous, zero-filled pages.
1683 		 */
1684 		IOSKMapperRedirect(ami->ami_mapref);
1685 		ami->ami_redirect = TRUE;
1686 
1687 		/*
1688 		 * Mark the arena as inactive if all mapper instances are
1689 		 * redirected; otherwise, we do this later during unmap.
1690 		 * Once inactive, the arena will not allow further mmap,
1691 		 * and it is ready to be defunct later.
1692 		 */
1693 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1694 			ar->ar_flags &= ~ARF_ACTIVE;
1695 			*need_defunct = TRUE;
1696 		}
1697 	}
1698 	AR_UNLOCK(ar);
1699 
1700 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1701 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1702 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1703 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1704 	    *need_defunct, err);
1705 
1706 	return err;
1707 }
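/*
 * Usage sketch (illustrative only; real callers drive this through the
 * nexus defunct machinery): the redirect-then-defunct ordering.  Only
 * the call that redirects the last active mapper sees need_defunct set,
 * at which point the arena has been marked inactive and
 * skmem_arena_defunct() below will no longer return EBUSY.
 */
#if 0
static int
example_redirect_then_defunct(struct skmem_arena *ar,
    struct skmem_arena_mmap_info *ami, struct proc *p)
{
	boolean_t need_defunct = FALSE;
	int err;

	err = skmem_arena_mredirect(ar, ami, p, &need_defunct);
	if (err == 0 && need_defunct) {
		err = skmem_arena_defunct(ar);
	}
	return err;
}
#endif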
1708 
1709 /*
1710  * Defunct an arena.
1711  */
1712 int
1713 skmem_arena_defunct(struct skmem_arena *ar)
1714 {
1715 	AR_LOCK(ar);
1716 
1717 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b", ar->ar_name,
1718 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1719 
1720 	if (ar->ar_flags & ARF_DEFUNCT) {
1721 		AR_UNLOCK(ar);
1722 		return EALREADY;
1723 	} else if (ar->ar_flags & ARF_ACTIVE) {
1724 		AR_UNLOCK(ar);
1725 		return EBUSY;
1726 	}
1727 
1728 	/* purge the caches now */
1729 	skmem_arena_reap_locked(ar, TRUE);
1730 
1731 	/* teardown eligible memory regions and associated resources */
1732 	skmem_arena_teardown(ar, TRUE);
1733 
1734 	ar->ar_flags |= ARF_DEFUNCT;
1735 
1736 	AR_UNLOCK(ar);
1737 
1738 	return 0;
1739 }
1740 
1741 /*
1742  * Retrieve total and in-use memory statistics of regions in the arena.
1743  */
1744 void
1745 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1746     uint64_t *mem_inuse)
1747 {
1748 	uint32_t i;
1749 
1750 	if (mem_total != NULL) {
1751 		*mem_total = 0;
1752 	}
1753 	if (mem_inuse != NULL) {
1754 		*mem_inuse = 0;
1755 	}
1756 
1757 	AR_LOCK(ar);
1758 	for (i = 0; i < SKMEM_REGIONS; i++) {
1759 		if (ar->ar_regions[i] == NULL) {
1760 			continue;
1761 		}
1762 
1763 		if (mem_total != NULL) {
1764 			*mem_total += AR_MEM_TOTAL(ar, i);
1765 		}
1766 		if (mem_inuse != NULL) {
1767 			*mem_inuse += AR_MEM_INUSE(ar, i);
1768 		}
1769 	}
1770 	AR_UNLOCK(ar);
1771 }
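/*
 * Usage sketch (illustrative only; the helper is hypothetical): either
 * output pointer may be NULL when only one of the two counters is of
 * interest.  Here both are combined into a utilization percentage.
 */
#if 0
static uint64_t
example_arena_utilization(struct skmem_arena *ar)
{
	uint64_t total, inuse;

	skmem_arena_get_stats(ar, &total, &inuse);
	return (total != 0) ? ((inuse * 100) / total) : 0;
}
#endif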
1772 
1773 /*
1774  * Retrieve the offset of a particular region (identified by its ID)
1775  * from the base of the arena.
1776  */
1777 mach_vm_offset_t
1778 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1779 {
1780 	mach_vm_offset_t offset = 0;
1781 	uint32_t i;
1782 
1783 	ASSERT(id < SKMEM_REGIONS);
1784 
1785 	AR_LOCK(ar);
1786 	for (i = 0; i < id; i++) {
1787 		if (ar->ar_regions[i] == NULL) {
1788 			continue;
1789 		}
1790 
1791 		offset += ar->ar_regions[i]->skr_size;
1792 	}
1793 	AR_UNLOCK(ar);
1794 
1795 	return offset;
1796 }
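/*
 * Usage sketch (illustrative only; the helper is hypothetical): since
 * the offset is computed by summing the sizes of all regions preceding
 * the given ID, adding it to a mapping's base address yields that
 * region's location in the task's contiguous view of the arena.
 */
#if 0
static mach_vm_address_t
example_region_map_address(struct skmem_arena *ar,
    struct skmem_arena_mmap_info *ami, skmem_region_id_t id)
{
	return ami->ami_mapaddr + skmem_arena_get_region_offset(ar, id);
}
#endif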
1797 
1798 static void
1799 skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
1800 {
1801 	if (pp->pp_kmd_cache != NULL) {
1802 		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1803 	}
1804 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1805 		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
1806 	}
1807 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1808 		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
1809 	}
1810 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1811 		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
1812 	}
1813 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1814 		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
1815 	}
1816 	if (pp->pp_raw_kbft_cache != NULL) {
1817 		skmem_cache_reap_now(pp->pp_raw_kbft_cache, purge);
1818 	}
1819 }
1820 
1821 /*
1822  * Reap all of the configured caches in the arena, so that any excess amount
1823  * outside of their working sets gets released to their respective backing
1824  * regions.  If purging is specified, we empty the caches' working sets,
1825  * including everything that's cached at the CPU layer.
1826  */
1827 static void
1828 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1829 {
1830 	struct skmem_arena_nexus *arn;
1831 	struct skmem_arena_necp *arc;
1832 	struct kern_pbufpool *pp;
1833 
1834 	AR_LOCK_ASSERT_HELD(ar);
1835 
1836 	switch (ar->ar_type) {
1837 	case SKMEM_ARENA_TYPE_NEXUS:
1838 		arn = (struct skmem_arena_nexus *)ar;
1839 		if (arn->arn_schema_cache != NULL) {
1840 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1841 		}
1842 		if (arn->arn_ring_cache != NULL) {
1843 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1844 		}
1845 		if ((pp = arn->arn_rx_pp) != NULL) {
1846 			skmem_reap_pbufpool_caches(pp, purge);
1847 		}
1848 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1849 			skmem_reap_pbufpool_caches(pp, purge);
1850 		}
1851 		break;
1852 
1853 	case SKMEM_ARENA_TYPE_NECP:
1854 		arc = (struct skmem_arena_necp *)ar;
1855 		if (arc->arc_kstats_cache != NULL) {
1856 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1857 		}
1858 		break;
1859 
1860 	case SKMEM_ARENA_TYPE_SYSTEM:
1861 		break;
1862 	}
1863 }
1864 
1865 void
1866 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1867 {
1868 	AR_LOCK(ar);
1869 	skmem_arena_reap_locked(ar, purge);
1870 	AR_UNLOCK(ar);
1871 }
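/*
 * Usage sketch (illustrative only; the pressure callback is
 * hypothetical): a plain reap releases only the excess beyond the
 * caches' working sets, while a purge also empties the working sets
 * and the CPU-layer caches, as done on the defunct path above.
 */
#if 0
static void
example_handle_memory_pressure(struct skmem_arena *ar, boolean_t severe)
{
	skmem_arena_reap(ar, severe);
}
#endif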
1872 
1873 #if SK_LOG
1874 SK_LOG_ATTRIBUTE
1875 static void
1876 skmem_arena_create_region_log(struct skmem_arena *ar)
1877 {
1878 	char label[32];
1879 	int i;
1880 
1881 	switch (ar->ar_type) {
1882 	case SKMEM_ARENA_TYPE_NEXUS:
1883 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1884 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1885 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1886 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1887 		break;
1888 
1889 	case SKMEM_ARENA_TYPE_NECP:
1890 	case SKMEM_ARENA_TYPE_SYSTEM:
1891 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1892 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1893 		break;
1894 	}
1895 
1896 	for (i = 0; i < SKMEM_REGIONS; i++) {
1897 		if (ar->ar_regions[i] == NULL) {
1898 			continue;
1899 		}
1900 
1901 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1902 		    skmem_region_id2name(i));
1903 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1904 		    "o:[%4u x %6u -> %4u x %6u]", label,
1905 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1906 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1907 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1908 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1909 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1910 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1911 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1912 	}
1913 }
1914 #endif /* SK_LOG */
1915 
1916 static size_t
1917 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
1918 {
1919 	size_t actual_space = sizeof(struct sk_stats_arena);
1920 	struct sk_stats_arena *sar = out;
1921 	struct skmem_arena_mmap_info *ami = NULL;
1922 	pid_t proc_pid;
1923 	int i;
1924 
1925 	if (out == NULL || len < actual_space) {
1926 		goto done;
1927 	}
1928 
1929 	AR_LOCK(ar);
1930 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1931 	    "%s", ar->ar_name);
1932 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1933 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1934 	i = 0;
1935 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1936 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1937 			struct kern_channel *ch;
1938 			ch = container_of(ami, struct kern_channel, ch_mmap);
1939 			proc_pid = ch->ch_pid;
1940 		} else {
1941 			ASSERT((ami->ami_arena->ar_type ==
1942 			    SKMEM_ARENA_TYPE_NECP) ||
1943 			    (ami->ami_arena->ar_type ==
1944 			    SKMEM_ARENA_TYPE_SYSTEM));
1945 			proc_pid =
1946 			    necp_client_get_proc_pid_from_arena_info(ami);
1947 		}
1948 		sar->sar_mapped_pids[i++] = proc_pid;
1949 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1950 			break;
1951 		}
1952 	}
1953 
1954 	for (i = 0; i < SKMEM_REGIONS; i++) {
1955 		struct skmem_region *skr = ar->ar_regions[i];
1956 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1957 
1958 		if (skr == NULL) {
1959 			uuid_clear(*sreg_uuid);
1960 			continue;
1961 		}
1962 
1963 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1964 	}
1965 	AR_UNLOCK(ar);
1966 
1967 done:
1968 	return actual_space;
1969 }
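/*
 * Usage sketch (illustrative only; the helper is hypothetical):
 * skmem_arena_mib_get_stats() always returns the space a record
 * occupies and copies nothing when the buffer is NULL or too small,
 * so the same routine can first size and then fill a buffer, which is
 * how the sysctl handler below walks the arena list.
 */
#if 0
static size_t
example_size_then_fill(struct skmem_arena *ar, void *buf, size_t buflen)
{
	/* first call sizes the record without copying anything */
	size_t need = skmem_arena_mib_get_stats(ar, NULL, 0);

	if (buf != NULL && buflen >= need) {
		/* second call fills the caller's buffer */
		(void) skmem_arena_mib_get_stats(ar, buf, buflen);
	}
	return need;
}
#endif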
1970 
1971 static int
1972 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1973 {
1974 #pragma unused(arg1, arg2, oidp)
1975 	struct skmem_arena *ar;
1976 	size_t actual_space;
1977 	size_t buffer_space;
1978 	size_t allocated_space;
1979 	caddr_t buffer = NULL;
1980 	caddr_t scan;
1981 	int error = 0;
1982 
1983 	if (!kauth_cred_issuser(kauth_cred_get())) {
1984 		return EPERM;
1985 	}
1986 
1987 	net_update_uptime();
1988 	buffer_space = req->oldlen;
1989 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
1990 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
1991 			buffer_space = SK_SYSCTL_ALLOC_MAX;
1992 		}
1993 		allocated_space = buffer_space;
1994 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
1995 		if (__improbable(buffer == NULL)) {
1996 			return ENOBUFS;
1997 		}
1998 	} else if (req->oldptr == USER_ADDR_NULL) {
1999 		buffer_space = 0;
2000 	}
2001 	actual_space = 0;
2002 	scan = buffer;
2003 
2004 	SKMEM_ARENA_LOCK();
2005 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
2006 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
2007 		if (scan != NULL) {
2008 			if (buffer_space < size) {
2009 				/* supplied buffer too small, stop copying */
2010 				error = ENOMEM;
2011 				break;
2012 			}
2013 			scan += size;
2014 			buffer_space -= size;
2015 		}
2016 		actual_space += size;
2017 	}
2018 	SKMEM_ARENA_UNLOCK();
2019 
2020 	if (actual_space != 0) {
2021 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2022 		if (out_error != 0) {
2023 			error = out_error;
2024 		}
2025 	}
2026 	if (buffer != NULL) {
2027 		sk_free_data(buffer, allocated_space);
2028 	}
2029 
2030 	return error;
2031 }
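/*
 * Userland counterpart (illustrative only; the OID name below is an
 * assumption, as the handler is registered elsewhere): the standard
 * two-pass sysctl read, probing the required size before fetching the
 * sk_stats_arena records.
 */
#if 0
#include <stdlib.h>
#include <sys/sysctl.h>

static void *
example_read_arena_stats(size_t *lenp)
{
	size_t len = 0;
	void *buf;

	/* pass 1: a NULL old pointer asks the handler for the size */
	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
	    NULL, 0) != 0 || len == 0) {
		return NULL;
	}
	if ((buf = malloc(len)) == NULL) {
		return NULL;
	}
	/* pass 2: fetch the records themselves */
	if (sysctlbyname("kern.skywalk.stats.arena", buf, &len,
	    NULL, 0) != 0) {
		free(buf);
		return NULL;
	}
	*lenp = len;
	return buf;
}
#endif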
2032