1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1992,7 NeXT Computer, Inc.
30 *
31 * Unix data structure initialization.
32 *
33 */
34
35 #include <mach/mach_types.h>
36
37 #include <kern/startup.h>
38 #include <vm/vm_kern_xnu.h>
39 #include <mach/vm_prot.h>
40
41 #include <sys/param.h>
42 #include <sys/buf_internal.h>
43 #include <sys/file_internal.h>
44 #include <sys/proc_internal.h>
45 #include <sys/mcache.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/tty.h>
49 #include <sys/vnode.h>
50 #include <sys/sysctl.h>
51 #include <machine/cons.h>
52 #include <pexpert/pexpert.h>
53 #include <sys/socketvar.h>
54 #include <pexpert/pexpert.h>
55 #include <netinet/tcp_var.h>
56
57 extern uint32_t kern_maxvnodes;
58 #if CONFIG_MBUF_MCACHE
59 extern vm_map_t mb_map;
60 #endif /* CONFIG_MBUF_MCACHE */
61
62 void bsd_bufferinit(void);
63
64 unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
65 void bsd_scale_setup(int);
66 void bsd_exec_setup(int);
67
/*
 * Buffer-cache sizing knobs.  Each one carries an explicit initializer
 * so that it is emitted as initialized data and can be patched.
 *
 * Without a build-time NBUF they all start at zero.  A zero
 * max_nbuf_headers / niobuf_headers is filled in by
 * bsd_get_bufferhdr_map_size(); nbuf_hashelements is set by
 * bsd_startupearly().
 */
#ifndef NBUF
int max_nbuf_headers = 0;
int niobuf_headers = 0;
int nbuf_hashelements = 0;
int nbuf_headers = 0;
#else /* NBUF */
int max_nbuf_headers = NBUF;
int niobuf_headers = (NBUF / 2) + 2048;
int nbuf_hashelements = NBUF;
int nbuf_headers = NBUF;
#endif /* NBUF */
83
84 SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
85 SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");
86
87 __private_extern__ int customnbuf = 0;
88
89 #if SOCKETS
90 static unsigned int mbuf_poolsz;
91 #endif
92
93 vm_map_t buffer_map;
94 vm_map_t bufferhdr_map;
95 static int vnodes_sized = 0;
96
97 extern void bsd_startupearly(void);
98
99 static vm_map_size_t bufferhdr_map_size;
100 SECURITY_READ_ONLY_LATE(struct mach_vm_range) bufferhdr_range = {};
101
102 static vm_map_size_t
bsd_get_bufferhdr_map_size(void)103 bsd_get_bufferhdr_map_size(void)
104 {
105 vm_size_t size;
106
107 /* clip the number of buf headers upto 16k */
108 if (max_nbuf_headers == 0) {
109 max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
110 }
111 if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
112 max_nbuf_headers = 16384;
113 }
114 if (max_nbuf_headers < CONFIG_MIN_NBUF) {
115 max_nbuf_headers = CONFIG_MIN_NBUF;
116 }
117
118 if (niobuf_headers == 0) {
119 if (max_nbuf_headers < 4096) {
120 niobuf_headers = max_nbuf_headers;
121 } else {
122 niobuf_headers = (max_nbuf_headers / 2) + 2048;
123 }
124 }
125 if (niobuf_headers < CONFIG_MIN_NIOBUF) {
126 niobuf_headers = CONFIG_MIN_NIOBUF;
127 }
128
129 size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
130 size = round_page(size);
131
132 return size;
133 }
134
135 KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
136 return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
137 });
138
139 void
bsd_startupearly(void)140 bsd_startupearly(void)
141 {
142 vm_size_t size = bufferhdr_map_size;
143
144 assert(size);
145
146 /* clip the number of hash elements to 200000 */
147 if ((customnbuf == 0) && nbuf_hashelements == 0) {
148 nbuf_hashelements = (int)atop_kernel(sane_size / 50);
149 if ((unsigned int)nbuf_hashelements > 200000) {
150 nbuf_hashelements = 200000;
151 }
152 } else {
153 nbuf_hashelements = max_nbuf_headers;
154 }
155
156 bufferhdr_map = kmem_suballoc(kernel_map,
157 &bufferhdr_range.min_address,
158 size,
159 VM_MAP_CREATE_NEVER_FAULTS,
160 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
161 KMS_PERMANENT | KMS_NOFAIL,
162 VM_KERN_MEMORY_FILE).kmr_submap;
163
164 kmem_alloc(bufferhdr_map,
165 &(vm_offset_t){ bufferhdr_range.min_address },
166 size,
167 KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT,
168 VM_KERN_MEMORY_FILE);
169
170 buf_headers = (struct buf *)bufferhdr_range.min_address;
171
172 if (vnodes_sized == 0) {
173 if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
174 /*
175 * Size vnodes based on memory
176 * Number vnodes is (memsize/64k) + 1024
177 * This is the calculation that is used by launchd in tiger
178 * we are clipping the max based on 16G
179 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
180 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
181 * but can be smaller or larger.
182 */
183 desiredvnodes = (int)(sane_size / 65536) + 1024;
184 #ifdef CONFIG_VNODES
185 if (desiredvnodes > CONFIG_VNODES) {
186 desiredvnodes = CONFIG_VNODES;
187 }
188 #endif
189 }
190 vnodes_sized = 1;
191 }
192 }
193
#if SOCKETS
SECURITY_READ_ONLY_LATE(struct mach_vm_range) mb_range = {};

/*
 * Size callback for the mbuf range: derive nmbclusters from the reserved
 * pool size and report the range size as a whole number of clusters.
 *
 * NOTE(review): the product below is evaluated in the type of
 * nmbclusters before being widened to vm_map_size_t -- confirm that it
 * cannot exceed that type's range at the maximum pool size.
 */
KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
	unsigned int pool_bytes = bsd_mbuf_cluster_reserve(NULL);

	nmbclusters = pool_bytes / MCLBYTES;
	return (vm_map_size_t)(nmbclusters * MCLBYTES);
});
#endif /* SOCKETS */
201
/*
 * bsd_bufferinit
 *
 * Run bsd_startupearly(), create the mbuf submap at mb_range when
 * CONFIG_MBUF_MCACHE is enabled, then initialize the buffer cache.
 *
 * Note: Console device initialized in kminit() from bsd_autoconf()
 * prior to call to us in bsd_init().
 */
void
bsd_bufferinit(void)
{
	bsd_startupearly();

#if CONFIG_MBUF_MCACHE
	const vm_size_t mb_bytes = (vm_size_t)(nmbclusters * MCLBYTES);

	mb_map = kmem_suballoc(kernel_map,
	    &mb_range.min_address,
	    mb_bytes,
	    FALSE,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_MBUF).kmr_submap;
	mbutl = (unsigned char *)mb_range.min_address;
#endif /* CONFIG_MBUF_MCACHE */

	/* Set up buffers, so they can be used to read disk labels. */
	bufinit();
}
228
/*
 * Hard limit on the size of the mbuf pool: 2 GB on K64 (LP64) kernels,
 * 512 MB on K32.  MAX_NCL is the same limit expressed in clusters.
 */
#if defined(__LP64__)
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#else /* !__LP64__ */
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#endif /* __LP64__ */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)
236
#if SOCKETS
/*
 * bsd_mbuf_cluster_reserve
 *
 * Work out how many bytes the mbuf cluster pool should occupy.  This has
 * been broken out into a separate routine that can be called from the
 * x86 early vm initialization to determine how much lo memory to reserve
 * on systems with DMA hardware that can't fully address all of the
 * physical memory that is present.
 *
 * The size is computed on the first call and cached in mbuf_poolsz; once
 * a non-zero size has been cached, later calls simply return it.
 *
 * Parameters:
 *	overridden	Optional, may be NULL.  When supplied it receives
 *			TRUE if the sizing step ran with a non-zero cluster
 *			count from the "ncl" / "mbuf_pool" boot-args or with
 *			serverperfmode set, FALSE otherwise.
 *
 * Returns:
 *	The pool size in bytes (nmbclusters << MCLSHIFT).
 *
 * Side effects:
 *	May update the global nmbclusters.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses different unit.
	 *
	 * The conversion is done in 64 bits and clamped to MAX_NCL.
	 * Shifting the int directly overflows once mbuf_pool reaches 2048
	 * (the 2 GB LP64 limit itself), and a value such as 4096 would wrap
	 * to 0 and silently fall back to auto-configuration.  Non-positive
	 * values are meaningless as a size and are ignored.
	 */
	if (mbuf_pool > 0) {
		uint64_t pool_ncl = ((uint64_t)mbuf_pool << MBSHIFT) >> MCLSHIFT;

		ncl = (pool_ncl > MAX_NCL) ? (int)MAX_NCL : (int)pool_ncl;
	}

	/* Size the pool on machines above 64 MB, or whenever a count was given. */
	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round the cluster count down to a multiple of NCLPG */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}
	mbuf_poolsz = nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif /* SOCKETS */
305
306 #if defined(__LP64__)
307 extern int tcp_tcbhashsize;
308 #endif
309
310 void
bsd_scale_setup(int scale)311 bsd_scale_setup(int scale)
312 {
313 #if defined(__LP64__)
314 if (scale > 0) {
315 if (!serverperfmode) {
316 maxproc *= scale;
317 maxprocperuid = (maxproc * 2) / 3;
318 if (scale > 2) {
319 maxfiles *= scale;
320 maxfilesperproc = maxfiles / 2;
321 }
322 } else {
323 maxproc = 2500 * scale;
324 hard_maxproc = maxproc;
325 /* no fp usage */
326 maxprocperuid = (maxproc * 3) / 4;
327 maxfiles = (150000 * scale);
328 maxfilesperproc = maxfiles / 2;
329 desiredvnodes = maxfiles;
330 vnodes_sized = 1;
331 tcp_tfo_backlog = 100 * scale;
332 if (scale > 4) {
333 /* clip somaxconn at 32G level */
334 somaxconn = 2048;
335 /*
336 * For scale > 4 (> 32G), clip
337 * tcp_tcbhashsize to 32K
338 */
339 tcp_tcbhashsize = 32 * 1024;
340 } else {
341 somaxconn = 512 * scale;
342 tcp_tcbhashsize = 4 * 1024 * scale;
343 }
344 }
345 }
346
347 if (maxproc > hard_maxproc) {
348 hard_maxproc = maxproc;
349 }
350 #endif
351 bsd_exec_setup(scale);
352 }
353