xref: /xnu-10002.61.3/bsd/dev/unix_startup.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1992,7 NeXT Computer, Inc.
30  *
31  * Unix data structure initialization.
32  *
33  */
34 
35 #include <mach/mach_types.h>
36 
37 #include <kern/startup.h>
38 #include <vm/vm_kern.h>
39 #include <mach/vm_prot.h>
40 
41 #include <sys/param.h>
42 #include <sys/buf_internal.h>
43 #include <sys/file_internal.h>
44 #include <sys/proc_internal.h>
45 #include <sys/mcache.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/tty.h>
49 #include <sys/vnode.h>
50 #include <sys/sysctl.h>
51 #include <machine/cons.h>
52 #include <pexpert/pexpert.h>
53 #include <sys/socketvar.h>
54 #include <pexpert/pexpert.h>
55 #include <netinet/tcp_var.h>
56 
57 extern uint32_t kern_maxvnodes;
58 #if CONFIG_MBUF_MCACHE
59 extern vm_map_t mb_map;
60 #endif /* CONFIG_MBUF_MCACHE */
61 
62 #if INET
63 extern uint32_t   tcp_sendspace;
64 extern uint32_t   tcp_recvspace;
65 #endif
66 
67 void            bsd_bufferinit(void);
68 
69 unsigned int    bsd_mbuf_cluster_reserve(boolean_t *);
70 void bsd_scale_setup(int);
71 void bsd_exec_setup(int);
72 
73 /*
74  * Declare these as initialized data so we can patch them.
75  */
76 
77 #ifdef  NBUF
78 int             max_nbuf_headers = NBUF;
79 int             niobuf_headers = (NBUF / 2) + 2048;
80 int             nbuf_hashelements = NBUF;
81 int             nbuf_headers = NBUF;
82 #else
83 int             max_nbuf_headers = 0;
84 int             niobuf_headers = 0;
85 int             nbuf_hashelements = 0;
86 int             nbuf_headers = 0;
87 #endif
88 
89 SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
90 SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");
91 
92 __private_extern__ int customnbuf = 0;
93 
94 /* Indicates a server boot when set */
95 TUNABLE(int, serverperfmode, "serverperfmode", 0);
96 
97 #if SOCKETS
98 static unsigned int mbuf_poolsz;
99 #endif
100 
101 vm_map_t        buffer_map;
102 vm_map_t        bufferhdr_map;
103 static int vnodes_sized = 0;
104 
105 extern void     bsd_startupearly(void);
106 
107 static vm_map_size_t    bufferhdr_map_size;
108 SECURITY_READ_ONLY_LATE(struct mach_vm_range)  bufferhdr_range = {};
109 
110 static vm_map_size_t
bsd_get_bufferhdr_map_size(void)111 bsd_get_bufferhdr_map_size(void)
112 {
113 	vm_size_t       size;
114 
115 	/* clip the number of buf headers upto 16k */
116 	if (max_nbuf_headers == 0) {
117 		max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
118 	}
119 	if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
120 		max_nbuf_headers = 16384;
121 	}
122 	if (max_nbuf_headers < CONFIG_MIN_NBUF) {
123 		max_nbuf_headers = CONFIG_MIN_NBUF;
124 	}
125 
126 	if (niobuf_headers == 0) {
127 		if (max_nbuf_headers < 4096) {
128 			niobuf_headers = max_nbuf_headers;
129 		} else {
130 			niobuf_headers = (max_nbuf_headers / 2) + 2048;
131 		}
132 	}
133 	if (niobuf_headers < CONFIG_MIN_NIOBUF) {
134 		niobuf_headers = CONFIG_MIN_NIOBUF;
135 	}
136 
137 	size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
138 	size = round_page(size);
139 
140 	return size;
141 }
142 
143 KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
144 	return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
145 });
146 
147 void
bsd_startupearly(void)148 bsd_startupearly(void)
149 {
150 	vm_size_t size = bufferhdr_map_size;
151 
152 	assert(size);
153 
154 	/* clip the number of hash elements  to 200000 */
155 	if ((customnbuf == 0) && nbuf_hashelements == 0) {
156 		nbuf_hashelements = (int)atop_kernel(sane_size / 50);
157 		if ((unsigned int)nbuf_hashelements > 200000) {
158 			nbuf_hashelements = 200000;
159 		}
160 	} else {
161 		nbuf_hashelements = max_nbuf_headers;
162 	}
163 
164 	bufferhdr_map = kmem_suballoc(kernel_map,
165 	    &bufferhdr_range.min_address,
166 	    size,
167 	    VM_MAP_CREATE_NEVER_FAULTS,
168 	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
169 	    KMS_PERMANENT | KMS_NOFAIL,
170 	    VM_KERN_MEMORY_FILE).kmr_submap;
171 
172 	kmem_alloc(bufferhdr_map,
173 	    &(vm_offset_t){ bufferhdr_range.min_address },
174 	    size,
175 	    KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT,
176 	    VM_KERN_MEMORY_FILE);
177 
178 	buf_headers = (struct buf *)bufferhdr_range.min_address;
179 
180 #if SOCKETS
181 	{
182 		static const unsigned int       maxspace = 128 * 1024;
183 		int             scale;
184 
185 #if INET
186 		if ((scale = nmbclusters / NMBCLUSTERS) > 1) {
187 			tcp_sendspace *= scale;
188 			tcp_recvspace *= scale;
189 
190 			if (tcp_sendspace > maxspace) {
191 				tcp_sendspace = maxspace;
192 			}
193 			if (tcp_recvspace > maxspace) {
194 				tcp_recvspace = maxspace;
195 			}
196 		}
197 #endif /* INET */
198 	}
199 #endif /* SOCKETS */
200 
201 	if (vnodes_sized == 0) {
202 		if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
203 			/*
204 			 * Size vnodes based on memory
205 			 * Number vnodes  is (memsize/64k) + 1024
206 			 * This is the calculation that is used by launchd in tiger
207 			 * we are clipping the max based on 16G
208 			 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
209 			 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
210 			 * but can be smaller or larger.
211 			 */
212 			desiredvnodes  = (int)(sane_size / 65536) + 1024;
213 #ifdef CONFIG_VNODES
214 			if (desiredvnodes > CONFIG_VNODES) {
215 				desiredvnodes = CONFIG_VNODES;
216 			}
217 #endif
218 		}
219 		vnodes_sized = 1;
220 	}
221 }
222 
223 #if SOCKETS
224 SECURITY_READ_ONLY_LATE(struct mach_vm_range) mb_range = {};
225 KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
226 	nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
227 	return (vm_map_size_t)(nmbclusters * MCLBYTES);
228 });
229 #endif /* SOCKETS */
230 
/*
 * Set up the BSD buffer subsystem: run the early startup sizing, create
 * the mbuf submap when the mcache-based mbuf allocator is configured, and
 * initialize the buffer cache.
 */
void
bsd_bufferinit(void)
{
	/*
	 * Note: Console device initialized in kminit() from bsd_autoconf()
	 * prior to call to us in bsd_init().
	 */

	bsd_startupearly();

#if CONFIG_MBUF_MCACHE
	/*
	 * The submap size is computed in vm_size_t width: multiplying two
	 * 32-bit operands first could wrap for a pool at the 2 GB ceiling.
	 */
	mb_map = kmem_suballoc(kernel_map,
	    &mb_range.min_address,
	    (vm_size_t)nmbclusters * MCLBYTES,
	    FALSE,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_MBUF).kmr_submap;
	mbutl = (unsigned char *)mb_range.min_address;
#endif /* CONFIG_MBUF_MCACHE */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}
257 
/*
 * Hard limit on the size of the mbuf pool:
 * 2 GB on LP64 (K64) kernels, 512 MB otherwise (K32).
 */
#if defined(__LP64__)
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#else /* !__LP64__ */
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#endif /* __LP64__ */
/* The same limit expressed as a number of clusters. */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)
265 
266 #if SOCKETS
267 /*
268  * this has been broken out into a separate routine that
269  * can be called from the x86 early vm initialization to
270  * determine how much lo memory to reserve on systems with
271  * DMA hardware that can't fully address all of the physical
272  * memory that is present.
273  */
274 unsigned int
bsd_mbuf_cluster_reserve(boolean_t * overridden)275 bsd_mbuf_cluster_reserve(boolean_t *overridden)
276 {
277 	int mbuf_pool = 0, ncl = 0;
278 	static boolean_t was_overridden = FALSE;
279 
280 	/* If called more than once, return the previously calculated size */
281 	if (mbuf_poolsz != 0) {
282 		goto done;
283 	}
284 
285 	/*
286 	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
287 	 * here early from i386_vm_init() and so we parse them now, in order
288 	 * to correctly compute the size of the low-memory VM pool.  It is
289 	 * redundant but rather harmless.
290 	 */
291 	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
292 	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));
293 
294 	/*
295 	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
296 	 * equivalent to "ncl", except that it uses different unit.
297 	 */
298 	if (mbuf_pool != 0) {
299 		ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT;
300 	}
301 
302 	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
303 		if (ncl || serverperfmode) {
304 			was_overridden = TRUE;
305 		}
306 
307 		if ((nmbclusters = ncl) == 0) {
308 			/* Auto-configure the mbuf pool size */
309 			nmbclusters = mbuf_default_ncl(mem_actual);
310 		} else {
311 			/* Make sure it's not odd in case ncl is manually set */
312 			if (nmbclusters & 0x1) {
313 				--nmbclusters;
314 			}
315 
316 			/* And obey the upper limit */
317 			if (nmbclusters > MAX_NCL) {
318 				nmbclusters = MAX_NCL;
319 			}
320 		}
321 
322 		/* Round it down to nearest multiple of PAGE_SIZE */
323 		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
324 	}
325 	mbuf_poolsz = nmbclusters << MCLSHIFT;
326 done:
327 	if (overridden) {
328 		*overridden = was_overridden;
329 	}
330 
331 	return mbuf_poolsz;
332 }
333 #endif
334 
335 #if defined(__LP64__)
336 extern int tcp_tcbhashsize;
337 extern int max_cached_sock_count;
338 #endif
339 
340 #define SERVER_PERF_MODE_VALIDATION_DISABLES 0x5dee
341 extern unsigned int kern_feature_overrides;
342 void
bsd_scale_setup(int scale)343 bsd_scale_setup(int scale)
344 {
345 #if defined(__LP64__)
346 	if ((scale > 0) && (serverperfmode == 0)) {
347 		maxproc *= scale;
348 		maxprocperuid = (maxproc * 2) / 3;
349 		if (scale > 2) {
350 			maxfiles *= scale;
351 			maxfilesperproc = maxfiles / 2;
352 		}
353 	}
354 	/* Apply server scaling rules */
355 	if ((scale > 0) && (serverperfmode != 0)) {
356 		maxproc = 2500 * scale;
357 		hard_maxproc = maxproc;
358 		/* no fp usage */
359 		maxprocperuid = (maxproc * 3) / 4;
360 		maxfiles = (150000 * scale);
361 		maxfilesperproc = maxfiles / 2;
362 		desiredvnodes = maxfiles;
363 		vnodes_sized = 1;
364 		tcp_tfo_backlog = 100 * scale;
365 		if (scale > 4) {
366 			/* clip somaxconn at 32G level */
367 			somaxconn = 2048;
368 			/*
369 			 * For scale > 4 (> 32G), clip
370 			 * tcp_tcbhashsize to 32K
371 			 */
372 			tcp_tcbhashsize = 32 * 1024;
373 
374 			if (scale > 7) {
375 				/* clip at 64G level */
376 				max_cached_sock_count = 165000;
377 			} else {
378 				max_cached_sock_count = 60000 + ((scale - 1) * 15000);
379 			}
380 		} else {
381 			somaxconn = 512 * scale;
382 			tcp_tcbhashsize = 4 * 1024 * scale;
383 			max_cached_sock_count = 60000 + ((scale - 1) * 15000);
384 		}
385 	}
386 
387 	if (maxproc > hard_maxproc) {
388 		hard_maxproc = maxproc;
389 	}
390 #endif
391 	if (serverperfmode) {
392 		/* If running in serverperfmode disable some internal only diagnostics. */
393 		kern_feature_overrides |= SERVER_PERF_MODE_VALIDATION_DISABLES;
394 	}
395 	bsd_exec_setup(scale);
396 }
397