xref: /xnu-8019.80.24/bsd/dev/unix_startup.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1992,7 NeXT Computer, Inc.
30  *
31  * Unix data structure initialization.
32  *
33  */
34 
35 #include <mach/mach_types.h>
36 
37 #include <kern/startup.h>
38 #include <vm/vm_kern.h>
39 #include <mach/vm_prot.h>
40 
41 #include <sys/param.h>
42 #include <sys/buf_internal.h>
43 #include <sys/file_internal.h>
44 #include <sys/proc_internal.h>
45 #include <sys/mcache.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/tty.h>
49 #include <sys/vnode.h>
50 #include <sys/sysctl.h>
51 #include <machine/cons.h>
52 #include <pexpert/pexpert.h>
53 #include <sys/socketvar.h>
54 #include <pexpert/pexpert.h>
55 #include <netinet/tcp_var.h>
56 
57 extern uint32_t kern_maxvnodes;
58 extern vm_map_t mb_map;
59 
60 #if INET
61 extern uint32_t   tcp_sendspace;
62 extern uint32_t   tcp_recvspace;
63 #endif
64 
65 void            bsd_bufferinit(void);
66 
67 unsigned int    bsd_mbuf_cluster_reserve(boolean_t *);
68 void bsd_scale_setup(int);
69 void bsd_exec_setup(int);
70 
71 /*
72  * Declare these as initialized data so we can patch them.
73  */
74 
75 #ifdef  NBUF
76 int             max_nbuf_headers = NBUF;
77 int             niobuf_headers = (NBUF / 2) + 2048;
78 int             nbuf_hashelements = NBUF;
79 int             nbuf_headers = NBUF;
80 #else
81 int             max_nbuf_headers = 0;
82 int             niobuf_headers = 0;
83 int             nbuf_hashelements = 0;
84 int             nbuf_headers = 0;
85 #endif
86 
87 SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
88 SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");
89 
90 __private_extern__ int customnbuf = 0;
91 
92 /* Indicates a server boot when set */
93 TUNABLE(int, serverperfmode, "serverperfmode", 0);
94 
95 #if SOCKETS
96 static unsigned int mbuf_poolsz;
97 #endif
98 
99 vm_map_t        buffer_map;
100 vm_map_t        bufferhdr_map;
101 static int vnodes_sized = 0;
102 
103 extern void     bsd_startupearly(void);
104 
105 void
bsd_startupearly(void)106 bsd_startupearly(void)
107 {
108 	vm_offset_t     firstaddr;
109 	vm_size_t       size;
110 	kern_return_t   ret;
111 
112 	/* clip the number of buf headers upto 16k */
113 	if (max_nbuf_headers == 0) {
114 		max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
115 	}
116 	if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
117 		max_nbuf_headers = 16384;
118 	}
119 	if (max_nbuf_headers < CONFIG_MIN_NBUF) {
120 		max_nbuf_headers = CONFIG_MIN_NBUF;
121 	}
122 
123 	/* clip the number of hash elements  to 200000 */
124 	if ((customnbuf == 0) && nbuf_hashelements == 0) {
125 		nbuf_hashelements = (int)atop_kernel(sane_size / 50);
126 		if ((unsigned int)nbuf_hashelements > 200000) {
127 			nbuf_hashelements = 200000;
128 		}
129 	} else {
130 		nbuf_hashelements = max_nbuf_headers;
131 	}
132 
133 	if (niobuf_headers == 0) {
134 		if (max_nbuf_headers < 4096) {
135 			niobuf_headers = max_nbuf_headers;
136 		} else {
137 			niobuf_headers = (max_nbuf_headers / 2) + 2048;
138 		}
139 	}
140 	if (niobuf_headers < CONFIG_MIN_NIOBUF) {
141 		niobuf_headers = CONFIG_MIN_NIOBUF;
142 	}
143 
144 	size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
145 	size = round_page(size);
146 
147 	ret = kmem_suballoc(kernel_map,
148 	    &firstaddr,
149 	    size,
150 	    FALSE,
151 	    VM_FLAGS_ANYWHERE,
152 	    VM_MAP_KERNEL_FLAGS_NONE,
153 	    VM_KERN_MEMORY_FILE,
154 	    &bufferhdr_map);
155 
156 	if (ret != KERN_SUCCESS) {
157 		panic("Failed to create bufferhdr_map");
158 	}
159 
160 	ret = kernel_memory_allocate(bufferhdr_map,
161 	    &firstaddr,
162 	    size,
163 	    0,
164 	    KMA_HERE | KMA_KOBJECT,
165 	    VM_KERN_MEMORY_FILE);
166 
167 	if (ret != KERN_SUCCESS) {
168 		panic("Failed to allocate bufferhdr_map");
169 	}
170 
171 	buf_headers = (struct buf *) firstaddr;
172 	bzero(buf_headers, size);
173 
174 #if SOCKETS
175 	{
176 		static const unsigned int       maxspace = 128 * 1024;
177 		int             scale;
178 
179 		nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
180 
181 #if INET
182 		if ((scale = nmbclusters / NMBCLUSTERS) > 1) {
183 			tcp_sendspace *= scale;
184 			tcp_recvspace *= scale;
185 
186 			if (tcp_sendspace > maxspace) {
187 				tcp_sendspace = maxspace;
188 			}
189 			if (tcp_recvspace > maxspace) {
190 				tcp_recvspace = maxspace;
191 			}
192 		}
193 #endif /* INET */
194 	}
195 #endif /* SOCKETS */
196 
197 	if (vnodes_sized == 0) {
198 		if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
199 			/*
200 			 * Size vnodes based on memory
201 			 * Number vnodes  is (memsize/64k) + 1024
202 			 * This is the calculation that is used by launchd in tiger
203 			 * we are clipping the max based on 16G
204 			 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
205 			 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
206 			 * but can be smaller or larger.
207 			 */
208 			desiredvnodes  = (int)(sane_size / 65536) + 1024;
209 #ifdef CONFIG_VNODES
210 			if (desiredvnodes > CONFIG_VNODES) {
211 				desiredvnodes = CONFIG_VNODES;
212 			}
213 #endif
214 		}
215 		vnodes_sized = 1;
216 	}
217 }
218 
/*
 * bsd_bufferinit
 *
 * Runs the early sizing pass, creates the mbuf submap (mb_map) when
 * SOCKETS is configured, and then initializes the buffer cache via
 * bufinit().  Panics if the mbuf submap cannot be created.
 */
void
bsd_bufferinit(void)
{
#if SOCKETS
	kern_return_t   ret;
#endif
	/*
	 * Note: Console device initialized in kminit() from bsd_autoconf()
	 * prior to call to us in bsd_init().
	 */

	bsd_startupearly();

#if SOCKETS
	/*
	 * Widen the cluster count to vm_size_t before multiplying so the
	 * byte count is computed at full width.  At the 2 GB pool limit the
	 * product is 2^31, which does not fit in a 32-bit signed int.
	 * NOTE(review): assumes nmbclusters and MCLBYTES are int-typed -
	 * confirm against their declarations.
	 */
	ret = kmem_suballoc(kernel_map,
	    (vm_offset_t *) &mbutl,
	    (vm_size_t) nmbclusters * MCLBYTES,
	    FALSE,
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_MBUF,
	    &mb_map);

	if (ret != KERN_SUCCESS) {
		panic("Failed to allocate mb_map");
	}
#endif /* SOCKETS */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}
252 
/* 512 MB (K32) or 2 GB (K64) hard limit on size of the mbuf pool */
#if !defined(__LP64__)
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#else
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#endif /* !__LP64__ */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)

#if SOCKETS
/*
 * bsd_mbuf_cluster_reserve
 *
 * Computes the size of the mbuf pool and caches it in mbuf_poolsz, so
 * that repeated calls return the first result.
 *
 * This has been broken out into a separate routine that can be called
 * from the x86 early vm initialization to determine how much lo memory
 * to reserve on systems with DMA hardware that can't fully address all
 * of the physical memory that is present.
 *
 * overridden: optional out parameter (may be NULL).  Receives TRUE when
 *     the size came from the "ncl"/"mbuf_pool" boot-args or when
 *     serverperfmode is set, FALSE otherwise.
 *
 * Returns the pool size, i.e. nmbclusters << MCLSHIFT.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses different unit.
	 *
	 * The conversion is done in 64-bit unsigned arithmetic: shifting the
	 * int directly overflows for values of 2048 MB and up (which includes
	 * the 2 GB LP64 limit itself) and is undefined for negative values.
	 * The result is clamped to MAX_NCL here so that it always fits back
	 * into an int; a negative boot-arg is read as a huge unsigned value
	 * and therefore also clamps to MAX_NCL.
	 */
	if (mbuf_pool != 0) {
		uint64_t pool_ncl;

		pool_ncl = ((uint64_t)(unsigned int)mbuf_pool << MBSHIFT) >> MCLSHIFT;
		ncl = (pool_ncl > MAX_NCL) ? (int)MAX_NCL : (int)pool_ncl;
	}

	/*
	 * Only (re)size the pool on machines with more than 64 MB or when a
	 * size was requested explicitly; otherwise nmbclusters keeps the
	 * value it already holds.
	 */
	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round the cluster count down to a multiple of NCLPG */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}

	/*
	 * Shift as unsigned: at MAX_NCL on LP64 the result is 2^31, which is
	 * representable in mbuf_poolsz but not in a signed int.
	 */
	mbuf_poolsz = (unsigned int)nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif
329 
330 #if defined(__LP64__)
331 extern int tcp_tcbhashsize;
332 extern int max_cached_sock_count;
333 #endif
334 
335 
336 void
bsd_scale_setup(int scale)337 bsd_scale_setup(int scale)
338 {
339 #if defined(__LP64__)
340 	if ((scale > 0) && (serverperfmode == 0)) {
341 		maxproc *= scale;
342 		maxprocperuid = (maxproc * 2) / 3;
343 		if (scale > 2) {
344 			maxfiles *= scale;
345 			maxfilesperproc = maxfiles / 2;
346 		}
347 	}
348 	/* Apply server scaling rules */
349 	if ((scale > 0) && (serverperfmode != 0)) {
350 		maxproc = 2500 * scale;
351 		hard_maxproc = maxproc;
352 		/* no fp usage */
353 		maxprocperuid = (maxproc * 3) / 4;
354 		maxfiles = (150000 * scale);
355 		maxfilesperproc = maxfiles / 2;
356 		desiredvnodes = maxfiles;
357 		vnodes_sized = 1;
358 		tcp_tfo_backlog = 100 * scale;
359 		if (scale > 4) {
360 			/* clip somaxconn at 32G level */
361 			somaxconn = 2048;
362 			/*
363 			 * For scale > 4 (> 32G), clip
364 			 * tcp_tcbhashsize to 32K
365 			 */
366 			tcp_tcbhashsize = 32 * 1024;
367 
368 			if (scale > 7) {
369 				/* clip at 64G level */
370 				max_cached_sock_count = 165000;
371 			} else {
372 				max_cached_sock_count = 60000 + ((scale - 1) * 15000);
373 			}
374 		} else {
375 			somaxconn = 512 * scale;
376 			tcp_tcbhashsize = 4 * 1024 * scale;
377 			max_cached_sock_count = 60000 + ((scale - 1) * 15000);
378 		}
379 	}
380 
381 	if (maxproc > hard_maxproc) {
382 		hard_maxproc = maxproc;
383 	}
384 #endif
385 	bsd_exec_setup(scale);
386 }
387