1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1992,7 NeXT Computer, Inc.
30 *
31 * Unix data structure initialization.
32 *
33 */
34
35 #include <mach/mach_types.h>
36
37 #include <kern/startup.h>
38 #include <vm/vm_kern.h>
39 #include <mach/vm_prot.h>
40
41 #include <sys/param.h>
42 #include <sys/buf_internal.h>
43 #include <sys/file_internal.h>
44 #include <sys/proc_internal.h>
45 #include <sys/mcache.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/tty.h>
49 #include <sys/vnode.h>
50 #include <sys/sysctl.h>
51 #include <machine/cons.h>
52 #include <pexpert/pexpert.h>
53 #include <sys/socketvar.h>
54 #include <pexpert/pexpert.h>
55 #include <netinet/tcp_var.h>
56
/* Defined elsewhere; not referenced by the code visible in this file. */
extern uint32_t kern_maxvnodes;
/* Submap for the mbuf pool; assigned in bsd_bufferinit() when SOCKETS is set. */
extern vm_map_t mb_map;

#if INET
/* Default TCP socket buffer sizes; scaled and capped in bsd_startupearly(). */
extern uint32_t tcp_sendspace;
extern uint32_t tcp_recvspace;
#endif

/* Creates the buffer-header and mbuf submaps, then calls bufinit(). */
void bsd_bufferinit(void);

/* Returns the mbuf pool size in bytes; see the definition below. */
unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
/* Scales process/file/socket limits by the given factor (LP64 only). */
void bsd_scale_setup(int);
/* Defined elsewhere; called at the end of bsd_scale_setup(). */
void bsd_exec_setup(int);
70
/*
 * Declare these as initialized data so we can patch them.
 *
 * When NBUF is not defined they start out as 0, and:
 *  - max_nbuf_headers and niobuf_headers are finalized by
 *    bsd_get_bufferhdr_map_size(),
 *  - nbuf_hashelements is finalized by bsd_startupearly(),
 *  - nbuf_headers is not modified by the code visible in this file.
 */

#ifdef NBUF
int             max_nbuf_headers = NBUF;
int             niobuf_headers = (NBUF / 2) + 2048;
int             nbuf_hashelements = NBUF;
int             nbuf_headers = NBUF;
#else
int             max_nbuf_headers = 0;
int             niobuf_headers = 0;
int             nbuf_hashelements = 0;
int             nbuf_headers = 0;
#endif
86
/*
 * Expose the buffer header counts under the _kern sysctl node:
 * "nbuf" is read-only (CTLFLAG_RD), "maxnbuf" is read-write (CTLFLAG_RW).
 */
SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");

/*
 * When non-zero, bsd_get_bufferhdr_map_size() skips the 16384 clip on
 * max_nbuf_headers and bsd_startupearly() sets nbuf_hashelements to
 * max_nbuf_headers.  It is only read in this file; presumably it is set
 * from a boot-arg elsewhere -- TODO confirm.
 */
__private_extern__ int customnbuf = 0;

/* Indicates a server boot when set */
TUNABLE(int, serverperfmode, "serverperfmode", 0);

#if SOCKETS
/* Cached mbuf pool size in bytes; 0 until bsd_mbuf_cluster_reserve() computes it. */
static unsigned int mbuf_poolsz;
#endif

/* Not referenced by the code visible in this file. */
vm_map_t        buffer_map;
/* Submap holding the buffer headers; created in bsd_startupearly(). */
vm_map_t        bufferhdr_map;
/* Set to 1 once desiredvnodes has been chosen (bsd_startupearly() or bsd_scale_setup()). */
static int vnodes_sized = 0;

extern void     bsd_startupearly(void);

/* Size of the buffer header range; filled in by the bufferhdr range registration callback. */
static vm_map_size_t    bufferhdr_map_size;
/* Range whose min_address is used as the base of bufferhdr_map in bsd_startupearly(). */
SECURITY_READ_ONLY_LATE(struct kmem_range) bufferhdr_range = {};
107
108 static vm_map_size_t
bsd_get_bufferhdr_map_size(void)109 bsd_get_bufferhdr_map_size(void)
110 {
111 vm_size_t size;
112
113 /* clip the number of buf headers upto 16k */
114 if (max_nbuf_headers == 0) {
115 max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
116 }
117 if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
118 max_nbuf_headers = 16384;
119 }
120 if (max_nbuf_headers < CONFIG_MIN_NBUF) {
121 max_nbuf_headers = CONFIG_MIN_NBUF;
122 }
123
124 if (niobuf_headers == 0) {
125 if (max_nbuf_headers < 4096) {
126 niobuf_headers = max_nbuf_headers;
127 } else {
128 niobuf_headers = (max_nbuf_headers / 2) + 2048;
129 }
130 }
131 if (niobuf_headers < CONFIG_MIN_NIOBUF) {
132 niobuf_headers = CONFIG_MIN_NIOBUF;
133 }
134
135 size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
136 size = round_page(size);
137
138 return size;
139 }
140
141 KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
142 return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
143 });
144
145 void
bsd_startupearly(void)146 bsd_startupearly(void)
147 {
148 vm_offset_t firstaddr = bufferhdr_range.min_address;
149 vm_size_t size = bufferhdr_map_size;
150
151 assert(size);
152
153 /* clip the number of hash elements to 200000 */
154 if ((customnbuf == 0) && nbuf_hashelements == 0) {
155 nbuf_hashelements = (int)atop_kernel(sane_size / 50);
156 if ((unsigned int)nbuf_hashelements > 200000) {
157 nbuf_hashelements = 200000;
158 }
159 } else {
160 nbuf_hashelements = max_nbuf_headers;
161 }
162
163 bufferhdr_map = kmem_suballoc(kernel_map,
164 &firstaddr,
165 size,
166 VM_MAP_CREATE_NEVER_FAULTS,
167 VM_FLAGS_FIXED_RANGE_SUBALLOC,
168 KMS_PERMANENT | KMS_NOFAIL,
169 VM_KERN_MEMORY_FILE).kmr_submap;
170
171 kmem_alloc(bufferhdr_map,
172 &firstaddr,
173 size,
174 KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT,
175 VM_KERN_MEMORY_FILE);
176
177 buf_headers = (struct buf *) firstaddr;
178
179 #if SOCKETS
180 {
181 static const unsigned int maxspace = 128 * 1024;
182 int scale;
183
184 #if INET
185 if ((scale = nmbclusters / NMBCLUSTERS) > 1) {
186 tcp_sendspace *= scale;
187 tcp_recvspace *= scale;
188
189 if (tcp_sendspace > maxspace) {
190 tcp_sendspace = maxspace;
191 }
192 if (tcp_recvspace > maxspace) {
193 tcp_recvspace = maxspace;
194 }
195 }
196 #endif /* INET */
197 }
198 #endif /* SOCKETS */
199
200 if (vnodes_sized == 0) {
201 if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
202 /*
203 * Size vnodes based on memory
204 * Number vnodes is (memsize/64k) + 1024
205 * This is the calculation that is used by launchd in tiger
206 * we are clipping the max based on 16G
207 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
208 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
209 * but can be smaller or larger.
210 */
211 desiredvnodes = (int)(sane_size / 65536) + 1024;
212 #ifdef CONFIG_VNODES
213 if (desiredvnodes > CONFIG_VNODES) {
214 desiredvnodes = CONFIG_VNODES;
215 }
216 #endif
217 }
218 vnodes_sized = 1;
219 }
220 }
221
#if SOCKETS
/*
 * Register the mbuf range.  The callback sets nmbclusters from the pool size
 * reported by bsd_mbuf_cluster_reserve() and returns the range size, a
 * whole number of MCLBYTES-sized clusters.
 */
SECURITY_READ_ONLY_LATE(struct kmem_range) mb_range = {};
KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
	unsigned int pool_bytes = bsd_mbuf_cluster_reserve(NULL);

	nmbclusters = pool_bytes / MCLBYTES;
	return (vm_map_size_t)(nmbclusters * MCLBYTES);
});
#endif /* SOCKETS */
229
/*
 * Set up the BSD buffer header submap, the mbuf submap (SOCKETS only) and
 * the buffer cache.
 *
 * Note: Console device initialized in kminit() from bsd_autoconf()
 * prior to call to us in bsd_init().
 */
void
bsd_bufferinit(void)
{
	bsd_startupearly();

#if SOCKETS
	/* The mbuf submap spans nmbclusters clusters starting at mb_range. */
	vm_size_t mb_bytes = (vm_size_t)(nmbclusters * MCLBYTES);

	mbutl = (unsigned char *)mb_range.min_address;
	mb_map = kmem_suballoc(kernel_map,
	    (vm_offset_t *)&mbutl,
	    mb_bytes,
	    FALSE,
	    VM_FLAGS_FIXED_RANGE_SUBALLOC,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_MBUF).kmr_submap;
#endif /* SOCKETS */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}
256
/* Hard limit on the size of the mbuf pool: 2 GB on LP64 (K64), 512 MB otherwise (K32) */
#if defined(__LP64__)
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#else
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#endif /* __LP64__ */
/* The same limit expressed as a number of clusters */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)
264
#if SOCKETS
/*
 * Compute (once) and return the size in bytes of the mbuf cluster pool.
 *
 * This has been broken out into a separate routine that can be called from
 * the x86 early vm initialization to determine how much low memory to
 * reserve on systems with DMA hardware that can't fully address all of the
 * physical memory that is present.
 *
 * The "ncl" (cluster count) and "mbuf_pool" (size in MB) boot-args override
 * the automatic sizing; "mbuf_pool" takes precedence when both are given.
 *
 * overridden:	optional out-parameter.  When non-NULL it receives TRUE if
 *		the sizing ran with a non-zero cluster request or with
 *		serverperfmode set, FALSE otherwise.
 *
 * Returns the pool size in bytes.  The first call also updates nmbclusters
 * when memory is larger than 64 MB or a cluster count was requested; later
 * calls return the cached result.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses different unit.
	 */
	if (mbuf_pool != 0) {
		/*
		 * Convert in 64-bit unsigned arithmetic: shifting the
		 * 32-bit signed value left by MBSHIFT is undefined for
		 * large or negative inputs.  Anything above MAX_NCL is
		 * clipped further down anyway, so saturate here to keep the
		 * result representable as an int.
		 */
		uint64_t pool_ncl =
		    ((uint64_t)(unsigned int)mbuf_pool << MBSHIFT) >> MCLSHIFT;

		ncl = (pool_ncl > MAX_NCL) ? (int)MAX_NCL : (int)pool_ncl;
	}

	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		nmbclusters = ncl;
		if (nmbclusters == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round it down to the nearest multiple of NCLPG clusters */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}
	/*
	 * The cast keeps the shift in unsigned arithmetic whatever the
	 * declared type of nmbclusters is.
	 */
	mbuf_poolsz = (unsigned int)nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif
333
334 #if defined(__LP64__)
335 extern int tcp_tcbhashsize;
336 extern int max_cached_sock_count;
337 #endif
338
339
340 void
bsd_scale_setup(int scale)341 bsd_scale_setup(int scale)
342 {
343 #if defined(__LP64__)
344 if ((scale > 0) && (serverperfmode == 0)) {
345 maxproc *= scale;
346 maxprocperuid = (maxproc * 2) / 3;
347 if (scale > 2) {
348 maxfiles *= scale;
349 maxfilesperproc = maxfiles / 2;
350 }
351 }
352 /* Apply server scaling rules */
353 if ((scale > 0) && (serverperfmode != 0)) {
354 maxproc = 2500 * scale;
355 hard_maxproc = maxproc;
356 /* no fp usage */
357 maxprocperuid = (maxproc * 3) / 4;
358 maxfiles = (150000 * scale);
359 maxfilesperproc = maxfiles / 2;
360 desiredvnodes = maxfiles;
361 vnodes_sized = 1;
362 tcp_tfo_backlog = 100 * scale;
363 if (scale > 4) {
364 /* clip somaxconn at 32G level */
365 somaxconn = 2048;
366 /*
367 * For scale > 4 (> 32G), clip
368 * tcp_tcbhashsize to 32K
369 */
370 tcp_tcbhashsize = 32 * 1024;
371
372 if (scale > 7) {
373 /* clip at 64G level */
374 max_cached_sock_count = 165000;
375 } else {
376 max_cached_sock_count = 60000 + ((scale - 1) * 15000);
377 }
378 } else {
379 somaxconn = 512 * scale;
380 tcp_tcbhashsize = 4 * 1024 * scale;
381 max_cached_sock_count = 60000 + ((scale - 1) * 15000);
382 }
383 }
384
385 if (maxproc > hard_maxproc) {
386 hard_maxproc = maxproc;
387 }
388 #endif
389 bsd_exec_setup(scale);
390 }
391