xref: /xnu-8796.141.3/osfmk/i386/commpage/commpage.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2003-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
/*
 *	Here's what to do if you want to add a new routine to the comm page:
 *
 *		1. Add a definition for its address in osfmk/i386/cpu_capabilities.h,
 *		   being careful to reserve room for future expansion.
 *
 *		2. Write one or more versions of the routine, each with its own
 *		   commpage_descriptor.  The tricky part is getting the "special",
 *		   "musthave", and "canthave" fields right, so that exactly one
 *		   version of the routine is selected for every machine.
 *		   The source files should be in osfmk/i386/commpage/.
 *
 *		3. Add a pointer to your new commpage_descriptor(s) in the "routines"
 *		   array in osfmk/i386/commpage/commpage_asm.s.  There are two
 *		   arrays, one for the 32-bit and one for the 64-bit commpage.
 *
 *		4. Write the code in Libc to use the new routine.
 */
47 
48 #include <mach/mach_types.h>
49 #include <mach/machine.h>
50 #include <mach/vm_map.h>
51 #include <mach/mach_vm.h>
52 #include <mach/machine.h>
53 #include <i386/cpuid.h>
54 #include <i386/tsc.h>
55 #include <i386/rtclock_protos.h>
56 #include <i386/cpu_data.h>
57 #include <i386/machine_routines.h>
58 #include <i386/misc_protos.h>
59 #include <i386/cpuid.h>
60 #include <machine/cpu_capabilities.h>
61 #include <machine/commpage.h>
62 #include <machine/pmap.h>
63 #include <vm/vm_kern.h>
64 #include <vm/vm_map.h>
65 #include <stdatomic.h>
66 
67 #include <ipc/ipc_port.h>
68 
69 #include <kern/page_decrypt.h>
70 #include <kern/processor.h>
71 
72 #include <sys/kdebug.h>
73 
74 #if CONFIG_ATM
75 #include <atm/atm_internal.h>
76 #endif
77 
78 /* the lists of commpage routines are in commpage_asm.s  */
79 extern  commpage_descriptor*    commpage_32_routines[];
80 extern  commpage_descriptor*    commpage_64_routines[];
81 
82 extern vm_map_t commpage32_map; // the shared submap, set up in vm init
83 extern vm_map_t commpage64_map; // the shared submap, set up in vm init
84 extern vm_map_t commpage_text32_map;    // the shared submap, set up in vm init
85 extern vm_map_t commpage_text64_map;    // the shared submap, set up in vm init
86 
87 
88 char    *commPagePtr32 = NULL;          // virtual addr in kernel map of 32-bit commpage
89 char    *commPagePtr64 = NULL;          // ...and of 64-bit commpage
90 char    *commPageTextPtr32 = NULL;      // virtual addr in kernel map of 32-bit commpage
91 char    *commPageTextPtr64 = NULL;      // ...and of 64-bit commpage
92 
93 uint64_t     _cpu_capabilities = 0;     // define the capability vector
94 
95 typedef uint32_t commpage_address_t;
96 
97 static commpage_address_t       next;   // next available address in comm page
98 
99 static char    *commPagePtr;            // virtual addr in kernel map of commpage we are working on
100 static commpage_address_t       commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
101 
102 static  commpage_time_data      *time_data32 = NULL;
103 static  commpage_time_data      *time_data64 = NULL;
104 static  new_commpage_timeofday_data_t *gtod_time_data32 = NULL;
105 static  new_commpage_timeofday_data_t *gtod_time_data64 = NULL;
106 
107 
108 decl_simple_lock_data(static, commpage_active_cpus_lock);
109 
110 /* Allocate the commpage and add to the shared submap created by vm:
111  *      1. allocate a page in the kernel map (RW)
112  *	2. wire it down
113  *	3. make a memory entry out of it
114  *	4. map that entry into the shared comm region map (R-only)
115  */
116 
117 static  void*
commpage_allocate(vm_map_t submap,size_t area_used,vm_prot_t uperm)118 commpage_allocate(
119 	vm_map_t        submap,                 // commpage32_map or commpage_map64
120 	size_t          area_used,              // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
121 	vm_prot_t       uperm)
122 {
123 	mach_vm_offset_t kernel_addr = 0;        // address of commpage in kernel map
124 	mach_vm_offset_t zero = 0;
125 	vm_size_t       size = area_used;       // size actually populated
126 	vm_map_entry_t  entry;
127 	ipc_port_t      handle;
128 	kern_return_t   kr;
129 	vm_map_kernel_flags_t vmk_flags;
130 
131 	if (submap == NULL) {
132 		panic("commpage submap is null");
133 	}
134 
135 	kr = mach_vm_map_kernel(kernel_map,
136 	    &kernel_addr,
137 	    area_used,
138 	    0,
139 	    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vm_tag = VM_KERN_MEMORY_OSFMK),
140 	    NULL,
141 	    0,
142 	    FALSE,
143 	    VM_PROT_ALL,
144 	    VM_PROT_ALL,
145 	    VM_INHERIT_NONE);
146 	if (kr != KERN_SUCCESS) {
147 		panic("cannot allocate commpage %d", kr);
148 	}
149 
150 	if ((kr = vm_map_wire_kernel(kernel_map,
151 	    kernel_addr,
152 	    kernel_addr + area_used,
153 	    VM_PROT_DEFAULT, VM_KERN_MEMORY_OSFMK,
154 	    FALSE))) {
155 		panic("cannot wire commpage: %d", kr);
156 	}
157 
158 	/*
159 	 * Now that the object is created and wired into the kernel map, mark it so that no delay
160 	 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
161 	 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
162 	 * that would be a real disaster.
163 	 *
164 	 * JMM - What we really need is a way to create it like this in the first place.
165 	 */
166 	if (!(kr = vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr, VM_MAP_PAGE_MASK(kernel_map)), &entry) || entry->is_sub_map)) {
167 		panic("cannot find commpage entry %d", kr);
168 	}
169 	VME_OBJECT(entry)->copy_strategy = MEMORY_OBJECT_COPY_NONE;
170 
171 	if ((kr = mach_make_memory_entry( kernel_map,           // target map
172 	    &size,                                      // size
173 	    kernel_addr,                                // offset (address in kernel map)
174 	    uperm,                              // protections as specified
175 	    &handle,                                    // this is the object handle we get
176 	    NULL ))) {                                  // parent_entry (what is this?)
177 		panic("cannot make entry for commpage %d", kr);
178 	}
179 
180 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
181 	if (uperm == (VM_PROT_READ | VM_PROT_EXECUTE)) {
182 		/*
183 		 * Mark this unsigned executable mapping as "jit" to avoid
184 		 * code-signing violations when attempting to execute unsigned
185 		 * code.
186 		 */
187 		vmk_flags.vmkf_map_jit = TRUE;
188 	}
189 
190 	kr = mach_vm_map_kernel(
191 		submap,                 // target map (shared submap)
192 		&zero,                  // address (map into 1st page in submap)
193 		area_used,              // size
194 		0,                      // mask
195 		vmk_flags,
196 		handle,                 // port is the memory entry we just made
197 		0,                      // offset (map 1st page in memory entry)
198 		FALSE,                  // copy
199 		uperm,                  // cur_protection (R-only in user map)
200 		uperm,                  // max_protection
201 		VM_INHERIT_SHARE);      // inheritance
202 	if (kr != KERN_SUCCESS) {
203 		panic("cannot map commpage %d", kr);
204 	}
205 
206 	ipc_port_release(handle);
207 	/* Make the kernel mapping non-executable. This cannot be done
208 	 * at the time of map entry creation as mach_make_memory_entry
209 	 * cannot handle disjoint permissions at this time.
210 	 */
211 	kr = vm_protect(kernel_map, kernel_addr, area_used, FALSE, VM_PROT_READ | VM_PROT_WRITE);
212 	assert(kr == KERN_SUCCESS);
213 
214 	return (void*)(intptr_t)kernel_addr;                     // return address in kernel map
215 }
216 
217 /* Get address (in kernel map) of a commpage field. */
218 
219 static void*
commpage_addr_of(commpage_address_t addr_at_runtime)220 commpage_addr_of(
221 	commpage_address_t     addr_at_runtime )
222 {
223 	return (void*) ((uintptr_t)commPagePtr + (addr_at_runtime - commPageBaseOffset));
224 }
225 
226 /*
227  * Calculate address of data within 32- and 64-bit commpages (not to be used with commpage
228  * text).
229  */
230 static void*
commpage_specific_addr_of(char * commPageBase,commpage_address_t addr_at_runtime)231 commpage_specific_addr_of(char *commPageBase, commpage_address_t addr_at_runtime)
232 {
233 	/*
234 	 * Note that the base address (_COMM_PAGE32_BASE_ADDRESS) is the same for
235 	 * 32- and 64-bit commpages
236 	 */
237 	return (void*) ((uintptr_t)commPageBase + (addr_at_runtime - _COMM_PAGE32_BASE_ADDRESS));
238 }
239 
/*
 * Report the number of CPUs on this system, clamped to 0xFF.  We cannot rely
 * on machine_info.max_cpus this early in the boot, so ask ml_wait_max_cpus().
 * Panics if that reports zero CPUs.
 */
static int
commpage_cpus(void)
{
	unsigned int ncpus = ml_wait_max_cpus();        // NB: this call can block

	if (ncpus == 0) {
		panic("commpage cpus==0");
	}

	return (ncpus > 0xFF) ? 0xFF : ncpus;
}
259 
260 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
261 
262 static void
commpage_init_cpu_capabilities(void)263 commpage_init_cpu_capabilities( void )
264 {
265 	uint64_t bits;
266 	int cpus;
267 	ml_cpu_info_t cpu_info;
268 
269 	bits = 0;
270 	ml_cpu_get_info(&cpu_info);
271 
272 	switch (cpu_info.vector_unit) {
273 	case 9:
274 		bits |= kHasAVX1_0;
275 		OS_FALLTHROUGH;
276 	case 8:
277 		bits |= kHasSSE4_2;
278 		OS_FALLTHROUGH;
279 	case 7:
280 		bits |= kHasSSE4_1;
281 		OS_FALLTHROUGH;
282 	case 6:
283 		bits |= kHasSupplementalSSE3;
284 		OS_FALLTHROUGH;
285 	case 5:
286 		bits |= kHasSSE3;
287 		OS_FALLTHROUGH;
288 	case 4:
289 		bits |= kHasSSE2;
290 		OS_FALLTHROUGH;
291 	case 3:
292 		bits |= kHasSSE;
293 		OS_FALLTHROUGH;
294 	case 2:
295 		bits |= kHasMMX;
296 		OS_FALLTHROUGH;
297 	default:
298 		break;
299 	}
300 	switch (cpu_info.cache_line_size) {
301 	case 128:
302 		bits |= kCache128;
303 		break;
304 	case 64:
305 		bits |= kCache64;
306 		break;
307 	case 32:
308 		bits |= kCache32;
309 		break;
310 	default:
311 		break;
312 	}
313 	cpus = commpage_cpus();                 // how many CPUs do we have
314 
315 	bits |= (cpus << kNumCPUsShift);
316 
317 	bits |= kFastThreadLocalStorage;        // we use %gs for TLS
318 
319 #define setif(_bits, _bit, _condition) \
320 	if (_condition) _bits |= _bit
321 
322 	setif(bits, kUP, cpus == 1);
323 	setif(bits, k64Bit, cpu_mode_is64bit());
324 	setif(bits, kSlow, tscFreq <= SLOW_TSC_THRESHOLD);
325 
326 	setif(bits, kHasAES, cpuid_features() &
327 	    CPUID_FEATURE_AES);
328 	setif(bits, kHasF16C, cpuid_features() &
329 	    CPUID_FEATURE_F16C);
330 	setif(bits, kHasRDRAND, cpuid_features() &
331 	    CPUID_FEATURE_RDRAND);
332 	setif(bits, kHasFMA, cpuid_features() &
333 	    CPUID_FEATURE_FMA);
334 
335 	setif(bits, kHasBMI1, cpuid_leaf7_features() &
336 	    CPUID_LEAF7_FEATURE_BMI1);
337 	setif(bits, kHasBMI2, cpuid_leaf7_features() &
338 	    CPUID_LEAF7_FEATURE_BMI2);
339 	/* Do not advertise RTM and HLE if the TSX FORCE ABORT WA is required */
340 	if (cpuid_wa_required(CPU_INTEL_TSXFA) & CWA_OFF) {
341 		setif(bits, kHasRTM, cpuid_leaf7_features() &
342 		    CPUID_LEAF7_FEATURE_RTM);
343 		setif(bits, kHasHLE, cpuid_leaf7_features() &
344 		    CPUID_LEAF7_FEATURE_HLE);
345 	}
346 	setif(bits, kHasAVX2_0, cpuid_leaf7_features() &
347 	    CPUID_LEAF7_FEATURE_AVX2);
348 	setif(bits, kHasRDSEED, cpuid_leaf7_features() &
349 	    CPUID_LEAF7_FEATURE_RDSEED);
350 	setif(bits, kHasADX, cpuid_leaf7_features() &
351 	    CPUID_LEAF7_FEATURE_ADX);
352 
353 #if 0   /* The kernel doesn't support MPX or SGX */
354 	setif(bits, kHasMPX, cpuid_leaf7_features() &
355 	    CPUID_LEAF7_FEATURE_MPX);
356 	setif(bits, kHasSGX, cpuid_leaf7_features() &
357 	    CPUID_LEAF7_FEATURE_SGX);
358 #endif
359 
360 	if (ml_fpu_avx512_enabled()) {
361 		setif(bits, kHasAVX512F, cpuid_leaf7_features() &
362 		    CPUID_LEAF7_FEATURE_AVX512F);
363 		setif(bits, kHasAVX512CD, cpuid_leaf7_features() &
364 		    CPUID_LEAF7_FEATURE_AVX512CD);
365 		setif(bits, kHasAVX512DQ, cpuid_leaf7_features() &
366 		    CPUID_LEAF7_FEATURE_AVX512DQ);
367 		setif(bits, kHasAVX512BW, cpuid_leaf7_features() &
368 		    CPUID_LEAF7_FEATURE_AVX512BW);
369 		setif(bits, kHasAVX512VL, cpuid_leaf7_features() &
370 		    CPUID_LEAF7_FEATURE_AVX512VL);
371 		setif(bits, kHasAVX512IFMA, cpuid_leaf7_features() &
372 		    CPUID_LEAF7_FEATURE_AVX512IFMA);
373 		setif(bits, kHasAVX512VBMI, cpuid_leaf7_features() &
374 		    CPUID_LEAF7_FEATURE_AVX512VBMI);
375 		setif(bits, kHasVAES, cpuid_leaf7_features() &
376 		    CPUID_LEAF7_FEATURE_VAES);
377 		setif(bits, kHasVPCLMULQDQ, cpuid_leaf7_features() &
378 		    CPUID_LEAF7_FEATURE_VPCLMULQDQ);
379 		setif(bits, kHasAVX512VNNI, cpuid_leaf7_features() &
380 		    CPUID_LEAF7_FEATURE_AVX512VNNI);
381 		setif(bits, kHasAVX512BITALG, cpuid_leaf7_features() &
382 		    CPUID_LEAF7_FEATURE_AVX512BITALG);
383 		setif(bits, kHasAVX512VPOPCNTDQ, cpuid_leaf7_features() &
384 		    CPUID_LEAF7_FEATURE_AVX512VPCDQ);
385 	}
386 
387 	uint64_t misc_enable = rdmsr64(MSR_IA32_MISC_ENABLE);
388 	setif(bits, kHasENFSTRG, (misc_enable & 1ULL) &&
389 	    (cpuid_leaf7_features() &
390 	    CPUID_LEAF7_FEATURE_ERMS));
391 
392 	_cpu_capabilities = bits;               // set kernel version for use by drivers etc
393 }
394 
395 /* initialize the approx_time_supported flag and set the approx time to 0.
396  * Called during initial commpage population.
397  */
398 static void
commpage_mach_approximate_time_init(void)399 commpage_mach_approximate_time_init(void)
400 {
401 	char *cp = commPagePtr32;
402 	uint8_t supported;
403 
404 #ifdef CONFIG_MACH_APPROXIMATE_TIME
405 	supported = 1;
406 #else
407 	supported = 0;
408 #endif
409 	if (cp) {
410 		cp += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_BASE_ADDRESS);
411 		*(boolean_t *)cp = supported;
412 	}
413 
414 	cp = commPagePtr64;
415 	if (cp) {
416 		cp += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_START_ADDRESS);
417 		*(boolean_t *)cp = supported;
418 	}
419 	commpage_update_mach_approximate_time(0);
420 }
421 
/* Publish an initial continuous-time base of 0 in the commpages. */
static void
commpage_mach_continuous_time_init(void)
{
	const uint64_t initial_sleeptime = 0;

	commpage_update_mach_continuous_time(initial_sleeptime);
}
427 
428 static void
commpage_boottime_init(void)429 commpage_boottime_init(void)
430 {
431 	clock_sec_t secs;
432 	clock_usec_t microsecs;
433 	clock_get_boottime_microtime(&secs, &microsecs);
434 	commpage_update_boottime(secs * USEC_PER_SEC + microsecs);
435 }
436 
437 uint64_t
_get_cpu_capabilities(void)438 _get_cpu_capabilities(void)
439 {
440 	return _cpu_capabilities;
441 }
442 
443 /* Copy data into commpage. */
444 
445 static void
commpage_stuff(commpage_address_t address,const void * source,int length)446 commpage_stuff(
447 	commpage_address_t  address,
448 	const void  *source,
449 	int         length  )
450 {
451 	void        *dest = commpage_addr_of(address);
452 
453 	if (address < next) {
454 		panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next);
455 	}
456 
457 	bcopy(source, dest, length);
458 
459 	next = address + length;
460 }
461 
462 /*
463  * Updates both the 32-bit and 64-bit commpages with the new data.
464  */
465 static void
commpage_update(commpage_address_t address,const void * source,int length)466 commpage_update(commpage_address_t address, const void *source, int length)
467 {
468 	void *dest = commpage_specific_addr_of(commPagePtr32, address);
469 	bcopy(source, dest, length);
470 
471 	dest = commpage_specific_addr_of(commPagePtr64, address);
472 	bcopy(source, dest, length);
473 }
474 
475 void
commpage_post_ucode_update(void)476 commpage_post_ucode_update(void)
477 {
478 	commpage_init_cpu_capabilities();
479 	commpage_update(_COMM_PAGE_CPU_CAPABILITIES64, &_cpu_capabilities, sizeof(_cpu_capabilities));
480 	commpage_update(_COMM_PAGE_CPU_CAPABILITIES, &_cpu_capabilities, sizeof(uint32_t));
481 }
482 
483 /* Copy a routine into comm page if it matches running machine.
484  */
485 static void
commpage_stuff_routine(commpage_descriptor * rd)486 commpage_stuff_routine(
487 	commpage_descriptor *rd     )
488 {
489 	commpage_stuff(rd->commpage_address, rd->code_address, rd->code_length);
490 }
491 
492 
493 /* Fill in the 32- or 64-bit commpage.  Called once for each.
494  */
495 
496 static void
commpage_populate_one(vm_map_t submap,char ** kernAddressPtr,size_t area_used,commpage_address_t base_offset,commpage_time_data ** time_data,new_commpage_timeofday_data_t ** gtod_time_data,const char * signature,vm_prot_t uperm)497 commpage_populate_one(
498 	vm_map_t        submap,         // commpage32_map or compage64_map
499 	char **         kernAddressPtr, // &commPagePtr32 or &commPagePtr64
500 	size_t          area_used,      // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
501 	commpage_address_t base_offset, // will become commPageBaseOffset
502 	commpage_time_data** time_data, // &time_data32 or &time_data64
503 	new_commpage_timeofday_data_t** gtod_time_data, // &gtod_time_data32 or &gtod_time_data64
504 	const char*     signature,      // "commpage 32-bit" or "commpage 64-bit"
505 	vm_prot_t       uperm)
506 {
507 	uint8_t         c1;
508 	uint16_t        c2;
509 	uint64_t        c8;
510 	uint32_t        cfamily;
511 	short   version = _COMM_PAGE_THIS_VERSION;
512 
513 	next = 0;
514 	commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used, uperm );
515 	*kernAddressPtr = commPagePtr;                          // save address either in commPagePtr32 or 64
516 	commPageBaseOffset = base_offset;
517 
518 	*time_data = commpage_addr_of( _COMM_PAGE_TIME_DATA_START );
519 	*gtod_time_data = commpage_addr_of( _COMM_PAGE_NEWTIMEOFDAY_DATA );
520 
521 	/* Stuff in the constants.  We move things into the comm page in strictly
522 	 * ascending order, so we can check for overlap and panic if so.
523 	 * Note: the 32-bit cpu_capabilities vector is retained in addition to
524 	 * the expanded 64-bit vector.
525 	 */
526 	commpage_stuff(_COMM_PAGE_SIGNATURE, signature, (int)MIN(_COMM_PAGE_SIGNATURELEN, strlen(signature)));
527 	commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64, &_cpu_capabilities, sizeof(_cpu_capabilities));
528 	commpage_stuff(_COMM_PAGE_VERSION, &version, sizeof(short));
529 	commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES, &_cpu_capabilities, sizeof(uint32_t));
530 
531 	c2 = 32;  // default
532 	if (_cpu_capabilities & kCache64) {
533 		c2 = 64;
534 	} else if (_cpu_capabilities & kCache128) {
535 		c2 = 128;
536 	}
537 	commpage_stuff(_COMM_PAGE_CACHE_LINESIZE, &c2, 2);
538 
539 	/* machine_info valid after ml_wait_max_cpus() */
540 	c1 = machine_info.physical_cpu_max;
541 	commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS, &c1, 1);
542 	c1 = machine_info.logical_cpu_max;
543 	commpage_stuff(_COMM_PAGE_LOGICAL_CPUS, &c1, 1);
544 
545 	c8 = ml_cpu_cache_size(0);
546 	commpage_stuff(_COMM_PAGE_MEMORY_SIZE, &c8, 8);
547 
548 	cfamily = cpuid_info()->cpuid_cpufamily;
549 	commpage_stuff(_COMM_PAGE_CPUFAMILY, &cfamily, 4);
550 	c1 = PAGE_SHIFT;
551 	commpage_stuff(_COMM_PAGE_KERNEL_PAGE_SHIFT, &c1, 1);
552 	commpage_stuff(_COMM_PAGE_USER_PAGE_SHIFT_64, &c1, 1);
553 
554 	if (next > _COMM_PAGE_END) {
555 		panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr);
556 	}
557 }
558 
559 
560 /* Fill in commpages: called once, during kernel initialization, from the
561  * startup thread before user-mode code is running.
562  *
563  * See the top of this file for a list of what you have to do to add
564  * a new routine to the commpage.
565  */
566 
567 void
commpage_populate(void)568 commpage_populate( void )
569 {
570 	commpage_init_cpu_capabilities();
571 
572 	commpage_populate_one(  commpage32_map,
573 	    &commPagePtr32,
574 	    _COMM_PAGE32_AREA_USED,
575 	    _COMM_PAGE32_BASE_ADDRESS,
576 	    &time_data32,
577 	    &gtod_time_data32,
578 	    _COMM_PAGE32_SIGNATURE_STRING,
579 	    VM_PROT_READ);
580 #ifndef __LP64__
581 	pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS,
582 	    _COMM_PAGE32_AREA_USED / INTEL_PGBYTES);
583 #endif
584 	time_data64 = time_data32;                      /* if no 64-bit commpage, point to 32-bit */
585 	gtod_time_data64 = gtod_time_data32;
586 
587 	if (_cpu_capabilities & k64Bit) {
588 		commpage_populate_one(  commpage64_map,
589 		    &commPagePtr64,
590 		    _COMM_PAGE64_AREA_USED,
591 		    _COMM_PAGE32_START_ADDRESS,                     /* commpage address are relative to 32-bit commpage placement */
592 		    &time_data64,
593 		    &gtod_time_data64,
594 		    _COMM_PAGE64_SIGNATURE_STRING,
595 		    VM_PROT_READ);
596 #ifndef __LP64__
597 		pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS,
598 		    _COMM_PAGE64_AREA_USED / INTEL_PGBYTES);
599 #endif
600 	}
601 
602 	simple_lock_init(&commpage_active_cpus_lock, 0);
603 
604 	commpage_update_active_cpus();
605 	commpage_mach_approximate_time_init();
606 	commpage_mach_continuous_time_init();
607 	commpage_boottime_init();
608 	rtc_nanotime_init_commpage();
609 	commpage_update_kdebug_state();
610 #if CONFIG_ATM
611 	commpage_update_atm_diagnostic_config(atm_get_diagnostic_config());
612 #endif
613 }
614 
615 /* Fill in the common routines during kernel initialization.
616  * This is called before user-mode code is running.
617  */
618 void
commpage_text_populate(void)619 commpage_text_populate( void )
620 {
621 	commpage_descriptor **rd;
622 
623 	next = 0;
624 	commPagePtr = (char *) commpage_allocate(commpage_text32_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
625 	commPageTextPtr32 = commPagePtr;
626 
627 	char *cptr = commPagePtr;
628 	int i = 0;
629 	for (; i < _COMM_PAGE_TEXT_AREA_USED; i++) {
630 		cptr[i] = 0xCC;
631 	}
632 
633 	commPageBaseOffset = _COMM_PAGE_TEXT_START;
634 	for (rd = commpage_32_routines; *rd != NULL; rd++) {
635 		commpage_stuff_routine(*rd);
636 	}
637 
638 #ifndef __LP64__
639 	pmap_commpage32_init((vm_offset_t) commPageTextPtr32, _COMM_PAGE_TEXT_START,
640 	    _COMM_PAGE_TEXT_AREA_USED / INTEL_PGBYTES);
641 #endif
642 
643 	if (_cpu_capabilities & k64Bit) {
644 		next = 0;
645 		commPagePtr = (char *) commpage_allocate(commpage_text64_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
646 		commPageTextPtr64 = commPagePtr;
647 
648 		cptr = commPagePtr;
649 		for (i = 0; i < _COMM_PAGE_TEXT_AREA_USED; i++) {
650 			cptr[i] = 0xCC;
651 		}
652 
653 		for (rd = commpage_64_routines; *rd != NULL; rd++) {
654 			commpage_stuff_routine(*rd);
655 		}
656 
657 #ifndef __LP64__
658 		pmap_commpage64_init((vm_offset_t) commPageTextPtr64, _COMM_PAGE_TEXT_START,
659 		    _COMM_PAGE_TEXT_AREA_USED / INTEL_PGBYTES);
660 #endif
661 	}
662 
663 	if (next > _COMM_PAGE_TEXT_END) {
664 		panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr);
665 	}
666 }
667 
668 /* Update commpage nanotime information.
669  *
670  * This routine must be serialized by some external means, ie a lock.
671  */
672 
673 void
commpage_set_nanotime(uint64_t tsc_base,uint64_t ns_base,uint32_t scale,uint32_t shift)674 commpage_set_nanotime(
675 	uint64_t        tsc_base,
676 	uint64_t        ns_base,
677 	uint32_t        scale,
678 	uint32_t        shift )
679 {
680 	commpage_time_data      *p32 = time_data32;
681 	commpage_time_data      *p64 = time_data64;
682 	static uint32_t generation = 0;
683 	uint32_t        next_gen;
684 
685 	if (p32 == NULL) {              /* have commpages been allocated yet? */
686 		return;
687 	}
688 
689 	if (generation != p32->nt_generation) {
690 		panic("nanotime trouble 1");    /* possibly not serialized */
691 	}
692 	if (ns_base < p32->nt_ns_base) {
693 		panic("nanotime trouble 2");
694 	}
695 	if ((shift != 0) && ((_cpu_capabilities & kSlow) == 0)) {
696 		panic("nanotime trouble 3");
697 	}
698 
699 	next_gen = ++generation;
700 	if (next_gen == 0) {
701 		next_gen = ++generation;
702 	}
703 
704 	p32->nt_generation = 0;         /* mark invalid, so commpage won't try to use it */
705 	p64->nt_generation = 0;
706 
707 	p32->nt_tsc_base = tsc_base;
708 	p64->nt_tsc_base = tsc_base;
709 
710 	p32->nt_ns_base = ns_base;
711 	p64->nt_ns_base = ns_base;
712 
713 	p32->nt_scale = scale;
714 	p64->nt_scale = scale;
715 
716 	p32->nt_shift = shift;
717 	p64->nt_shift = shift;
718 
719 	p32->nt_generation = next_gen;  /* mark data as valid */
720 	p64->nt_generation = next_gen;
721 }
722 
723 /* Update commpage gettimeofday() information.  As with nanotime(), we interleave
724  * updates to the 32- and 64-bit commpage, in order to keep time more nearly in sync
725  * between the two environments.
726  *
727  * This routine must be serializeed by some external means, ie a lock.
728  */
729 
730 void
commpage_set_timestamp(uint64_t abstime,uint64_t sec,uint64_t frac,uint64_t scale,uint64_t tick_per_sec)731 commpage_set_timestamp(
732 	uint64_t        abstime,
733 	uint64_t        sec,
734 	uint64_t        frac,
735 	uint64_t        scale,
736 	uint64_t        tick_per_sec)
737 {
738 	new_commpage_timeofday_data_t   *p32 = gtod_time_data32;
739 	new_commpage_timeofday_data_t   *p64 = gtod_time_data64;
740 
741 	p32->TimeStamp_tick = 0x0ULL;
742 	p64->TimeStamp_tick = 0x0ULL;
743 
744 	p32->TimeStamp_sec = sec;
745 	p64->TimeStamp_sec = sec;
746 
747 	p32->TimeStamp_frac = frac;
748 	p64->TimeStamp_frac = frac;
749 
750 	p32->Ticks_scale = scale;
751 	p64->Ticks_scale = scale;
752 
753 	p32->Ticks_per_sec = tick_per_sec;
754 	p64->Ticks_per_sec = tick_per_sec;
755 
756 	p32->TimeStamp_tick = abstime;
757 	p64->TimeStamp_tick = abstime;
758 }
759 
760 /* Update _COMM_PAGE_MEMORY_PRESSURE.  Called periodically from vm's compute_memory_pressure()  */
761 
762 void
commpage_set_memory_pressure(unsigned int pressure)763 commpage_set_memory_pressure(
764 	unsigned int    pressure )
765 {
766 	char        *cp;
767 	uint32_t    *ip;
768 
769 	cp = commPagePtr32;
770 	if (cp) {
771 		cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS);
772 		ip = (uint32_t*) (void *) cp;
773 		*ip = (uint32_t) pressure;
774 	}
775 
776 	cp = commPagePtr64;
777 	if (cp) {
778 		cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS);
779 		ip = (uint32_t*) (void *) cp;
780 		*ip = (uint32_t) pressure;
781 	}
782 }
783 
784 /* Updated every time a logical CPU goes offline/online */
785 void
commpage_update_active_cpus(void)786 commpage_update_active_cpus(void)
787 {
788 	char        *cp;
789 	volatile uint8_t    *ip;
790 
791 	/* At least 32-bit commpage must be initialized */
792 	if (!commPagePtr32) {
793 		return;
794 	}
795 
796 	simple_lock(&commpage_active_cpus_lock, LCK_GRP_NULL);
797 
798 	cp = commPagePtr32;
799 	cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_BASE_ADDRESS);
800 	ip = (volatile uint8_t*) cp;
801 	*ip = (uint8_t) processor_avail_count_user;
802 
803 	cp = commPagePtr64;
804 	if (cp) {
805 		cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_START_ADDRESS);
806 		ip = (volatile uint8_t*) cp;
807 		*ip = (uint8_t) processor_avail_count_user;
808 	}
809 
810 	simple_unlock(&commpage_active_cpus_lock);
811 }
812 
813 /*
814  * Update the commpage with current kdebug state. This currently has bits for
815  * global trace state, and typefilter enablement. It is likely additional state
816  * will be tracked in the future.
817  *
818  * INVARIANT: This value will always be 0 if global tracing is disabled. This
819  * allows simple guard tests of "if (*_COMM_PAGE_KDEBUG_ENABLE) { ... }"
820  */
821 void
commpage_update_kdebug_state(void)822 commpage_update_kdebug_state(void)
823 {
824 	volatile uint32_t *saved_data_ptr;
825 	char *cp;
826 
827 	cp = commPagePtr32;
828 	if (cp) {
829 		cp += (_COMM_PAGE_KDEBUG_ENABLE - _COMM_PAGE32_BASE_ADDRESS);
830 		saved_data_ptr = (volatile uint32_t *)cp;
831 		*saved_data_ptr = kdebug_commpage_state();
832 	}
833 
834 	cp = commPagePtr64;
835 	if (cp) {
836 		cp += (_COMM_PAGE_KDEBUG_ENABLE - _COMM_PAGE32_START_ADDRESS);
837 		saved_data_ptr = (volatile uint32_t *)cp;
838 		*saved_data_ptr = kdebug_commpage_state();
839 	}
840 }
841 
842 /* Ditto for atm_diagnostic_config */
843 void
commpage_update_atm_diagnostic_config(uint32_t diagnostic_config)844 commpage_update_atm_diagnostic_config(uint32_t diagnostic_config)
845 {
846 	volatile uint32_t *saved_data_ptr;
847 	char *cp;
848 
849 	cp = commPagePtr32;
850 	if (cp) {
851 		cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_BASE_ADDRESS);
852 		saved_data_ptr = (volatile uint32_t *)cp;
853 		*saved_data_ptr = diagnostic_config;
854 	}
855 
856 	cp = commPagePtr64;
857 	if (cp) {
858 		cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_START_ADDRESS);
859 		saved_data_ptr = (volatile uint32_t *)cp;
860 		*saved_data_ptr = diagnostic_config;
861 	}
862 }
863 
864 /*
865  * update the commpage with if dtrace user land probes are enabled
866  */
867 void
commpage_update_dof(boolean_t enabled)868 commpage_update_dof(boolean_t enabled)
869 {
870 #if CONFIG_DTRACE
871 	char *cp;
872 
873 	cp = commPagePtr32;
874 	if (cp) {
875 		cp += (_COMM_PAGE_DTRACE_DOF_ENABLED - _COMM_PAGE32_BASE_ADDRESS);
876 		*cp = (enabled ? 1 : 0);
877 	}
878 
879 	cp = commPagePtr64;
880 	if (cp) {
881 		cp += (_COMM_PAGE_DTRACE_DOF_ENABLED - _COMM_PAGE32_START_ADDRESS);
882 		*cp = (enabled ? 1 : 0);
883 	}
884 #else
885 	(void)enabled;
886 #endif
887 }
888 
889 
890 /*
891  * update the dyld global config flags
892  */
893 void
commpage_update_dyld_flags(uint64_t value)894 commpage_update_dyld_flags(uint64_t value)
895 {
896 	char *cp;
897 
898 	cp = commPagePtr32;
899 	if (cp) {
900 		cp += (_COMM_PAGE_DYLD_FLAGS - _COMM_PAGE32_BASE_ADDRESS);
901 		*(uint64_t *)cp = value;
902 	}
903 
904 	cp = commPagePtr64;
905 	if (cp) {
906 		cp += (_COMM_PAGE_DYLD_FLAGS - _COMM_PAGE32_BASE_ADDRESS);
907 		*(uint64_t *)cp = value;
908 	}
909 }
910 
911 
/*
 * Update the commpage data for the last known value of mach_absolute_time().
 *
 * In each allocated commpage the stored value is only ever moved forward: it
 * is replaced by abstime when abstime is larger than the value just read.
 * Without CONFIG_MACH_APPROXIMATE_TIME this is a no-op.
 */
void
commpage_update_mach_approximate_time(uint64_t abstime)
{
#ifdef CONFIG_MACH_APPROXIMATE_TIME
	_Atomic uint64_t *slot;
	uint64_t observed;
	char *base;

	base = commPagePtr32;
	if (base != NULL) {
		slot = (_Atomic uint64_t *)(uintptr_t)(base + (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_BASE_ADDRESS));
		observed = atomic_load_explicit(slot, memory_order_relaxed);
		if (abstime > observed) {
			/* The success/fail result is ignored on purpose: if the
			 * value changed since it was read, "someone" has a newer
			 * timestamp than us and ours is now invalid. */
			atomic_compare_exchange_strong_explicit(slot,
			    &observed, abstime, memory_order_relaxed, memory_order_relaxed);
		}
	}

	base = commPagePtr64;
	if (base != NULL) {
		slot = (_Atomic uint64_t *)(uintptr_t)(base + (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_START_ADDRESS));
		observed = atomic_load_explicit(slot, memory_order_relaxed);
		if (abstime > observed) {
			/* Same reasoning as for the 32-bit page: a failed exchange
			 * means a newer timestamp is already in place. */
			atomic_compare_exchange_strong_explicit(slot,
			    &observed, abstime, memory_order_relaxed, memory_order_relaxed);
		}
	}
#else
#pragma unused (abstime)
#endif
}
953 
954 void
commpage_update_mach_continuous_time(uint64_t sleeptime)955 commpage_update_mach_continuous_time(uint64_t sleeptime)
956 {
957 	char *cp;
958 	cp = commPagePtr32;
959 	if (cp) {
960 		cp += (_COMM_PAGE_CONT_TIMEBASE - _COMM_PAGE32_START_ADDRESS);
961 		*(uint64_t *)cp = sleeptime;
962 	}
963 
964 	cp = commPagePtr64;
965 	if (cp) {
966 		cp += (_COMM_PAGE_CONT_TIMEBASE - _COMM_PAGE32_START_ADDRESS);
967 		*(uint64_t *)cp = sleeptime;
968 	}
969 }
970 
971 void
commpage_update_boottime(uint64_t boottime)972 commpage_update_boottime(uint64_t boottime)
973 {
974 	char *cp;
975 	cp = commPagePtr32;
976 	if (cp) {
977 		cp += (_COMM_PAGE_BOOTTIME_USEC - _COMM_PAGE32_START_ADDRESS);
978 		*(uint64_t *)cp = boottime;
979 	}
980 
981 	cp = commPagePtr64;
982 	if (cp) {
983 		cp += (_COMM_PAGE_BOOTTIME_USEC - _COMM_PAGE32_START_ADDRESS);
984 		*(uint64_t *)cp = boottime;
985 	}
986 }
987 
988 
989 extern user32_addr_t commpage_text32_location;
990 extern user64_addr_t commpage_text64_location;
991 
992 /* Check to see if a given address is in the Preemption Free Zone (PFZ) */
993 
994 uint32_t
commpage_is_in_pfz32(uint32_t addr32)995 commpage_is_in_pfz32(uint32_t addr32)
996 {
997 	if ((addr32 >= (commpage_text32_location + _COMM_TEXT_PFZ_START_OFFSET))
998 	    && (addr32 < (commpage_text32_location + _COMM_TEXT_PFZ_END_OFFSET))) {
999 		return 1;
1000 	} else {
1001 		return 0;
1002 	}
1003 }
1004 
1005 uint32_t
commpage_is_in_pfz64(addr64_t addr64)1006 commpage_is_in_pfz64(addr64_t addr64)
1007 {
1008 	if ((addr64 >= (commpage_text64_location + _COMM_TEXT_PFZ_START_OFFSET))
1009 	    && (addr64 < (commpage_text64_location + _COMM_TEXT_PFZ_END_OFFSET))) {
1010 		return 1;
1011 	} else {
1012 		return 0;
1013 	}
1014 }
1015