/*
 * Copyright (c) 2010-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include "dwarf_unwind.h"
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define	DEBUG_IDT64		1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *   interrupt	- asynchronous events typically from external devices
 *   trap	- synchronous events due to thread execution
 *   syscall	- synchronous system call request
 *   fatal	- fatal traps
 */
/*
 * Indices of handlers for each exception type.
 */
#define	HNDL_ALLINTRS		0
#define	HNDL_ALLTRAPS		1
#define	HNDL_SYSENTER		2
#define	HNDL_SYSCALL		3
#define	HNDL_UNIX_SCALL		4
#define	HNDL_MACH_SCALL		5
#define	HNDL_MDEP_SCALL		6
#define	HNDL_DOUBLE_FAULT	7
#define	HNDL_MACHINE_CHECK	8


/* Begin double-mapped descriptor section */

.section	__HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
/* 0x00 */	.quad EXT(ks_dispatch)
/* 0x08 */	.quad EXT(ks_64bit_return)
/* 0x10 */	.quad 0 /* Populated with CPU shadow displacement */
/* 0x18 */	.quad EXT(ks_32bit_return)
#define	TBL0_OFF_DISP_USER_WITH_POPRAX	0x20
/* 0x20 */	.quad EXT(ks_dispatch_user_with_pop_rax)
#define	TBL0_OFF_DISP_KERN_WITH_POPRAX	0x28
/* 0x28 */	.quad EXT(ks_dispatch_kernel_with_pop_rax)
#define	TBL0_OFF_PTR_KERNEL_STACK_MASK	0x30
/* 0x30 */	.quad 0 /* &kernel_stack_mask */
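/*
 * A note on the table indirection (an explanatory sketch, not new
 * mechanism): the stubs below live in the doublemapped __HIB segment,
 * so they cannot reach the ordinary .text trampolines with a direct
 * branch that is valid at both mappings.  Dispatch therefore reaches
 * them indirectly through this table, e.g.:
 *
 *	leaq	EXT(idt64_hndl_table0)(%rip), %rax
 *	jmp	*(%rax)			// -> ks_dispatch
 *
 * idt64_hndl_table1 (below) is indexed by the HNDL_* value that each
 * stub pushes; L_common_dispatch performs "jmp *(%rax, %rdx, 8)" with
 * %rdx holding that index to reach the class handler.
 */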

EXT(idt64_hndl_table1):
	.quad	EXT(hndl_allintrs)
	.quad	EXT(hndl_alltraps)
	.quad	EXT(hndl_sysenter)
	.quad	EXT(hndl_syscall)
	.quad	EXT(hndl_unix_scall)
	.quad	EXT(hndl_mach_scall)
	.quad	EXT(hndl_mdep_scall)
	.quad	EXT(hndl_double_fault)
	.quad	EXT(hndl_machine_check)
.text


/* The wrapper for all non-special traps/interrupts */
/* In the DEBUG_IDT64 variant, everything before the handler/vector
 * pushes exists only to output the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	push	%rax				;\
	POSTCODE2(0x6400+n)			;\
	pop	%rax				;\
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp L_dispatch
#endif

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)			 	\
	Entry(_intr_ ## n)			;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)
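
/*
 * For illustration (non-DEBUG build), TRAP(n, f) expands to a stub of
 * the form:
 *
 *	Entry(f)
 *	pushq	$0			// dummy error code
 *	pushq	$(HNDL_ALLTRAPS)	// handler-class index
 *	pushq	$(n)			// vector number
 *	jmp	L_dispatch
 *
 * TRAP_ERR omits the dummy error-code push because the CPU has already
 * pushed a real one for that vector.
 */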

/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq	$(HNDL_ALLTRAPS)
#if !(DEVELOPMENT || DEBUG)
	pushq	$(T_PAGE_FAULT)
	jmp	L_dispatch
#else
	pushq	$(T_PAGE_FAULT)

	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	testb	$3, 8+8+8+ISF64_CS(%rsp)	/* Coming from userspace? */
	jz	L_pfkern		/* No? (relatively uncommon), goto L_pfkern */

	/*
	 * We faulted from the user; if the fault address is at the user's %rip,
	 * abort trying to save the cacheline since that adds another page fault's
	 * overhead when we recover, below.
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	movq	%cr2, %rcx
	cmpq	%rbx, %rcx

	/* note that the next 3 instructions do not affect RFLAGS */
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax	/* Offset of per-CPU shadow */

	jne	L_dispatch_from_user_with_rbx_rcx_pushes
	jmp	abort_rip_cacheline_read

L_pfkern:
	/*
	 * Kernel page fault
	 * If the fault occurred while reading the user's code cache line, abort the cache line read;
	 * otherwise, treat this as a regular kernel fault
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	leaq	rip_cacheline_read(%rip), %rcx
	cmpq	%rcx, %rbx
	jb	regular_kernel_page_fault
	leaq	rip_cacheline_read_end(%rip), %rcx
	cmpq	%rcx, %rbx
	jbe	L_pf_on_clread	/* Did we hit a #PF within the cacheline read? */

regular_kernel_page_fault:
	/* No, regular kernel #PF */
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax

L_pf_on_clread:
	/*
	 * We faulted while trying to read user instruction memory at the parent fault's %rip; abort that action by
	 * changing the return address on the stack, restoring cr2 to its previous value, peeling off the pushes we
	 * added on entry to the page fault handler, then performing an iretq
	 */
	popq	%rcx
	movq	%rcx, %cr2
	popq	%rbx
	leaq	abort_rip_cacheline_read(%rip), %rax
	movq	%rax, 8+ISF64_RIP(%rsp)
	popq	%rax
	addq	$24, %rsp	/* pop the 2 pushes + the error code */
	iretq			/* Resume previous trap/fault processing */
#endif /* !(DEVELOPMENT || DEBUG) */
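
/*
 * A note on the "8+8+8+ISF64_xxx(%rsp)" addressing above (explanatory
 * only): on entry the CPU-pushed interrupt stack frame sits at
 * ISF64_xxx(%rsp), and each of the three subsequent GPR spills (%rax,
 * %rbx, %rcx) displaces the frame by a further 8 bytes, hence the
 * 24-byte bias while all three are live.
 */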

/*
 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
 * kernel while not on the kernel's gsbase.
 */
Entry(idt64_debug)
	/* Synthesize common interrupt stack frame */
	push	$0			/* error code */
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)
	/* Spill prior to RDMSR */
	push	%rax
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* Check contents of GSBASE MSR */
	test	$0x80000000, %edx		/* MSB set? Already swapped to kernel's */
	jnz	1f

	/*
	 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
	 * it must have happened within the very small window on entry or exit before or after (respectively)
	 * swapgs occurred.  In those cases, consider the #DB spurious and immediately return.
	 */
	testb	$3, 8+8+8+ISF64_CS(%rsp)
	jnz	2f
	pop	%rdx
	pop	%rcx
	pop	%rax
	addq	$0x18, %rsp	/* Remove synthesized interrupt stack frame */
	jmp	EXT(ret64_iret)
2:
	swapgs					/* direct from user */
1:
	pop	%rdx

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax /* Offset of per-CPU shadow */

	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3

	pop	%rcx

	/* Note that %rax will be popped from the stack in ks_dispatch, below */

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*(%rax)
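
/*
 * Why testing bit 31 of %edx suffices above (explanatory only): RDMSR
 * returns IA32_GS_BASE split across %edx:%eax, so bit 31 of %edx is bit
 * 63 of the base address.  The kernel's gsbase (cpu_data) is a
 * kernel-canonical address with bit 63 set, whereas a user gsbase is
 * not, so the MSB test distinguishes the two without any memory access.
 */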

/*
 * Legacy interrupt gate System call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_u64bit_entry_check

Entry(idt64_mach_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_u64bit_entry_check

Entry(idt64_mdep_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_u64bit_entry_check

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack as we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_segnp)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
	jmp	L_check_for_kern_flt

Entry(idt64_gen_prot)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	L_check_for_kern_flt

Entry(idt64_stack_fault)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	L_check_for_kern_flt

L_check_for_kern_flt:
	/*
	 * If we took a #GP or #SS from the kernel, check if we took them
	 * from either ret32_iret or ret64_iret.  If we did, we need to
	 * jump into L_dispatch at the swapgs so that the code in L_dispatch
	 * can proceed with the correct GSbase.
	 */
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jnz	L_dispatch_from_user_no_push_rax		/* Fault from user, go straight to dispatch */

	/* Check if the fault occurred in the 32-bit segment restoration window (which executes with user gsb) */
	leaq	L_32bit_seg_restore_begin(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jb	L_not_32bit_segrestores
	leaq	L_32bit_seg_restore_done(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jae	L_not_32bit_segrestores
	jmp	1f
L_not_32bit_segrestores:
	leaq	EXT(ret32_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	leaq	EXT(ret64_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	jmp	L_dispatch_from_kernel_no_push_rax
	/*
	 * We hit the fault on iretq, so check the original return %cs.  If
	 * it's a user %cs, fix up the stack and then jump to dispatch.
	 *
	 * With this type of fault, the stack is laid out as follows:
	 *
	 *
	 * orig %ss      saved_rsp+32
	 * orig %rsp     saved_rsp+24
	 * orig %rflags  saved_rsp+16
	 * orig %cs      saved_rsp+8
	 * orig %rip     saved_rsp
	 * ^^^^^^^^^ (maybe on another stack, since we switched to IST1)
	 * %ss           +64            -8
	 * saved_rsp     +56           -16
	 * %rflags       +48           -24
	 * %cs           +40           -32
	 * %rip          +32           -40
	 * error code    +24           -48
	 * handler       +16           -56
	 * trap number   +8            -64
	 * <saved %rax>  <== %rsp      -72
	 */
1:
	pushq	%rbx
	movq	16+ISF64_RSP(%rsp), %rbx
	movq	ISF64_CS-24(%rbx), %rax
	testb	$3, %al					/* If the original return destination was to user */
	jnz	2f
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax	/* Fault occurred when trying to return to kernel */
2:
	/*
	 * Fix the stack so the original trap frame is current, then jump to dispatch
	 */

	movq	%rax, 16+ISF64_CS(%rsp)

	movq	ISF64_RSP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RSP(%rsp)

	movq	ISF64_RIP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RIP(%rsp)

	movq	ISF64_SS-24(%rbx), %rax
	movq	%rax, 16+ISF64_SS(%rsp)

	movq	ISF64_RFLAGS-24(%rbx), %rax
	movq	%rax, 16+ISF64_RFLAGS(%rsp)

	popq	%rbx
	jmp	L_dispatch_from_user_no_push_rax


/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	pushq	$(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, but extreme care is required
 * because it may fire while control is already in another trampoline.
 *
 * We get here on the IST2 stack, which is used exclusively for NMIs.
 * Machine checks, double faults and similar use IST1.
 */
Entry(idt64_nmi)
	push	%rax
	push	%rcx
	push	%rdx
	testb	$3, ISF64_CS(%rsp)
	jz	1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax /* Offset of per-CPU shadow */
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3			/* note that SMAP is enabled in L_common_dispatch (on Broadwell+) */

	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx		/* adjust to base of ISF */

	leaq	TBL0_OFF_DISP_USER_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax		/* ks_dispatch_user_with_pop_rax */
	jmp	4f						/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Sets the high 32 bits of the return CS to 1 to ensure that we'll swapgs back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jnz	2f
	swapgs					/* so swap */
	movl	$1, ISF64_CS+4(%rsp)		/* and set flag in CS slot */
2:

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax /* Offset of per-CPU shadow */
	mov	%cr3, %rdx
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3 /* Unconditionally switch to primary kernel pagetables */

	/*
	 * Determine whether we were on the kernel or interrupt stack
	 * when the NMI hit.
	 */
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	movq	TBL0_OFF_PTR_KERNEL_STACK_MASK+EXT(idt64_hndl_table0)(%rip), %rdx
	mov	(%rdx), %rdx		/* Load kernel_stack_mask */
	and	%rdx, %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	jz	3f			/* yes */

	mov	%gs:CPU_INT_STACK_TOP, %rax
	cmp	%rcx, %rax		/* are we on the interrupt stack? */
	jb	5f			/* no */
	leaq	-INTSTACK_SIZE(%rax), %rax
	cmp	%rcx, %rax
	jb	3f			/* yes */
5:
	mov	%gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

	leaq	TBL0_OFF_DISP_KERN_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax		/* ks_dispatch_kernel_with_pop_rax */
4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 * ISF64_ERR(RSP)    saved RAX
	 * ISF64_TRAPFN(RSP) saved RCX
	 * ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx			/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	/* Synthesize common interrupt stack frame */
	push	$(0)				/* error code 0 */
	push	$(HNDL_ALLINTRS)		/* trapfn allintrs */
	push	$(T_NMI)			/* trapno T_NMI */
	push	ISF64_ERR(%rcx)			/* saved %rax is popped in ks_dispatch_{kernel|user}_with_pop_rax */
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx

	jmp	*(%rax)		/* ks_dispatch_{kernel|user}_with_pop_rax */
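
/*
 * The stack-membership test above, sketched (assuming, for illustration
 * only, a 16 KB kernel stack so that kernel_stack_mask == ~0x3fff):
 *
 *	(interrupted_rsp ^ cpu_kernel_stack) & ~0x3fff == 0
 *
 * holds exactly when both addresses fall within the same stack-aligned
 * region, i.e. the NMI interrupted code already running on this CPU's
 * kernel stack; the interrupt-stack case is then bounded explicitly via
 * CPU_INT_STACK_TOP and INTSTACK_SIZE.
 */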

Entry(idt64_double_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
     /* Use RAX as a temporary by shifting its contents into R11[32:63].
      * The system call number is defined to be a 32-bit quantity, as is
      * RFLAGS.
      */
	shlq	$32, %rax
	or 	%rax, %r11
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq	$0x12345678ABCDEFFFULL, %rax
     /* Generate offset to the double-mapped per-CPU data shadow
      * into RAX
      */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax
	mov	%rsp, %gs:CPU_UBER_TMP(%rax)  /* save user stack */
	mov	%gs:CPU_ESTACK(%rax), %rsp  /* switch stack to per-cpu estack */
	sub	$(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */

	mov	%gs:CPU_UBER_TMP(%rax), %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack --changed */

	mov	%r11, %rax
	shrq	$32, %rax		/* Restore RAX */
	mov	%r11d, %r11d		/* Clear r11[32:63] */

	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	swapgs
	jmp	L_dispatch			/* this can only be 64-bit */
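
/*
 * A sketch of the RAX/R11 packing used above (explanatory only): on
 * SYSCALL entry, %rcx holds the user RIP and %r11 the user RFLAGS, and
 * both the syscall number and RFLAGS are 32-bit quantities here, so the
 * handler parks the syscall number in the free upper half of %r11:
 *
 *	%r11 = (syscall# << 32) | rflags[31:0]
 *
 * It is unpacked once the exception stack is set up: "shrq $32" recovers
 * the syscall number into %rax, and "mov %r11d, %r11d" zero-extends the
 * low half, leaving the original RFLAGS value in %r11.
 */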

Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flag;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	pushq	$(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_u64bit_entry_check
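
/*
 * Why IF is OR'd into the saved frame above (explanatory only): SYSENTER
 * masks EFLAGS.IF on entry, so the pushf above captures flags with IF
 * already clear; setting it in the synthesized frame ensures the thread
 * resumes user execution with interrupts enabled.
 */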

#if DEVELOPMENT || DEBUG
do_cacheline_stash:
	/*
	 * Copy the cache line that includes the user's EIP/RIP into the shadow cpu structure
	 * for later extraction/sanity-checking in user_trap().
	 */

	pushq	%rbx
	pushq	%rcx
L_dispatch_from_user_with_rbx_rcx_pushes:
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	andq	$-64, %rbx	/* Round address to cacheline boundary */
	pushf
	/*
	 * disable SMAP, if it's enabled (note that CLAC is present in BDW and later only, so we're
	 * using generic instructions here without checking whether the CPU supports SMAP first)
	 */
	orq	$(1 << 18), (%rsp)
	popf
	/*
	 * Note that we only check for a faulting read on the first read, since if the first read
	 * succeeds, the rest of the cache line should also be readable since we are running with
	 * interrupts disabled here and a TLB invalidation cannot sneak in and pull the rug out.
	 */
	movq	%cr2, %rcx	/* stash the original %cr2 in case the first cacheline read triggers a #PF */
				/* This value of %cr2 is restored in the page fault handler if it detects */
				/* that the fault occurred on the next instruction, so the original #PF can */
				/* continue to be handled without issue. */
rip_cacheline_read:
	mov	(%rbx), %rcx
	/* Note that CPU_RTIMES in the shadow cpu struct was just a convenient place to stash the cacheline */
	mov	%rcx, %gs:CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	8(%rbx), %rcx
	mov	%rcx, %gs:8+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	16(%rbx), %rcx
	mov	%rcx, %gs:16+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	24(%rbx), %rcx
	mov	%rcx, %gs:24+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	32(%rbx), %rcx
	mov	%rcx, %gs:32+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	40(%rbx), %rcx
	mov	%rcx, %gs:40+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	48(%rbx), %rcx
	mov	%rcx, %gs:48+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
rip_cacheline_read_end:
	mov	56(%rbx), %rcx
	mov	%rcx, %gs:56+CPU_RTIMES(%rax)

	pushf
	andq	$~(1 << 18), (%rsp)	/* reenable SMAP */
	popf

	jmp	cacheline_read_cleanup_stack

abort_rip_cacheline_read:
	pushf
	andq	$~(1 << 18), (%rsp)	/* reenable SMAP */
	popf
abort_rip_cacheline_read_no_smap_reenable:
	movl	$0xdeadc0de, %ecx			/* Write a sentinel so higher-level code knows this was aborted */
	shlq	$32, %rcx
	movl	$0xbeefcafe, %ebx
	orq	%rbx, %rcx
	movq	%rcx, %gs:CPU_RTIMES(%rax)
	movq	%rcx, %gs:8+CPU_RTIMES(%rax)

cacheline_read_cleanup_stack:
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_kgsb
#endif /* if DEVELOPMENT || DEBUG */
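
/*
 * A note on the pushf/popf pairs above (explanatory only): bit 18 of
 * RFLAGS is the Alignment Check / Access Control (AC) flag, which is the
 * same bit that STAC/CLAC toggle.  Setting it via an or-on-the-stack
 * temporarily permits supervisor access to user pages under SMAP, and
 * also works on pre-Broadwell CPUs that lack the STAC/CLAC instructions.
 */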

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jz	1f
L_dispatch_from_user_no_push_rax:
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax	/* Offset of per-CPU shadow */

#if DEVELOPMENT || DEBUG
	/* Stash the cacheline for #UD, #PF, and #GP */
	cmpl	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_PAGE_FAULT), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_GENERAL_PROTECTION), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
#endif

L_dispatch_kgsb:
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3
#if	DEBUG
	mov	%rax, %gs:CPU_ENTRY_CR3
#endif
L_dispatch_from_kernel_no_push_rax:
1:
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	/* Indirect branch to non-doublemapped trampolines */
	jmp *(%rax)
/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)

	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15),  %r9
	mov	R64_R8(%r15),  %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push	R64_SS(%r15)
	push	R64_RSP(%r15)
	push	R64_RFLAGS(%r15)
	push	R64_CS(%r15)
	push	R64_RIP(%r15)

	cmpw	$(KERNEL64_CS), 8(%rsp)
	jne	1f			/* Returning to user (%r15 will be restored after the segment checks) */
	mov	R64_R15(%r15), %r15
	jmp	L_64b_kernel_return	/* Returning to kernel */

1:
	push	%rax				/* [A] */
	movl	%gs:CPU_NEED_SEGCHK, %eax
	push	%rax				/* [B] */

	/* Returning to user */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT	/* If the current task has an LDT, check and restore segment regs */
	jne	L_64b_segops_island

	/*
	 * Restore %r15, since we're now done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 load anyway.
	 * Note that %r15 is restored below for the segment-restore
	 * case, just after we no longer need to access register state
	 * relative to %r15.
	 */
	mov	R64_R15(%r15), %r15

	/*
	 * Note that this %cr3 sequence is duplicated here to save
	 * [at least] a load and comparison that would be required if
	 * this block were shared.
	 */
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %rax
#if	DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	swapgs

L_chk_sysret:
	pop	%rax	/* Matched to [B], above (segchk required) */

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * |  Return SS   | +40
	 * |  Return RSP  | +32
	 * |  Return RFL  | +24
	 * |  Return CS   | +16
	 * |  Return RIP  | +8
	 * |  Saved RAX   |  <-- rsp
	 * +--------------+
	 */

	cmpw	$(SYSCALL_CS), 16(%rsp) /* test for exit via SYSRET */
	je	L_sysret

	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_2

	pop	%rax		/* Matched to [A], above */

L_64b_kernel_return:
.globl EXT(ret64_iret)
EXT(ret64_iret):
	iretq			/* return from interrupt */


L_sysret:
	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_3

	pop	%rax		/* Matched to [A], above */
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 * 	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	pop	%rsp
	sysretq			/* return from system call */
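
/*
 * Sketch of the SYSRET frame unwinding above (explanatory only): SYSRETQ
 * architecturally loads RIP from %rcx, RFLAGS from %r11, and derives
 * CS/SS from the IA32_STAR MSR, so the CS slot in the synthesized frame
 * is simply skipped ("add $8, %rsp") and only RIP, RFLAGS and the user
 * RSP are popped before issuing the instruction.
 */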


L_verw_island_2:

	pop	%rax		/* Matched to [A], above */
	verw	32(%rsp)	/* verw operates on the %ss value already on the stack */
	jmp	EXT(ret64_iret)


L_verw_island_3:

	pop	%rax		/* Matched to [A], above */

	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 * 	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	verw	8(%rsp)		/* verw operates on the %ss value already on the stack */
	pop	%rsp
	sysretq			/* return from system call */


L_64b_segops_island:

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER64_CS and 0 */
	cmpw	$(USER64_CS), R64_CS(%r15)
	jz 	11f
	larw	R64_CS(%r15), %ax
	jnz	L_64_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_64_reset_cs		/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_64_reset_cs:
	movl	$(USER64_CS), R64_CS(%r15)
11:
	cmpw	$0, R64_DS(%r15)
	jz 	22f
	larw	R64_DS(%r15), %ax
	jz	22f
	movl	$0, R64_DS(%r15)
22:
	cmpw	$0, R64_ES(%r15)
	jz 	33f
	larw	R64_ES(%r15), %ax
	jz	33f
	movl	$0, R64_ES(%r15)
33:
	cmpw	$0, R64_FS(%r15)
	jz 	44f
	larw	R64_FS(%r15), %ax
	jz	44f
	movl	$0, R64_FS(%r15)
44:
	cmpw	$0, R64_GS(%r15)
	jz	55f
	larw	R64_GS(%r15), %ax
	jz	55f
	movl	$0, R64_GS(%r15)
55:
	/*
	 * Pack the segment registers in %rax since (%r15) will not
	 * be accessible after the %cr3 switch.
	 * Only restore %gs if cthread_self is zero; otherwise
	 * indicate to the code below (with a packed value of 0xFFFF)
	 * that %gs should not be restored.
	 */
	mov	%gs:CPU_ACTIVE_THREAD, %rax	/* Get the active thread */
	cmpq	$0, TH_CTH_SELF(%rax)
	je	L_restore_gs
	movw	$0xFFFF, %ax
	jmp	1f
L_restore_gs:
	movw	R64_GS(%r15), %ax
1:
	shlq	$16, %rax
	movw	R64_FS(%r15), %ax
	shlq	$16, %rax
	movw	R64_ES(%r15), %ax
	shlq	$16, %rax
	movw	R64_DS(%r15), %ax

	/*
	 * Restore %r15, since we're done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 switch.
	 */
	mov	R64_R15(%r15), %r15

	/* Discover user cr3/ASID */
	push	%rax
	mov	%gs:CPU_UCR3, %rax
#if	DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop	%rax
	swapgs

	/*
	 * Returning to user; restore segment registers that might be used
	 * by compatibility-mode code in a 64-bit user process.
	 *
	 * Note that if we take a fault here, it's OK that we haven't yet
	 * popped %rax from the stack, because %rsp will be reset to
	 * the value pushed onto the exception stack (above).
	 */
	movw	%ax, %ds
	shrq	$16, %rax

	movw	%ax, %es
	shrq	$16, %rax

	movw	%ax, %fs
	shrq	$16, %rax

	/*
	 * 0xFFFF is the sentinel set above that indicates we should
	 * not restore %gs (because GS.base was already set elsewhere,
	 * e.g. in act_machine_set_pcb or machine_thread_set_tsd_base)
	 */
	cmpw	$0xFFFF, %ax
	je	L_chk_sysret
	movw	%ax, %gs		/* Restore %gs to user-set value */
	jmp	L_chk_sysret
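
/*
 * LAR semantics relied on above (explanatory only): LAR sets ZF when the
 * selector is valid and visible at the current privilege level, and
 * clears ZF otherwise.  Hence "jz" after larw keeps a selector that
 * passed validation, while a failed load falls through and the saved
 * selector is replaced with a known-good value (USER64_CS or null).
 */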


L_u64bit_entry_check:
	/*
	 * Check we're not a confused 64-bit user.
	 */
	pushq	%rax
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax

	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP(%rax)
	jne	L_64bit_entry_reject
	jmp	L_dispatch_kgsb

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq	$(HNDL_ALLTRAPS), 8+ISF64_TRAPFN(%rsp)
	movq	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	jmp 	L_dispatch_kgsb

Entry(ks_32bit_return)

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_CS, USER_DS and 0 */
	cmpw	$(USER_CS), R32_CS(%r15)
	jz 	11f
	larw	R32_CS(%r15), %ax
	jnz	L_32_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_32_reset_cs		/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_32_reset_cs:
	movl	$(USER_CS), R32_CS(%r15)
11:
	cmpw	$(USER_DS), R32_DS(%r15)
	jz	22f
	cmpw	$0, R32_DS(%r15)
	jz 	22f
	larw	R32_DS(%r15), %ax
	jz	22f
	movl	$(USER_DS), R32_DS(%r15)
22:
	cmpw	$(USER_DS), R32_ES(%r15)
	jz	33f
	cmpw	$0, R32_ES(%r15)
	jz 	33f
	larw	R32_ES(%r15), %ax
	jz	33f
	movl	$(USER_DS), R32_ES(%r15)
33:
	cmpw	$(USER_DS), R32_FS(%r15)
	jz	44f
	cmpw	$0, R32_FS(%r15)
	jz 	44f
	larw	R32_FS(%r15), %ax
	jz	44f
	movl	$(USER_DS), R32_FS(%r15)
44:
	cmpw	$(USER_CTHREAD), R32_GS(%r15)
	jz	55f
	cmpw	$0, R32_GS(%r15)
	jz 	55f
	larw	R32_GS(%r15), %ax
	jz	55f
	movl	$(USER_CTHREAD), R32_GS(%r15)
55:

	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi
	movl	R32_DS(%r15), %r8d
	movl	R32_ES(%r15), %r9d
	movl	R32_FS(%r15), %r10d
	movl	R32_GS(%r15), %r11d

	/* Switch to the per-cpu (doublemapped) exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Now transfer the ISF to the exception stack in preparation for iret, below */
	movl	R32_SS(%r15), %r12d
	push	%r12
	movl	R32_UESP(%r15), %r12d
	push	%r12
	movl	R32_EFLAGS(%r15), %r12d
	push	%r12
	movl	R32_CS(%r15), %r12d
	push	%r12
	movl	R32_EIP(%r15), %r12d
	push	%r12

	movl	%gs:CPU_NEED_SEGCHK, %r14d	/* %r14 will be zeroed just before we return */

	/*
	 * Finally, switch to the user pagetables.  After this, all %gs-relative
	 * accesses MUST be to cpu shadow data ONLY.  Note that after we restore %gs
	 * (after the swapgs), no %gs-relative accesses should be performed.
	 */
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %r13
#if	DEBUG
	mov	%r13, %gs:CPU_EXIT_CR3
#endif
	mov	%r13, %cr3

	swapgs

	/*
	 * Restore segment registers. A #GP taken here will push state onto IST1,
	 * not the exception stack.  Note that the placement of the labels here
	 * corresponds to the fault address-detection logic (so do not change them
	 * without also changing that code).
	 */
L_32bit_seg_restore_begin:
	mov	%r8, %ds
	mov	%r9, %es
	mov	%r10, %fs
	mov	%r11, %gs
L_32bit_seg_restore_done:

	/* Zero 64-bit-exclusive GPRs to prevent data leaks */
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r15, %r15

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * |  Return SS   | +32
	 * |  Return RSP  | +24
	 * |  Return RFL  | +16
	 * |  Return CS   | +8
	 * |  Return RIP  | <-- rsp
	 * +--------------+
	 */

	cmpw	$(SYSENTER_CS), 8(%rsp)		/* test for sysexit */
	je	L_rtu_via_sysexit

	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island

L_after_verw:
	xor	%r14, %r14

.globl EXT(ret32_iret)
EXT(ret32_iret):
	iretq				/* return from interrupt */

L_verw_island:
	verw	32(%rsp)
	jmp	L_after_verw

L_verw_island_1:
	verw	16(%rsp)
	jmp	L_after_verw_1

L_rtu_via_sysexit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupts enable, sti below */

	/*
	 * %ss is now at 16(%rsp)
	 */
	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island_1
L_after_verw_1:
	xor	%r14, %r14

	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */


	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */
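
/*
 * Sysexit frame consumption above, sketched (explanatory only): SYSEXIT
 * (the 32-bit "sysexitl" form) loads EIP from %edx and ESP from %ecx and
 * derives CS/SS from the IA32_SYSENTER_CS MSR, so the %cs slot popped
 * from the frame is simply discarded, flags are restored via popf, and
 * the user stack pointer is left in %rcx for the instruction to consume.
 */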

/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq	%rax
	cmpw	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	EXT(ks_dispatch_kernel)

	mov 	%rax, %gs:CPU_UBER_TMP
	mov 	%gs:CPU_UBER_ISF, %rax
	add 	$(ISF64_SIZE), %rax

	xchg	%rsp, %rax
/* Memory to memory moves (ain't x86 wonderful):
 * Transfer the exception frame from the per-CPU exception stack to the
 * 'PCB' stack programmed at cswitch.
 */
	push	ISF64_SS(%rax)
	push	ISF64_RSP(%rax)
	push	ISF64_RFLAGS(%rax)
	push	ISF64_CS(%rax)
	push	ISF64_RIP(%rax)
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push 	ISF64_TRAPNO(%rax)
	mov	%gs:CPU_UBER_TMP, %rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user)
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

Entry(ks_dispatch_kernel_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_kernel)

Entry(ks_dispatch_kernel)
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs if a 64-bit task has
	 * installed customized segments in the LDT
	 */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT
	je	L_skip_save_extra_segregs

	mov	%ds, R64_DS(%r15)
	mov	%es, R64_ES(%r15)

L_skip_save_extra_segregs:
	mov	%fs, R64_FS(%r15)
	mov	%gs, R64_GS(%r15)


	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8,  R64_R8(%r15)
	mov	%r9,  R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

L_dispatch_U64_after_fault:
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	mov	%ds, R32_DS(%r15)
	mov	%es, R32_ES(%r15)
	mov	%fs, R32_FS(%r15)
	mov	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)
	/* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi		/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx		/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx		/* %rdx := trapfn for later */

L_common_dispatch:
	cld		/* Ensure the direction flag is clear in the kernel */
	cmpl	$0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac		/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * We mark the kernel's cr3 as "active" for TLB coherency evaluation.
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi			/* CS: user/kernel? */
	jz	2f				/* skip CR3 reload if from kernel */
	xor	%ebp, %ebp
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	11f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
11:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3			/* load kernel cr3 */
	jmp	4f
2:
	/* Deferred processing of pending kernel address space TLB invalidations */
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx
	jz	4f
	movl	$0, %gs:CPU_TLB_INVALID
	cmpb	$0, EXT(invpcid_enabled)(%rip)
	jz	L_cr4_island
	movl	$2, %ecx
	invpcid	%gs:CPU_IP_DESC, %rcx
4:
L_set_act:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	testq	%rcx, %rcx
	je	L_intcnt
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	jnz	L_dr7_island
L_intcnt:
	incl	%gs:hwIntCnt(,%ebx,4)		// Bump the trap/intr count
	/* Dispatch the designated handler */
	cmp	EXT(dblmap_base)(%rip), %rsp
	jb	66f
	cmp	EXT(dblmap_max)(%rip), %rsp
	jge	66f
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
66:
	leaq	EXT(idt64_hndl_table1)(%rip), %rax
	jmp	*(%rax, %rdx, 8)
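
/*
 * Dispatch indexing above, sketched (explanatory only): %rdx holds the
 * HNDL_* index that the stub pushed (e.g. HNDL_ALLTRAPS == 1), so
 * "jmp *(%rax, %rdx, 8)" selects the matching .quad from
 * idt64_hndl_table1 -- hndl_alltraps in that example.  The preceding
 * dblmap_base/dblmap_max comparison rebases %rsp and %r15 from the
 * doublemapped alias back to the canonical kernel mapping first.
 */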

L_cr4_island:
	mov	%cr4, %rcx      /* RMWW CR4, for lack of an alternative */
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4
	jmp	L_set_act
L_dr7_island:
	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
	jmp	L_intcnt
/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	cmpq	$0, TH_PCB_IDS(%rdx)	/* Is there a debug register context? */
	jnz	L_dr_restore_island
L_post_dr_restore:
	/*
	 * We now mark the task's address space as active for TLB coherency.
	 * Handle special cases such as pagezero-less tasks here.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island

L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	jne	L_32bit_return

	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)


L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*0x18(%rax)
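
/*
 * Indirect-return offsets above (explanatory only): per the
 * idt64_hndl_table0 layout at the top of this file, *8(%rax) is
 * ks_64bit_return and *0x18(%rax) is ks_32bit_return; the indirection
 * keeps these exits reachable from both the canonical and doublemapped
 * text mappings.
 */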


L_dr_restore_island:
	movq	TH_PCB_IDS(%rdx),%rax   /* Obtain this thread's debug state */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32 bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq 	%rcx, %gs:CPU_DR7
	jmp 	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov 	%rcx, %gs:CPU_DR7
2:
	jmp	L_post_dr_restore
L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpw	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp *8(%rax)

/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)			/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli					/* hold off intrs - critical section */
	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */


Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD,%r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	movq	TH_PCB_ISS(%r15), %r15		/* PCB stack */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax
	je	EXT(return_to_user)		/* branch if no AST */

L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
					/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:

	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(ast_taken_user)		/* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode.  No need to switch stacks.
 * Interrupts must be off here; we will restore them to their state at
 * the time of the trap as soon as it's safe to do so without recursing
 * into preemption handling.
 */
trap_from_kernel:

UNWIND_PROLOGUE

	movq	%r15, %rdi		/* saved state addr */

UNWIND_DIRECTIVES

	pushq   R64_RIP(%r15)           /* Simulate a CALL from fault point */
	pushq   %rbp                    /* Extend framepointer chain */
	movq    %rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq    %rbp
	addq    $8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST,%eax	/* get pending asts */
	testl	$(AST_URGENT),%eax		/* any urgent preemption? */
	je	ret_to_kernel			/* no, nothing to do */
	cmpl	$(T_PREEMPT),R64_TRAPNO(%r15)
	je	ret_to_kernel			/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF),R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK,%rax
	movq	%rsp,%rcx
	xorq	%rax,%rcx
	andq	EXT(kernel_stack_mask)(%rip),%rcx
	testq	%rcx,%rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL(ast_taken_kernel)         /* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel

UNWIND_EPILOGUE

/*
 * All interrupts on all tasks enter here with:
 *	r15	 x86_saved_state_t
 *	rsp	 kernel or interrupt stack
 *	esi	 cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)

UNWIND_PROLOGUE

	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP,%rcx
	cmpq	%rsp,%rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx),%rdx
	cmpq	%rsp,%rdx
	jb	int_from_intstack
1:
	xchgq	%rcx,%rsp		/* switch to interrupt stack */

	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15,%gs:CPU_INT_STATE	/* set intr state */

UNWIND_DIRECTIVES

	CCALL1(recount_enter_intel_interrupt, %r15) /* update time and PMCs */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */

UNWIND_EPILOGUE

.globl	EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	CCALL(recount_leave_intel_interrupt) /* update time and PMCs */

	popq	%gs:CPU_INT_STATE 	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD,%rax
	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
	cmpq	$0,%rax			/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
	cmpl	$0,%eax			/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R64_CS(%r15), %eax	/* assume 64-bit state */
	cmpl	$(SS_32), SS_FLAVOR(%r15) /* 32-bit? */
#if DEBUG_IDT64
	jne	5f
	movl	R32_CS(%r15),%eax	/* 32-bit user mode */
	jmp	3f
5:
	cmpl	$(SS_64),SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	je	4f
#endif
3:
	testb	$3,%al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt occurred in the kernel context
	 * and preemption isn't disabled
	 */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	$(AST_URGENT),%eax		/* any urgent requests? */
	je	ret_to_kernel			/* no, nothing to do */

	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel			/* yes, skip it */

	/*
	 * Take an AST from kernel space.  We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel
4:
	movl	R32_CS(%r15),%eax	/* 32-bit user mode */
	jmp	3b


/*
 * nested int - simple path, can't preempt etc. on the way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 *	Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 *
 * 32bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	 x86_saved_state32_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15),%eax
	testl	%eax,%eax
	js	EXT(hndl_mach_scall)		/* < 0 => mach */
						/* > 0 => unix */

Entry(hndl_unix_scall)

	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64bit Tasks
 * System call entries via syscall only:
 *
 *	r15	 x86_saved_state64_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
	movl	R64_RAX(%r15), %eax		/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */
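
/*
 * Illustrative note on class decoding (assuming the SYSCALL_CLASS_*
 * encoding from <mach/i386/syscall_sw.h>, where the class occupies the
 * high-order bits of the 32-bit syscall number): masking %eax with
 * SYSCALL_CLASS_MASK isolates the class field, which is then compared
 * against each class constant shifted into the same position.  A Mach
 * trap and a BSD syscall with the same low-order number are therefore
 * disambiguated purely by this field.
 */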


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */



Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt