/*
 * Copyright (c) 2010-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include "dwarf_unwind.h"
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define DEBUG_IDT64 1
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 *		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *   interrupt	- asynchronous events typically from external devices
 *   trap	- synchronous events due to thread execution
 *   syscall	- synchronous system call request
 *   fatal	- fatal traps
 */
/*
 * Indices of handlers for each exception type.
 */
#define HNDL_ALLINTRS		0
#define HNDL_ALLTRAPS		1
#define HNDL_SYSENTER		2
#define HNDL_SYSCALL		3
#define HNDL_UNIX_SCALL		4
#define HNDL_MACH_SCALL		5
#define HNDL_MDEP_SCALL		6
#define HNDL_DOUBLE_FAULT	7
#define HNDL_MACHINE_CHECK	8
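/*
 * These indices select the class handler at dispatch time: each stub pushes
 * one of them into the trap frame, and L_common_dispatch (below) reloads it
 * into %rdx and performs an indexed indirect jump through idt64_hndl_table1:
 *	leaq	EXT(idt64_hndl_table1)(%rip), %rax
 *	jmp	*(%rax, %rdx, 8)
 * The order of these #defines must therefore match the order of the .quad
 * entries in idt64_hndl_table1 below.
 */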

/* Begin double-mapped descriptor section */

.section __HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
/* 0x00 */	.quad EXT(ks_dispatch)
/* 0x08 */	.quad EXT(ks_64bit_return)
/* 0x10 */	.quad 0 /* Populated with CPU shadow displacement */
/* 0x18 */	.quad EXT(ks_32bit_return)
#define TBL0_OFF_DISP_USER_WITH_POPRAX	0x20
/* 0x20 */	.quad EXT(ks_dispatch_user_with_pop_rax)
#define TBL0_OFF_DISP_KERN_WITH_POPRAX	0x28
/* 0x28 */	.quad EXT(ks_dispatch_kernel_with_pop_rax)
#define TBL0_OFF_PTR_KERNEL_STACK_MASK	0x30
/* 0x30 */	.quad 0 /* &kernel_stack_mask */

EXT(idt64_hndl_table1):
	.quad	EXT(hndl_allintrs)
	.quad	EXT(hndl_alltraps)
	.quad	EXT(hndl_sysenter)
	.quad	EXT(hndl_syscall)
	.quad	EXT(hndl_unix_scall)
	.quad	EXT(hndl_mach_scall)
	.quad	EXT(hndl_mdep_scall)
	.quad	EXT(hndl_double_fault)
	.quad	EXT(hndl_machine_check)
.text


/* The wrapper for all non-special traps/interrupts */
/* Everything up to PUSH_FUNCTION is just to output
 * the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	push	%rax				;\
	POSTCODE2(0x6400+n)			;\
	pop	%rax				;\
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp	L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp	L_dispatch
#endif

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)				 \
	Entry(_intr_ ## n)			;\
	pushq	$0				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq	$(HNDL_ALLTRAPS)
#if !(DEVELOPMENT || DEBUG)
	pushq	$(T_PAGE_FAULT)
	jmp	L_dispatch
#else
	pushq	$(T_PAGE_FAULT)

	pushq	%rax
	pushq	%rbx
	pushq	%rcx
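	/*
	 * The three GPR spills above sit on top of the hardware-pushed
	 * interrupt stack frame, so the ISF64_* offsets in this block are
	 * biased by 8+8+8 bytes (%rcx, %rbx, %rax) until those registers
	 * are popped again.
	 */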
	testb	$3, 8+8+8+ISF64_CS(%rsp)	/* Coming from userspace? */
	jz	L_pfkern	/* No? (relatively uncommon), goto L_pfkern */

	/*
	 * We faulted from the user; if the fault address is at the user's %rip,
	 * abort trying to save the cacheline since that adds another page fault's
	 * overhead when we recover, below.
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	movq	%cr2, %rcx
	cmpq	%rbx, %rcx

	/* note that the next 3 instructions do not affect RFLAGS */
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */

	jne	L_dispatch_from_user_with_rbx_rcx_pushes
	jmp	abort_rip_cacheline_read

L_pfkern:
	/*
	 * Kernel page fault
	 * If the fault occurred while reading from the user's code cache line, abort the cache line read;
	 * otherwise, treat this as a regular kernel fault
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	leaq	rip_cacheline_read(%rip), %rcx
	cmpq	%rcx, %rbx
	jb	regular_kernel_page_fault
	leaq	rip_cacheline_read_end(%rip), %rcx
	cmpq	%rcx, %rbx
	jbe	L_pf_on_clread		/* Did we hit a #PF within the cacheline read? */

regular_kernel_page_fault:
	/* No, regular kernel #PF */
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax

L_pf_on_clread:
	/*
	 * We faulted while trying to read user instruction memory at the parent fault's %rip; abort that action by
	 * changing the return address on the stack, restoring cr2 to its previous value, peeling off the pushes we
	 * added on entry to the page fault handler, then performing an iretq
	 */
	popq	%rcx
	movq	%rcx, %cr2
	popq	%rbx
	leaq	abort_rip_cacheline_read(%rip), %rax
	movq	%rax, 8+ISF64_RIP(%rsp)
	popq	%rax
	addq	$24, %rsp		/* pop the 2 pushes + the error code */
	iretq				/* Resume previous trap/fault processing */
#endif /* !(DEVELOPMENT || DEBUG) */

/*
 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
 * kernel but not on the kernel's gsbase.
 */
Entry(idt64_debug)
	/* Synthesize common interrupt stack frame */
	push	$0			/* error code */
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)
	/* Spill prior to RDMSR */
	push	%rax
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* Check contents of GSBASE MSR */
	test	$0x80000000, %edx	/* MSB set? Already swapped to kernel's */
	jnz	1f

	/*
	 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
	 * it must have happened within the very small window on entry or exit before or after (respectively)
	 * swapgs occurred. In those cases, consider the #DB spurious and immediately return.
	 */
	testb	$3, 8+8+8+ISF64_CS(%rsp)
	jnz	2f
	pop	%rdx
	pop	%rcx
	pop	%rax
	addq	$0x18, %rsp		/* Remove synthesized interrupt stack frame */
	jmp	EXT(ret64_iret)
2:
	swapgs				/* direct from user */
1:
	pop	%rdx

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */

	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3

	pop	%rcx

	/* Note that %rax will be popped from the stack in ks_dispatch, below */

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*(%rax)

/*
 * Legacy interrupt gate System call handlers.
 * These are entered via a syscall interrupt. The system call number in %rax
 * is saved to the error code slot in the stack frame. We then branch to the
 * common state saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
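/*
 * These three vectors are reachable only via software interrupt
 * instructions issued by compatibility-mode (32-bit) tasks; a 64-bit
 * task attempting to enter through them is caught in
 * L_u64bit_entry_check below and has its frame rewritten so the attempt
 * is delivered as T_INVALID_OPCODE.
 */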
Entry(idt64_unix_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_u64bit_entry_check

Entry(idt64_mach_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_u64bit_entry_check

Entry(idt64_mdep_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_u64bit_entry_check

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack as we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_segnp)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
	jmp	L_check_for_kern_flt

Entry(idt64_gen_prot)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	L_check_for_kern_flt

Entry(idt64_stack_fault)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	L_check_for_kern_flt

L_check_for_kern_flt:
	/*
	 * If we took a #GP or #SS from the kernel, check if we took them
	 * from either ret32_iret or ret64_iret. If we did, we need to
	 * jump into L_dispatch at the swapgs so that the code in L_dispatch
	 * can proceed with the correct GSbase.
	 */
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jnz	L_dispatch_from_user_no_push_rax	/* Fault from user, go straight to dispatch */

	/* Check if the fault occurred in the 32-bit segment restoration window (which executes with the user's GSbase) */
	leaq	L_32bit_seg_restore_begin(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jb	L_not_32bit_segrestores
	leaq	L_32bit_seg_restore_done(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jae	L_not_32bit_segrestores
	jmp	1f
L_not_32bit_segrestores:
	leaq	EXT(ret32_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	leaq	EXT(ret64_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	jmp	L_dispatch_from_kernel_no_push_rax
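	/*
	 * Control reaches "1:" only when the fault hit one of the
	 * return-to-user windows above (a faulting iretq, or the 32-bit
	 * segment restores). In those windows the kernel is already
	 * running with the user's GSbase, so the fault cannot be handled
	 * as an ordinary kernel fault; instead the frame the faulting
	 * return was about to consume is inspected and, if it targeted
	 * user mode, the current frame is rebuilt from it below.
	 */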
	/*
	 * We hit the fault on iretq, so check the original return %cs. If
	 * it's a user %cs, fix up the stack and then jump to dispatch.
	 *
	 * With this type of fault, the stack is laid out as follows:
	 *
	 * orig %ss	saved_rsp+32
	 * orig %rsp	saved_rsp+24
	 * orig %rflags	saved_rsp+16
	 * orig %cs	saved_rsp+8
	 * orig %rip	saved_rsp
	 * ^^^^^^^^^ (maybe on another stack, since we switched to IST1)
	 * %ss		+64	-8
	 * saved_rsp	+56	-16
	 * %rflags	+48	-24
	 * %cs		+40	-32
	 * %rip		+32	-40
	 * error code	+24	-48
	 * handler	+16	-56
	 * trap number	+8	-64
	 * <saved %rax>	<== %rsp	-72
	 */
1:
	pushq	%rbx
	movq	16+ISF64_RSP(%rsp), %rbx
	movq	ISF64_CS-24(%rbx), %rax
	testb	$3, %al			/* If the original return destination was to user */
	jnz	2f
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax	/* Fault occurred when trying to return to kernel */
2:
	/*
	 * Fix the stack so the original trap frame is current, then jump to dispatch
	 */

	movq	%rax, 16+ISF64_CS(%rsp)

	movq	ISF64_RSP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RSP(%rsp)

	movq	ISF64_RIP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RIP(%rsp)

	movq	ISF64_SS-24(%rbx), %rax
	movq	%rax, 16+ISF64_SS(%rsp)

	movq	ISF64_RFLAGS-24(%rbx), %rax
	movq	%rax, 16+ISF64_RFLAGS(%rsp)

	popq	%rbx
	jmp	L_dispatch_from_user_no_push_rax


/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	pushq	$(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, but extreme care is required
 * because it may fire while control is already inside another trampoline.
 *
 * We get here on the IST2 stack, which is used exclusively for NMIs.
 * Machine checks, double faults and similar use IST1.
 */
Entry(idt64_nmi)
	push	%rax
	push	%rcx
	push	%rdx
	testb	$3, ISF64_CS(%rsp)
	jz	1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3		/* note that SMAP is enabled in L_common_dispatch (on Broadwell+) */

	mov	%gs:CPU_UBER_ISF, %rcx	/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx	/* adjust to base of ISF */

	leaq	TBL0_OFF_DISP_USER_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_user_with_pop_rax */
	jmp	4f			/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS is set.
	 * Sets the high 32 bits of the return CS to 1 to ensure that we'll swapgs back correctly at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr				/* read kernel gsbase */
	test	$0x80000000, %edx	/* test MSB of address */
	jnz	2f
	swapgs				/* so swap */
	movl	$1, ISF64_CS+4(%rsp)	/* and set flag in CS slot */
2:

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */
	mov	%cr3, %rdx
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3		/* Unconditionally switch to primary kernel pagetables */

	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	movq	TBL0_OFF_PTR_KERNEL_STACK_MASK+EXT(idt64_hndl_table0)(%rip), %rdx
	mov	(%rdx), %rdx		/* Load kernel_stack_mask */
	and	%rdx, %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	jz	3f			/* yes */

	mov	%gs:CPU_INT_STACK_TOP, %rax
	cmp	%rcx, %rax		/* are we on the interrupt stack? */
	jb	5f			/* no */
	leaq	-INTSTACK_SIZE(%rax), %rax
	cmp	%rcx, %rax
	jb	3f			/* yes */
5:
	mov	%gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

	leaq	TBL0_OFF_DISP_KERN_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax /* ks_dispatch_kernel_with_pop_rax */
4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 * ISF64_ERR(RSP)    saved RAX
	 * ISF64_TRAPFN(RSP) saved RCX
	 * ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx		/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	/* Synthesize common interrupt stack frame */
	push	$(0)			/* error code 0 */
	push	$(HNDL_ALLINTRS)	/* trapfn allintrs */
	push	$(T_NMI)		/* trapno T_NMI */
	push	ISF64_ERR(%rcx)		/* saved %rax is popped in ks_dispatch_{kernel|user}_with_pop_rax */
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx

	jmp	*(%rax)			/* ks_dispatch_{kernel|user}_with_pop_rax */

Entry(idt64_double_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
	/* Use RAX as a temporary by shifting its contents into R11[32:63].
	 * The system call number is defined to be a 32-bit quantity, as is
	 * RFLAGS.
	 */
	shlq	$32, %rax
	or	%rax, %r11
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq	$0x12345678ABCDEFFFULL, %rax
	/* Generate offset to the double-mapped per-CPU data shadow
	 * into RAX
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax
	mov	%rsp, %gs:CPU_UBER_TMP(%rax)	/* save user stack */
	mov	%gs:CPU_ESTACK(%rax), %rsp	/* switch stack to per-cpu estack */
	sub	$(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */

	mov	%gs:CPU_UBER_TMP(%rax), %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack --changed */

	mov	%r11, %rax
	shrq	$32, %rax			/* Restore RAX */
	mov	%r11d, %r11d			/* Clear r11[32:63] */

	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	swapgs
	jmp	L_dispatch			/* this can only be 64-bit */

Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flag bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */
L_sysenter_continue:
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	pushq	$(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_u64bit_entry_check

#if DEVELOPMENT || DEBUG
do_cacheline_stash:
	/*
	 * Copy the cache line that includes the user's EIP/RIP into the shadow cpu structure
	 * for later extraction/sanity-checking in user_trap().
	 */

	pushq	%rbx
	pushq	%rcx
L_dispatch_from_user_with_rbx_rcx_pushes:
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	andq	$-64, %rbx		/* Round address to cacheline boundary */
	pushf
	/*
	 * disable SMAP, if it's enabled (note that CLAC is present in BDW and later only, so we're
	 * using generic instructions here without checking whether the CPU supports SMAP first)
	 */
	orq	$(1 << 18), (%rsp)
	popf
	/*
	 * Note that we only check for a faulting read on the first read, since if the first read
	 * succeeds, the rest of the cache line should also be readable since we are running with
	 * interrupts disabled here and a TLB invalidation cannot sneak in and pull the rug out.
	 */
	movq	%cr2, %rcx	/* stash the original %cr2 in case the first cacheline read triggers a #PF */
	/* This value of %cr2 is restored in the page fault handler if it detects */
	/* that the fault occurred on the next instruction, so the original #PF can */
	/* continue to be handled without issue. */
rip_cacheline_read:
	mov	(%rbx), %rcx
	/* Note that CPU_RTIMES in the shadow cpu struct was just a convenient place to stash the cacheline */
	mov	%rcx, %gs:CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	8(%rbx), %rcx
	mov	%rcx, %gs:8+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	16(%rbx), %rcx
	mov	%rcx, %gs:16+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	24(%rbx), %rcx
	mov	%rcx, %gs:24+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	32(%rbx), %rcx
	mov	%rcx, %gs:32+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	40(%rbx), %rcx
	mov	%rcx, %gs:40+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
	mov	48(%rbx), %rcx
	mov	%rcx, %gs:48+CPU_RTIMES(%rax)
	movq	%cr2, %rcx
rip_cacheline_read_end:
	mov	56(%rbx), %rcx
	mov	%rcx, %gs:56+CPU_RTIMES(%rax)

	pushf
	andq	$~(1 << 18), (%rsp)	/* re-enable SMAP */
	popf

	jmp	cacheline_read_cleanup_stack

abort_rip_cacheline_read:
	pushf
	andq	$~(1 << 18), (%rsp)	/* re-enable SMAP */
	popf
abort_rip_cacheline_read_no_smap_reenable:
	movl	$0xdeadc0de, %ecx	/* Write a sentinel so higher-level code knows this was aborted */
	shlq	$32, %rcx
	movl	$0xbeefcafe, %ebx
	orq	%rbx, %rcx
	movq	%rcx, %gs:CPU_RTIMES(%rax)
	movq	%rcx, %gs:8+CPU_RTIMES(%rax)

cacheline_read_cleanup_stack:
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_kgsb
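
/*
 * The 0xdeadc0de/0xbeefcafe pattern written by abort_rip_cacheline_read
 * above is a sentinel: it lets the later sanity-checking in user_trap()
 * recognize that the cacheline snapshot in CPU_RTIMES was aborted rather
 * than interpret it as real instruction bytes.
 */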
#endif /* DEVELOPMENT || DEBUG */

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jz	1f
L_dispatch_from_user_no_push_rax:
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax		/* Offset of per-CPU shadow */

#if DEVELOPMENT || DEBUG
	/* Stash the cacheline for #UD, #PF, and #GP */
	cmpl	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_PAGE_FAULT), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_GENERAL_PROTECTION), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
#endif

L_dispatch_kgsb:
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3
#if DEBUG
	mov	%rax, %gs:CPU_ENTRY_CR3
#endif
L_dispatch_from_kernel_no_push_rax:
1:
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	/* Indirect branch to non-doublemapped trampolines */
	jmp	*(%rax)

/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)

	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15), %r9
	mov	R64_R8(%r15), %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push	R64_SS(%r15)
	push	R64_RSP(%r15)
	push	R64_RFLAGS(%r15)
	push	R64_CS(%r15)
	push	R64_RIP(%r15)

	cmpw	$(KERNEL64_CS), 8(%rsp)
	jne	1f		/* Returning to user (%r15 will be restored after the segment checks) */
	mov	R64_R15(%r15), %r15
	jmp	L_64b_kernel_return	/* Returning to kernel */

1:
	push	%rax		/* [A] */
	movl	%gs:CPU_NEED_SEGCHK, %eax
	push	%rax		/* [B] */

	/* Returning to user */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT	/* If the current task has an LDT, check and restore segment regs */
	jne	L_64b_segops_island

	/*
	 * Restore %r15, since we're now done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 load anyway.
	 * Note that %r15 is restored below for the segment-restore
	 * case, just after we no longer need to access register state
	 * relative to %r15.
	 */
	mov	R64_R15(%r15), %r15

	/*
	 * Note that this %cr3 sequence is duplicated here to save
	 * [at least] a load and comparison that would be required if
	 * this block were shared.
	 */
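	/*
	 * After the mov to %cr3 below, the kernel's primary mappings are
	 * no longer reachable; execution continues only because this
	 * return path lives in the double-mapped trampoline text that is
	 * also present in the user's pagetables.
	 */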
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %rax
#if DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	swapgs

L_chk_sysret:
	pop	%rax		/* Matched to [B], above (segchk required) */

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * | Return SS    | +40
	 * | Return RSP   | +32
	 * | Return RFL   | +24
	 * | Return CS    | +16
	 * | Return RIP   | +8
	 * | Saved RAX    | <-- rsp
	 * +--------------+
	 */

	cmpw	$(SYSCALL_CS), 16(%rsp)	/* test for exit via SYSRET */
	je	L_sysret

	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_2

	pop	%rax		/* Matched to [A], above */

L_64b_kernel_return:
.globl EXT(ret64_iret)
EXT(ret64_iret):
	iretq			/* return from interrupt */


L_sysret:
	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_3

	pop	%rax		/* Matched to [A], above */
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	pop	%rsp
	sysretq			/* return from system call */


L_verw_island_2:

	pop	%rax		/* Matched to [A], above */
	verw	32(%rsp)	/* verw operates on the %ss value already on the stack */
	jmp	EXT(ret64_iret)


L_verw_island_3:

	pop	%rax		/* Matched to [A], above */

	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 *	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	verw	8(%rsp)		/* verw operates on the %ss value already on the stack */
	pop	%rsp
	sysretq			/* return from system call */


L_64b_segops_island:

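	/*
	 * LAR semantics, relied on by the checks below: if the selector is
	 * valid and its descriptor is accessible at the current privilege
	 * level, LAR loads the access rights and sets ZF; otherwise it
	 * clears ZF. A clear ZF therefore means the saved selector would
	 * fault if loaded on exit, so it is replaced with a known-good
	 * selector (or NULL).
	 */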
	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER64_CS and 0 */
	cmpw	$(USER64_CS), R64_CS(%r15)
	jz	11f
	larw	R64_CS(%r15), %ax
	jnz	L_64_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_64_reset_cs	/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_64_reset_cs:
	movl	$(USER64_CS), R64_CS(%r15)
11:
	cmpw	$0, R64_DS(%r15)
	jz	22f
	larw	R64_DS(%r15), %ax
	jz	22f
	movl	$0, R64_DS(%r15)
22:
	cmpw	$0, R64_ES(%r15)
	jz	33f
	larw	R64_ES(%r15), %ax
	jz	33f
	movl	$0, R64_ES(%r15)
33:
	cmpw	$0, R64_FS(%r15)
	jz	44f
	larw	R64_FS(%r15), %ax
	jz	44f
	movl	$0, R64_FS(%r15)
44:
	cmpw	$0, R64_GS(%r15)
	jz	55f
	larw	R64_GS(%r15), %ax
	jz	55f
	movl	$0, R64_GS(%r15)
55:
	/*
	 * Pack the segment registers in %rax since (%r15) will not
	 * be accessible after the %cr3 switch.
	 * Only restore %gs if cthread_self is zero; otherwise indicate
	 * this to the code below with a sentinel value of 0xFFFF.
	 */
	mov	%gs:CPU_ACTIVE_THREAD, %rax	/* Get the active thread */
	cmpq	$0, TH_CTH_SELF(%rax)
	je	L_restore_gs
	movw	$0xFFFF, %ax
	jmp	1f
L_restore_gs:
	movw	R64_GS(%r15), %ax
1:
	shlq	$16, %rax
	movw	R64_FS(%r15), %ax
	shlq	$16, %rax
	movw	R64_ES(%r15), %ax
	shlq	$16, %rax
	movw	R64_DS(%r15), %ax

	/*
	 * Restore %r15, since we're done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 switch.
	 */
	mov	R64_R15(%r15), %r15

	/* Discover user cr3/ASID */
	push	%rax
	mov	%gs:CPU_UCR3, %rax
#if DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop	%rax
	swapgs

	/*
	 * Returning to user; restore segment registers that might be used
	 * by compatibility-mode code in a 64-bit user process.
	 *
	 * Note that if we take a fault here, it's OK that we haven't yet
	 * popped %rax from the stack, because %rsp will be reset to
	 * the value pushed onto the exception stack (above).
	 */
	movw	%ax, %ds
	shrq	$16, %rax

	movw	%ax, %es
	shrq	$16, %rax

	movw	%ax, %fs
	shrq	$16, %rax

	/*
	 * 0xFFFF is the sentinel set above that indicates we should
	 * not restore %gs (because GS.base was already set elsewhere,
	 * e.g. in act_machine_set_pcb or machine_thread_set_tsd_base).
	 */
	cmpw	$0xFFFF, %ax
	je	L_chk_sysret
	movw	%ax, %gs	/* Restore %gs to user-set value */
	jmp	L_chk_sysret


L_u64bit_entry_check:
	/*
	 * Check we're not a confused 64-bit user.
	 */
	pushq	%rax
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax

	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP(%rax)
	jne	L_64bit_entry_reject
	jmp	L_dispatch_kgsb

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq	$(HNDL_ALLTRAPS), 8+ISF64_TRAPFN(%rsp)
	movq	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	jmp	L_dispatch_kgsb

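/*
 * ks_32bit_return is the 32-bit counterpart of ks_64bit_return; it is
 * reached through entry 0x18 of idt64_hndl_table0 (the jmp *0x18(%rax)
 * at L_32bit_return, below).
 */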
Entry(ks_32bit_return)

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_CS, USER_DS and 0 */
	cmpw	$(USER_CS), R32_CS(%r15)
	jz	11f
	larw	R32_CS(%r15), %ax
	jnz	L_32_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_32_reset_cs	/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_32_reset_cs:
	movl	$(USER_CS), R32_CS(%r15)
11:
	cmpw	$(USER_DS), R32_DS(%r15)
	jz	22f
	cmpw	$0, R32_DS(%r15)
	jz	22f
	larw	R32_DS(%r15), %ax
	jz	22f
	movl	$(USER_DS), R32_DS(%r15)
22:
	cmpw	$(USER_DS), R32_ES(%r15)
	jz	33f
	cmpw	$0, R32_ES(%r15)
	jz	33f
	larw	R32_ES(%r15), %ax
	jz	33f
	movl	$(USER_DS), R32_ES(%r15)
33:
	cmpw	$(USER_DS), R32_FS(%r15)
	jz	44f
	cmpw	$0, R32_FS(%r15)
	jz	44f
	larw	R32_FS(%r15), %ax
	jz	44f
	movl	$(USER_DS), R32_FS(%r15)
44:
	cmpw	$(USER_CTHREAD), R32_GS(%r15)
	jz	55f
	cmpw	$0, R32_GS(%r15)
	jz	55f
	larw	R32_GS(%r15), %ax
	jz	55f
	movl	$(USER_CTHREAD), R32_GS(%r15)
55:

	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi
	movl	R32_DS(%r15), %r8d
	movl	R32_ES(%r15), %r9d
	movl	R32_FS(%r15), %r10d
	movl	R32_GS(%r15), %r11d

	/* Switch to the per-cpu (doublemapped) exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Now transfer the ISF to the exception stack in preparation for iret, below */
	movl	R32_SS(%r15), %r12d
	push	%r12
	movl	R32_UESP(%r15), %r12d
	push	%r12
	movl	R32_EFLAGS(%r15), %r12d
	push	%r12
	movl	R32_CS(%r15), %r12d
	push	%r12
	movl	R32_EIP(%r15), %r12d
	push	%r12

	movl	%gs:CPU_NEED_SEGCHK, %r14d	/* %r14 will be zeroed just before we return */

	/*
	 * Finally, switch to the user pagetables. After this, all %gs-relative
	 * accesses MUST be to cpu shadow data ONLY. Note that after we restore %gs
	 * (after the swapgs), no %gs-relative accesses should be performed.
	 */
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %r13
#if DEBUG
	mov	%r13, %gs:CPU_EXIT_CR3
#endif
	mov	%r13, %cr3

	swapgs

	/*
	 * Restore segment registers. A #GP taken here will push state onto IST1,
	 * not the exception stack. Note that the placement of the labels here
	 * corresponds to the fault address-detection logic (so do not change them
	 * without also changing that code).
	 */
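	/*
	 * L_check_for_kern_flt (above) special-cases faults whose %rip lies
	 * in [L_32bit_seg_restore_begin, L_32bit_seg_restore_done): at this
	 * point the user CR3 and user GSbase are already live, so such a
	 * fault must be redirected through the user-fault dispatch path
	 * rather than handled as a kernel fault.
	 */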
L_32bit_seg_restore_begin:
	mov	%r8, %ds
	mov	%r9, %es
	mov	%r10, %fs
	mov	%r11, %gs
L_32bit_seg_restore_done:

	/* Zero 64-bit-exclusive GPRs to prevent data leaks */
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r15, %r15

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * | Return SS    | +32
	 * | Return RSP   | +24
	 * | Return RFL   | +16
	 * | Return CS    | +8
	 * | Return RIP   | <-- rsp
	 * +--------------+
	 */

	cmpw	$(SYSENTER_CS), 8(%rsp)	/* test for sysexit */
	je	L_rtu_via_sysexit

	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island

L_after_verw:
	xor	%r14, %r14

.globl EXT(ret32_iret)
EXT(ret32_iret):
	iretq			/* return from interrupt */

L_verw_island:
	verw	32(%rsp)
	jmp	L_after_verw

L_verw_island_1:
	verw	16(%rsp)
	jmp	L_after_verw_1

L_rtu_via_sysexit:
	pop	%rdx		/* user return eip */
	pop	%rcx		/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupts enable, sti below */

	/*
	 * %ss is now at 16(%rsp)
	 */
	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island_1
L_after_verw_1:
	xor	%r14, %r14

	popf			/* flags - carry denotes failure */
	pop	%rcx		/* user return esp */


	sti			/* interrupts enabled after sysexit */
	sysexitl		/* 32-bit sysexit */

/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq	%rax
	cmpw	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	EXT(ks_dispatch_kernel)

	mov	%rax, %gs:CPU_UBER_TMP
	mov	%gs:CPU_UBER_ISF, %rax
	add	$(ISF64_SIZE), %rax

	xchg	%rsp, %rax
	/* Memory to memory moves (ain't x86 wonderful):
	 * Transfer the exception frame from the per-CPU exception stack to the
	 * 'PCB' stack programmed at cswitch.
	 */
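	/*
	 * push with a memory operand reads its source and writes it to the
	 * stack in a single instruction; after the xchg above, %rsp is the
	 * PCB stack and %rax points at the old exception-stack frame, so
	 * each push below copies one slot of the frame without staging it
	 * through a general-purpose register.
	 */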
	push	ISF64_SS(%rax)
	push	ISF64_RSP(%rax)
	push	ISF64_RFLAGS(%rax)
	push	ISF64_CS(%rax)
	push	ISF64_RIP(%rax)
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push	ISF64_TRAPNO(%rax)
	mov	%gs:CPU_UBER_TMP, %rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user)
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

Entry(ks_dispatch_kernel_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_kernel)

Entry(ks_dispatch_kernel)
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs if a 64-bit task has
	 * installed customized segments in the LDT
	 */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT
	je	L_skip_save_extra_segregs

	mov	%ds, R64_DS(%r15)
	mov	%es, R64_ES(%r15)

L_skip_save_extra_segregs:
	mov	%fs, R64_FS(%r15)
	mov	%gs, R64_GS(%r15)


	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8, R64_R8(%r15)
	mov	%r9, R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/* cr2 is significant only for page-faults */
	mov	%cr2, %rax
	mov	%rax, R64_CR2(%r15)

L_dispatch_U64_after_fault:
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	mov	%ds, R32_DS(%r15)
	mov	%es, R32_ES(%r15)
	mov	%fs, R32_FS(%r15)
	mov	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	mov	%cr2, %rax
	mov	%eax, R32_CR2(%r15)
	/* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
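	/*
	 * %ebx, %rdx and %esi are exempt from the zeroing below because
	 * they are immediately reloaded with the trap number, handler
	 * index and trap-time %cs for L_common_dispatch, and %r15 carries
	 * the save-area pointer itself.
	 */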
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/*
	 * Copy registers already saved in the machine state
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi	/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */

L_common_dispatch:
	cld		/* Ensure the direction flag is clear in the kernel */
	cmpl	$0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac		/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * We mark the kernel's cr3 as "active" for TLB coherency evaluation.
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also,
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi		/* CS: user/kernel? */
	jz	2f			/* skip CR3 reload if from kernel */
	xor	%ebp, %ebp
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	11f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
11:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3		/* load kernel cr3 */
	jmp	4f
2:
	/* Deferred processing of pending kernel address space TLB invalidations */
	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
	shr	$32, %rcx
	testl	%ecx, %ecx
	jz	4f
	movl	$0, %gs:CPU_TLB_INVALID
	cmpb	$0, EXT(invpcid_enabled)(%rip)
	jz	L_cr4_island
	movl	$2, %ecx
	invpcid	%gs:CPU_IP_DESC, %rcx
4:
L_set_act:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	testq	%rcx, %rcx
	je	L_intcnt
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	jnz	L_dr7_island
L_intcnt:
	incl	%gs:hwIntCnt(,%ebx,4)	// Bump the trap/intr count
	/* Dispatch the designated handler */
	cmp	EXT(dblmap_base)(%rip), %rsp
	jb	66f
	cmp	EXT(dblmap_max)(%rip), %rsp
	jge	66f
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
66:
	leaq	EXT(idt64_hndl_table1)(%rip), %rax
	jmp	*(%rax, %rdx, 8)

L_cr4_island:
	mov	%cr4, %rcx	/* RMW CR4 (toggling PGE flushes global TLB entries), for lack of an alternative */
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4
	jmp	L_set_act
L_dr7_island:
	xor	%ecx, %ecx	/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
	jmp	L_intcnt
/*
 * Control is passed here to return to user.
 */
Entry(return_to_user)
	TIME_TRAP_UEXIT

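	/*
	 * return_to_user falls through into ret_to_user after the
	 * trap-exit timing update; interrupt exits with no pending ASTs
	 * (ast_from_interrupt_user, below) enter at ret_to_user directly.
	 */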
Entry(ret_to_user)
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	cmpq	$0, TH_PCB_IDS(%rdx)	/* Is there a debug register context? */
	jnz	L_dr_restore_island
L_post_dr_restore:
	/*
	 * We now mark the task's address space as active for TLB coherency.
	 * Handle special cases such as pagezero-less tasks here.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island

L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register? */
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	jne	L_32bit_return

	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)


L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*0x18(%rax)


L_dr_restore_island:
	movq	TH_PCB_IDS(%rdx), %rax	/* Obtain this thread's debug state */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP	/* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32-bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq	%rcx, %gs:CPU_DR7
	jmp	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full-width DRs */
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov	%rcx, %gs:CPU_DR7
2:
	jmp	L_post_dr_restore
L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpw	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
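	/*
	 * Entry 0x08 of idt64_hndl_table0 is ks_64bit_return, so this
	 * indirect jump (like the identical one in ret_to_user above)
	 * lands on the double-mapped return trampoline.
	 */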
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)

/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 *
 * The rest of the state is set up as:
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)		/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli				/* hold off intrs - critical section */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */


Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD, %r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	movq	TH_PCB_ISS(%r15), %r15		/* PCB stack */
	movl	%gs:CPU_PENDING_AST, %eax
	testl	%eax, %eax
	je	EXT(return_to_user)	/* branch if no AST */

L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
	/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:

	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(ast_taken_user)		/* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode. No need to switch stacks.
 * Interrupts must be off here - we will set them to state at time of trap
 * as soon as it's safe for us to do so and not recurse doing preemption
 *
 */
trap_from_kernel:

UNWIND_PROLOGUE

	movq	%r15, %rdi		/* saved state addr */

UNWIND_DIRECTIVES

	pushq	R64_RIP(%r15)		/* Simulate a CALL from fault point */
	pushq	%rbp			/* Extend framepointer chain */
	movq	%rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq	%rbp
	addq	$8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST, %eax	/* get pending asts */
	testl	$(AST_URGENT), %eax	/* any urgent preemption? */
	je	ret_to_kernel		/* no, nothing to do */
	cmpl	$(T_PREEMPT), R64_TRAPNO(%r15)
	je	ret_to_kernel		/* T_PREEMPT handled in kernel_trap() */
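	/*
	 * An urgent AST is honored here only when it is safe to preempt:
	 * interrupts must have been enabled at trap time, the preemption
	 * level must be zero, and we must be running on the kernel stack;
	 * the checks below enforce these conditions in that order.
	 */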
	testl	$(EFL_IF), R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0, %gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK, %rax
	movq	%rsp, %rcx
	xorq	%rax, %rcx
	andq	EXT(kernel_stack_mask)(%rip), %rcx
	testq	%rcx, %rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL(ast_taken_kernel)		/* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel

UNWIND_EPILOGUE

/*
 * All interrupts on all tasks enter here with:
 *	r15	x86_saved_state_t
 *	rsp	kernel or interrupt stack
 *	esi	cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)

UNWIND_PROLOGUE

	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP, %rcx
	cmpq	%rsp, %rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx), %rdx
	cmpq	%rsp, %rdx
	jb	int_from_intstack
1:
	xchgq	%rcx, %rsp		/* switch to interrupt stack */

	mov	%cr0, %rax		/* get cr0 */
	orl	$(CR0_TS), %eax		/* or in TS bit */
	mov	%rax, %cr0		/* set cr0 */

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15, %gs:CPU_INT_STATE	/* set intr state */

UNWIND_DIRECTIVES

	CCALL1(recount_enter_intel_interrupt, %r15)	/* update time and PMCs */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */

UNWIND_EPILOGUE

.globl EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	CCALL(recount_leave_intel_interrupt)	/* update time and PMCs */

	popq	%gs:CPU_INT_STATE	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD, %rax
	movq	TH_PCB_FPS(%rax), %rax	/* get pcb's ifps */
	cmpq	$0, %rax		/* Is there a context? */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax), %eax	/* Load fp_valid */
	cmpl	$0, %eax		/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0, %rax		/* get cr0 */
	orl	$(CR0_TS), %eax		/* or in TS bit */
	mov	%rax, %cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R64_CS(%r15), %eax	/* assume 64-bit state */
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit? */
#if DEBUG_IDT64
	jne	5f
	movl	R32_CS(%r15), %eax	/* 32-bit user mode */
	jmp	3f
5:
	cmpl	$(SS_64), SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	je	4f
#endif
3:
	testb	$3, %al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * we only want to handle preemption requests if
	 * the interrupt fell in the kernel context
	 * and preemption isn't disabled
	 */
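	/*
	 * Unlike the trap exit path, there is no need to test the saved
	 * EFL_IF here: the interrupt could only have been delivered while
	 * interrupts were enabled in the interrupted context.
	 */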
	movl	%gs:CPU_PENDING_AST, %eax
	testl	$(AST_URGENT), %eax	/* any urgent requests? */
	je	ret_to_kernel		/* no, nothing to do */

	cmpl	$0, %gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel		/* yes, skip it */

	/*
	 * Take an AST from kernel space. We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel
4:
	movl	R32_CS(%r15), %eax	/* 32-bit user mode */
	jmp	3b


/*
 * nested int - simple path, can't preempt etc on way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 * Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST, %eax
	testl	%eax, %eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 *
 * 32bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	x86_saved_state32_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15), %eax
	testl	%eax, %eax
	js	EXT(hndl_mach_scall)	/* < 0 => mach */
					/* > 0 => unix */

Entry(hndl_unix_scall)

	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64bit Tasks
 * System call entries via syscall only:
 *
 *	r15	x86_saved_state64_t
 *	rsp	kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
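/*
 * The 64-bit syscall ABI encodes the syscall class in the upper bits of
 * the number passed in %rax (SYSCALL_CLASS_SHIFT/SYSCALL_CLASS_MASK from
 * mach/i386/syscall_sw.h, included above); hndl_syscall masks out the
 * class field and fans out to the Mach, Unix, machdep or diagnostics
 * handler accordingly.
 */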
Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD, %rcx	/* get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx), %rbx	/* point to current task */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx, %rcx)

	/*
	 * We can be here either for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
	movl	R64_RAX(%r15), %eax	/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)	/* increment call count */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)	/* increment call count */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */

Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt