/*
 * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>
#include <mach/i386/syscall_sw.h>


/* PREEMPTION FREE ZONE (PFZ)
 *
 * A portion of the commpage is special-cased by the kernel to be "preemption free",
 * ie as if we had disabled interrupts in user mode.  This facilitates writing
 * "nearly-lockless" code, for example code that must be serialized by a spinlock but
 * which we do not want to preempt while the spinlock is held.
 *
 * The PFZ is implemented by collecting all the "preemption-free" code into a single
 * contiguous region of the commpage.  Register %ebx is used as a flag register;
 * before entering the PFZ, %ebx is cleared.  If some event occurs that would normally
 * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of
 * preempting.  Then, when the routine leaves the PFZ we check %ebx and
 * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption.
 *
 * PFZ code must bound the amount of time spent in the PFZ, in order to control
 * latency.  Backward branches are dangerous and must not be used in a way that
 * could inadvertently create a long-running loop.
 *
 * Because they cannot be implemented reasonably without a lock, we put the "atomic"
 * FIFO enqueue and dequeue in the PFZ.  As long as we don't take a page fault trying to
 * access queue elements, these implementations behave nearly-locklessly.
 * But we still must take a spinlock to serialize, and in case of page faults.
 *
 * Spinlock protocol used throughout this file: the lock word (opaque3) is 0 when
 * free; a holder swaps in -1 with "lock cmpxchg" (expected old value 0 in %eax,
 * new value -1 in %ecx) and stores 0 to release.
 */

/* Work around 10062261 with a dummy non-local symbol */
fifo_queue_dummy_symbol:

/*
 * typedef	volatile struct {
 *	void	*opaque1;  <-- ptr to first queue element or null
 *	void	*opaque2;  <-- ptr to last queue element or null
 *	int	 opaque3;  <-- spinlock
 * } OSFifoQueueHead;
 *
 * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */


/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.  Takes the preemption
 * that the kernel deferred while we were inside the PFZ.
 * When we enter kernel:
 *	%edx = return address
 *	%ecx = stack ptr
 * Destroys %eax, %ecx, and %edx.
 */
COMMPAGE_FUNCTION_START(preempt, 32, 4)
	popl	%edx			// get return address
	movl	%esp,%ecx		// save stack ptr here
	movl	$(-58),%eax		/* 58 = pfz_exit (negative => Mach trap) */
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	sysenter
COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT)


/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ, so the kernel may preempt us here; we take any
 * pending deferred preemption first, then spin read-only on the
 * lock word (no lock-prefixed traffic) until it looks free.
 *	%edi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Destroys %eax.
 */
COMMPAGE_FUNCTION_START(backoff, 32, 4)
	testl	%ebx,%ebx		// does kernel want to preempt us?
	jz	1f			// no
	xorl	%ebx,%ebx		// yes, clear flag
	pushl	%edx			// preserve regs used by preempt syscall
	pushl	%ecx
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff)
	popl	%ecx
	popl	%edx
1:
	pause				// SMT-friendly backoff
	cmpl	$0,8(%edi)		// sniff the lockword (opaque3 at offset 8)
	jnz	1b			// loop if still taken
	ret				// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%edi = ptr to queue head structure
 *	%esi = ptr to element to enqueue
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
	movl	$0,(%edx,%esi)		// zero forward link in new element
1:
	xorl	%eax, %eax		// expected lock value: 0 (free)
	orl	$-1, %ecx		// new lock value: -1 (taken); sticky across retries
	lock
	cmpxchgl %ecx, 8(%edi)		// try to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax		// cmpxchg loaded old value into %eax; reset it
	lock
	cmpxchgl %ecx, 8(%edi)		// try 2nd time to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)		// try 3rd time to take the spinlock
	jz	2f			// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
	jmp	1b			// loop to try again
2:
	movl	4(%edi),%eax		// get ptr to last element in q
	testl	%eax,%eax		// q null?
	jnz	3f			// no
	movl	%esi,(%edi)		// q empty so this is first element
	jmp	4f
3:
	movl	%esi,(%edx,%eax)	// point to new element from last
4:
	movl	%esi,4(%edi)		// new element becomes last in q
	movl	$0,8(%edi)		// unlock spinlock
	COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE)


/* Preemption-free-zone routine to FIFO Dequeue:
 *	%edi = ptr to queue head structure
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %eax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
1:
	xorl	%eax, %eax		// expected lock value: 0 (free)
	orl	$-1, %ecx		// new lock value: -1 (taken); sticky across retries
	lock
	cmpxchgl %ecx, 8(%edi)		// try to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax		// cmpxchg loaded old value into %eax; reset it
	lock
	cmpxchgl %ecx, 8(%edi)		// try 2nd time to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)		// try 3rd time to take the spinlock
	jz	2f			// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
	jmp	1b			// loop to try again
2:
	movl	(%edi),%eax		// get ptr to first element in q
	testl	%eax,%eax		// q null?
	jz	4f			// yes
	movl	(%edx,%eax),%esi	// get ptr to 2nd element in q
	testl	%esi,%esi		// is there a 2nd element?
	jnz	3f			// yes
	movl	%esi,4(%edi)		// no - clear "last" field of q head
3:
	movl	%esi,(%edi)		// update "first" field of q head
4:
	movl	$0,8(%edi)		// unlock spinlock
	COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE)


/* Shared return point the PFZ routines jump to via COMMPAGE_JMP.
 * NOTE(review): presumably a single well-known RET address lets the kernel
 * recognize PFZ exit — confirm against the commpage layout in commpage.h.
 */
COMMPAGE_FUNCTION_START(ret, 32, 4)
	ret
COMMPAGE_DESCRIPTOR(ret,_COMM_PAGE_RET)



/************************* x86_64 versions follow **************************/


/*
 * typedef	volatile struct {
 *	void	*opaque1;  <-- ptr to first queue element or null
 *	void	*opaque2;  <-- ptr to last queue element or null
 *	int	 opaque3;  <-- spinlock
 * } OSFifoQueueHead;
 *
 * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */


/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.  Not in PFZ.
 *
 * All registers preserved (but does clear the %ebx preemption flag).
 */
COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
	pushq	%rax			// save regs the syscall path destroys:
	pushq	%rcx			// syscall clobbers %rcx (return RIP)
	pushq	%r11			// ...and %r11 (saved RFLAGS)
	movl	$(SYSCALL_CONSTRUCT_MACH(58)),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	syscall
	popq	%r11
	popq	%rcx
	popq	%rax
	ret
COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT)


/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ.
 *	%rdi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Uses: %rax.
 */
COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
	testl	%ebx,%ebx		// does kernel want to preempt us?
	jz	1f			// no
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64)
1:
	pause				// SMT-friendly backoff
	cmpl	$0,16(%rdi)		// sniff the lockword (opaque3 at offset 16)
	jnz	1b			// loop if still taken
	ret				// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%rdi = ptr to queue head structure
 *	%rsi = ptr to new element to enqueue
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
	movq	$0,(%rdx,%rsi)		// zero forward link in new element
1:
	xorl	%eax, %eax		// expected lock value: 0 (free)
	orl	$-1, %ecx		// new lock value: -1 (taken); sticky across retries
	lock
	cmpxchgl %ecx,16(%rdi)		// try to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax		// cmpxchg loaded old value into %eax; reset it
	lock
	cmpxchgl %ecx,16(%rdi)		// try 2nd time to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)		// try 3rd time to take the spinlock
	jz	2f			// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
	jmp	1b			// loop to try again
2:
	movq	8(%rdi),%rax		// get ptr to last element in q
	testq	%rax,%rax		// q null?
	jnz	3f			// no
	movq	%rsi,(%rdi)		// q empty so this is first element
	jmp	4f
3:
	movq	%rsi,(%rdx,%rax)	// point to new element from last
4:
	movq	%rsi,8(%rdi)		// new element becomes last in q
	movl	$0,16(%rdi)		// unlock spinlock
	COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE)



/* Preemption-free-zone routine to FIFO Dequeue:
 *	%rdi = ptr to queue head structure
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %rax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
1:
	xorl	%eax, %eax		// expected lock value: 0 (free)
	orl	$-1, %ecx		// new lock value: -1 (taken); sticky across retries
	lock
	cmpxchgl %ecx,16(%rdi)		// try to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax		// cmpxchg loaded old value into %eax; reset it
	lock
	cmpxchgl %ecx,16(%rdi)		// try 2nd time to take the spinlock
	jz	2f			// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)		// try 3rd time to take the spinlock
	jz	2f			// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
	jmp	1b			// loop to try again
2:
	movq	(%rdi),%rax		// get ptr to first element in q
	testq	%rax,%rax		// q null?
	jz	4f			// yes
	movq	(%rdx,%rax),%rsi	// get ptr to 2nd element in q
	testq	%rsi,%rsi		// is there a 2nd element?
	jnz	3f			// yes
	movq	%rsi,8(%rdi)		// no - clear "last" field of q head
3:
	movq	%rsi,(%rdi)		// update "first" field of q head
4:
	movl	$0,16(%rdi)		// unlock spinlock
	COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE)

/* Shared return point for the 64-bit PFZ routines (see 32-bit "ret" above). */
COMMPAGE_FUNCTION_START(ret_64, 64, 4)
	ret
COMMPAGE_DESCRIPTOR(ret_64,_COMM_PAGE_RET)