1/* 2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 * 28 * This file implements the following function for the arm64 architecture: 29 * 30 * int strncmp(const char *s1, const char *s2, size_t n); 31 * 32 * Returns 0 if the two strings are equal up to the first n bytes or to the 33 * end of the string, whichever comes first. Otherwise, returns the difference 34 * of the first mismatched characters interpreted as uint8_t. 35 */ 36 37#include <arm64/asm.h> 38 39.globl _strncmp 40 41/***************************************************************************** 42 * Macros * 43 *****************************************************************************/ 44 45.macro EstablishFrame 46 ARM64_STACK_PROLOG 47 stp fp, lr, [sp, #-16]! 48 mov fp, sp 49.endm 50 51.macro ClearFrameAndReturn 52 ldp fp, lr, [sp], #16 53 ARM64_STACK_EPILOG 54.endm 55 56#include "../mach/arm/vm_param.h" 57#define kVectorSize 16 58 59 60/***************************************************************************** 61 * Constants * 62 *****************************************************************************/ 63 64.text 65.align 5 66L_mask: 67.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 68 69/***************************************************************************** 70 * Entrypoints * 71 *****************************************************************************/ 72 73_strncmp: 74 EstablishFrame 75 eor x3, x3, x3 76 cbz x2, L_scalarDone 77// Compare one byte at a time until s1 has vector alignment. 780: tst x0, #(kVectorSize-1) 79 b.eq L_s1aligned 80 ldrb w4, [x0],#1 // load byte from src1 81 ldrb w5, [x1],#1 // load byte from src2 82 subs x3, x4, x5 // if the are not equal 83 ccmp w4, #0, #4, eq // or we find an EOS 84 b.eq L_scalarDone // return the difference 85 subs x2, x2, #1 // decrement length 86 b.ne 0b // continue loop if non-zero 87 88// We found a mismatch or EOS before s1 became aligned. Simply return the 89// difference between the last bytes that we loaded. 90L_scalarDone: 91 mov x0, x3 92 ClearFrameAndReturn 93 94L_s1aligned: 95// If s2 is similarly aligned to s1, then we can use a naive vector comparison 96// from this point on without worrying about spurious page faults; none of our 97// loads will ever cross a page boundary, because they are all aligned. 98 99 100 tst x1, #(kVectorSize-1) 101 b.eq L_naiveVector 102 103/***************************************************************************** 104 * Careful chunk comparison * 105 *****************************************************************************/ 106 107// Otherwise, we need to be careful; although vector loads from s1 cannot 108// cross a page boundary because they are aligned, s2 is not aligned. We 109// compute the multiple of vector size that we can safely load before reaching 110// a page boundary, and compare only that far before switching over to scalar 111// comparisons to step across the page boundary. If this number happens to 112// be zero, we jump directly to the scalar comparison. 113 neg x7, x1 114 ands x7, x7, #(PAGE_MIN_SIZE-kVectorSize) 115 b.eq 2f 116 117.align 4 118// If n is less than the number of bytes before a page-crossing load, jump 119// into the naive vector path instead, since we will not even reach a page 120// crossing. Otherwise, decrement n by that number before we monkey with it, 121// and set the decremented value aside. 1220: cmp x2, x7 123 b.ls L_naiveVector 124 sub x6, x2, x7 125// Use vector comparisons until a mismatch or EOS is encountered, or the next 126// vector load from s2 would be page-crossing. 1271: ldr q0, [x0],#(kVectorSize) 128 ldr q1, [x1],#(kVectorSize) 129 cmeq.16b v1, v0, v1 130 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS 131 uminv.16b b1, v0 132 fmov w3, s1 // zero only iff comparison is finished 133 cbz w3, L_vectorDone 134 subs x7, x7, #(kVectorSize) 135 b.ne 1b 136// Restore the updated n to x2 137 mov x2, x6 138// The next vector load will cross a page boundary. Instead, compare one byte 139// at a time until s1 again has vector alignment, at which point we will have 140// compared exactly 16 bytes. 1412: ldrb w4, [x0],#1 // load byte from src1 142 ldrb w5, [x1],#1 // load byte from src2 143 subs x3, x4, x5 // if the are not equal 144 ccmp w4, #0, #4, eq // or we find an EOS 145 b.eq L_scalarDone // return the difference 146 subs x2, x2, #1 // decrement length 147 b.eq L_scalarDone // exit loop if zero. 148 tst x0, #(kVectorSize-1) 149 b.ne 2b 150// Having compared one vector's worth of bytes using a scalar comparison, we 151// know that we are safely across the page boundary. Initialize x7 and jump 152// back into the vector comparison part of the loop. 153 mov x7, #(PAGE_MIN_SIZE-kVectorSize) 154 b 0b 155 156 157 158/***************************************************************************** 159 * Naive vector comparison * 160 *****************************************************************************/ 161 162L_naiveVector: 163 subs x3, x2, #(kVectorSize) 164 b.lo L_scalar 165 add x4, x0, x3 // save the addresses of the last vectors 166 add x5, x1, x3 167 mov x2, x3 // length -= kVectorSize 168.align 4 1690: 170 ldr q0, [x0],#(kVectorSize) 171 ldr q1, [x1],#(kVectorSize) 172 cmeq.16b v1, v0, v1 173 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS 174 uminv.16b b1, v0 175 fmov w3, s1 // zero only iff comparison is finished 176 cbz w3, L_vectorDone 177 subs x2, x2, #(kVectorSize) 178 b.hi 0b 179 180 // compare the last vector 181 mov x0, x4 182 mov x1, x5 183 ldr q0, [x0],#(kVectorSize) 184 ldr q1, [x1],#(kVectorSize) 185 cmeq.16b v1, v0, v1 186 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS 187 uminv.16b b1, v0 188 fmov w3, s1 // zero only iff comparison is finished 189 cbz w3, L_vectorDone 190 191L_readNBytes: 192 eor x0, x0, x0 193 ClearFrameAndReturn 194 195L_vectorDone: 196// Load the bytes corresponding to the first mismatch or EOS and return 197// their difference. 198 eor.16b v1, v1, v1 199 cmhi.16b v0, v0, v1 // force non-zero lanes to 0xff 200 ldr q1, L_mask 201 orr.16b v0, v0, v1 // lane index in lanes containing mismatch or EOS 202 uminv.16b b1, v0 203 fmov w3, s1 204 sub x3, x3, #(kVectorSize) 205 ldrb w4, [x0, x3] 206 ldrb w5, [x1, x3] 207 sub x0, x4, x5 208 ClearFrameAndReturn 209 210L_scalar: 211 ldrb w4, [x0],#1 // load byte from src1 212 ldrb w5, [x1],#1 // load byte from src2 213 subs x3, x4, x5 // if the are not equal 214 ccmp w4, #0, #4, eq // or we find an EOS 215 b.eq 1f // return the difference 216 subs x2, x2, #1 // decrement length 217 b.ne L_scalar // continue loop if non-zero 2181: 219 mov x0, x3 220 ClearFrameAndReturn 221 222