1/* 2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 * 28 * This file implements the following function for the arm64 architecture: 29 * 30 * int strncmp(const char *s1, const char *s2, size_t n); 31 * 32 * Returns 0 if the two strings are equal up to the first n bytes or to the 33 * end of the string, whichever comes first. Otherwise, returns the difference 34 * of the first mismatched characters interpreted as uint8_t. 35 */ 36 37#include <arm64/asm.h> 38 39.globl _strncmp 40 41/***************************************************************************** 42 * Macros * 43 *****************************************************************************/ 44 45.macro EstablishFrame 46 ARM64_STACK_PROLOG 47 stp fp, lr, [sp, #-16]! 48 mov fp, sp 49.endm 50 51.macro ClearFrameAndReturn 52 ldp fp, lr, [sp], #16 53 ARM64_STACK_EPILOG 54.endm 55 56#include "../mach/arm/vm_param.h" 57#define kVectorSize 16 58 59/***************************************************************************** 60 * Constants * 61 *****************************************************************************/ 62 63.text 64.align 5 65L_mask: 66.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 67 68/***************************************************************************** 69 * Entrypoints * 70 *****************************************************************************/ 71 72_strncmp: 73 EstablishFrame 74 eor x3, x3, x3 75 cbz x2, L_scalarDone 76// Compare one byte at a time until s1 has vector alignment. 770: tst x0, #(kVectorSize-1) 78 b.eq L_s1aligned 79 ldrb w4, [x0],#1 // load byte from src1 80 ldrb w5, [x1],#1 // load byte from src2 81 subs x3, x4, x5 // if the are not equal 82 ccmp w4, #0, #4, eq // or we find an EOS 83 b.eq L_scalarDone // return the difference 84 subs x2, x2, #1 // decrement length 85 b.ne 0b // continue loop if non-zero 86 87// We found a mismatch or EOS before s1 became aligned. Simply return the 88// difference between the last bytes that we loaded. 89L_scalarDone: 90 mov x0, x3 91 ClearFrameAndReturn 92 93L_s1aligned: 94// If s2 is similarly aligned to s1, then we can use a naive vector comparison 95// from this point on without worrying about spurious page faults; none of our 96// loads will ever cross a page boundary, because they are all aligned. 97 tst x1, #(kVectorSize-1) 98 b.eq L_naiveVector 99 100/***************************************************************************** 101 * Careful chunk comparison * 102 *****************************************************************************/ 103 104// Otherwise, we need to be careful; although vector loads from s1 cannot 105// cross a page boundary because they are aligned, s2 is not aligned. We 106// compute the multiple of vector size that we can safely load before reaching 107// a page boundary, and compare only that far before switching over to scalar 108// comparisons to step across the page boundary. If this number happens to 109// be zero, we jump directly to the scalar comparison. 110 neg x7, x1 111 ands x7, x7, #(PAGE_MIN_SIZE-kVectorSize) 112 b.eq 2f 113 114.align 4 115// If n is less than the number of bytes before a page-crossing load, jump 116// into the naive vector path instead, since we will not even reach a page 117// crossing. Otherwise, decrement n by that number before we monkey with it, 118// and set the decremented value aside. 1190: cmp x2, x7 120 b.ls L_naiveVector 121 sub x6, x2, x7 122// Use vector comparisons until a mismatch or EOS is encountered, or the next 123// vector load from s2 would be page-crossing. 1241: ldr q0, [x0],#(kVectorSize) 125 ldr q1, [x1],#(kVectorSize) 126 cmeq.16b v1, v0, v1 127 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS 128 uminv.16b b1, v0 129 fmov w3, s1 // zero only iff comparison is finished 130 cbz w3, L_vectorDone 131 subs x7, x7, #(kVectorSize) 132 b.ne 1b 133// Restore the updated n to x2 134 mov x2, x6 135// The next vector load will cross a page boundary. Instead, compare one byte 136// at a time until s1 again has vector alignment, at which point we will have 137// compared exactly 16 bytes. 1382: ldrb w4, [x0],#1 // load byte from src1 139 ldrb w5, [x1],#1 // load byte from src2 140 subs x3, x4, x5 // if the are not equal 141 ccmp w4, #0, #4, eq // or we find an EOS 142 b.eq L_scalarDone // return the difference 143 subs x2, x2, #1 // decrement length 144 b.eq L_scalarDone // exit loop if zero. 145 tst x0, #(kVectorSize-1) 146 b.ne 2b 147// Having compared one vector's worth of bytes using a scalar comparison, we 148// know that we are safely across the page boundary. Initialize x7 and jump 149// back into the vector comparison part of the loop. 150 mov x7, #(PAGE_MIN_SIZE-kVectorSize) 151 b 0b 152 153/***************************************************************************** 154 * Naive vector comparison * 155 *****************************************************************************/ 156 157.align 4 158L_naiveVector: 159 ldr q0, [x0],#(kVectorSize) 160 ldr q1, [x1],#(kVectorSize) 161 cmeq.16b v1, v0, v1 162 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS 163 uminv.16b b1, v0 164 fmov w3, s1 // zero only iff comparison is finished 165 cbz w3, L_vectorDone 166 subs x2, x2, #16 167 b.hi L_naiveVector 168 169L_readNBytes: 170 eor x0, x0, x0 171 ClearFrameAndReturn 172 173L_vectorDone: 174// Load the bytes corresponding to the first mismatch or EOS and return 175// their difference. 176 eor.16b v1, v1, v1 177 cmhi.16b v0, v0, v1 // force non-zero lanes to 0xff 178 ldr q1, L_mask 179 orr.16b v0, v0, v1 // lane index in lanes containing mismatch or EOS 180 uminv.16b b1, v0 181 fmov w3, s1 182// If the index of the mismatch or EOS is greater than or equal to n, it 183// occurs after the first n bytes of the string, and doesn't count. 184 cmp x3, x2 185 b.cs L_readNBytes 186 sub x3, x3, #(kVectorSize) 187 ldrb w4, [x0, x3] 188 ldrb w5, [x1, x3] 189 sub x0, x4, x5 190 ClearFrameAndReturn 191