/*
 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 * This file implements the following functions for the arm64 architecture:
 *
 *  size_t strlen(const char *string);
 *  size_t strnlen(const char *string, size_t maxlen);
 *
 * The strnlen function returns either strlen(string) or maxlen, whichever
 * is smaller, without reading beyond the first maxlen characters of string.
 */

#include <arm64/asm.h>

.globl _strlen
.globl _strnlen

/*****************************************************************************
 *  Macros                                                                   *
 *****************************************************************************/

//  Push a standard {fp, lr} frame record and point fp at it.
//  ARM64_STACK_PROLOG is supplied by <arm64/asm.h>; its expansion is not
//  visible in this file.
.macro EstablishFrame
    ARM64_STACK_PROLOG
    stp       fp,      lr, [sp, #-16]!
    mov       fp,      sp
.endm

//  Pop the frame record pushed by EstablishFrame and return to the caller.
//  The return itself is presumably emitted by ARM64_STACK_EPILOG (defined in
//  <arm64/asm.h>), since no explicit `ret` follows the macro at its use sites.
.macro ClearFrameAndReturn
    ldp       fp,      lr, [sp], #16
    ARM64_STACK_EPILOG
.endm

/*****************************************************************************
 *  Constants                                                                *
 *****************************************************************************/

//  Two adjacent 16-byte vectors:
//
//    L_masks +  0:  {0, 1, 2, ... , 15}   byte-index vector
//    L_masks + 16:  {0, 0, 0, ... ,  0}   all-zero vector
//
//  A 16-byte load from (L_masks + 16 - k), 0 <= k <= 15, yields a vector
//  whose first k bytes are non-zero (the tail of the index vector) and whose
//  remaining bytes are zero.  Both entry points use this to mask off the k
//  bytes that precede the start of a misaligned string.
.text
.align 5
L_masks:
.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
.quad 0x0000000000000000, 0x0000000000000000

/*****************************************************************************
 *  strnlen entrypoint                                                       *
 *****************************************************************************/

//  size_t strnlen(const char *string, size_t maxlen)
//
//  In:     x0 = string, x1 = maxlen
//  Out:    x0 = min(strlen(string), maxlen)
//  Uses:   x1-x4, v0-v2, condition flags
//
//  Register roles in the mainline:
//    x0   pointer to start of string
//    x1   maxlen, biased by the misalignment of string, then counted down
//         by 16 per vector scanned
//    x2   pointer to the 16-byte aligned vector currently being examined
//    x3   scratch (mask address, then minimum byte / NUL index)
//    x4   string & 0xf, the number of bytes preceding string in the first
//         aligned vector
//    v0   current vector of string data
//    v1   scratch
//    v2   {0, 1, 2, ... , 15}
_strnlen:
//  If n == 0, return 0 without loading any data from s.  If n is so large
//  that it exceeds the size of any buffer that can be allocated (top bit
//  set), jump into a simpler implementation that omits all length checks.
//  This is both faster and lets us avoid some messy edge cases in the
//  mainline.
    ARM64_PROLOG
    tst       x1,      x1
    b.mi      _strlen
    b.eq      L_maxlenIsZero
    EstablishFrame
//  Load the 16-byte aligned vector containing the start of the string.
    and       x2,      x0, #-16
    ldr       q0,     [x2]
//  Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
//  byte once we identify one.  We don't use this vector until the very end
//  of the routine; it simply falls out naturally to load it now.  The
//  post-increment leaves x3 pointing at the all-zero vector.
    adr       x3,      L_masks
    ldr       q2,     [x3],#16
//  The aligned vector that we loaded to q0 contains the start of the string,
//  but if the string was not originally aligned, it also contains bytes
//  which precede the start of the string, and which may cause false positives
//  when we search for the terminating NUL.  We generate a mask to OR into the
//  vector using an unaligned load to prevent this.  The mask has non-zero
//  values only in those bytes which correspond to bytes preceding the start
//  of the string in the aligned vector load.
    and       x4,      x0, #0xf
    sub       x3,      x3, x4
    ldr       q1,     [x3]
    orr.16b   v0,      v0, v1
//  Adjust maxlen to account for bytes which precede the start of the string,
//  and jump into the main scanning loop.
    add       x1,      x1, x4
    b         1f

//  Main loop.  Identical to strlen, except that we also need to check that we
//  don't read more than maxlen bytes.  To that end, we decrement maxlen by 16
//  on each iteration, and exit the loop if the result is zero or negative.
.align 4
0:  ldr       q0,     [x2, #16]!
1:  uminv.16b b1,      v0
    fmov      w3,      s1
    cbz       w3,      L_foundNUL
    subs      x1,      x1, #16
    b.hi      0b

//  We exhausted maxlen bytes without finding a terminating NUL character, so
//  we need to return maxlen.  x1 was just decremented past the end, so undo
//  that; then (x2 - x0) + x1 reconstructs the original maxlen.
    sub       x0,      x2, x0
    add       x1,      x1, #16
    add       x0,      x0, x1
    ClearFrameAndReturn

L_maxlenIsZero:
    mov       x0,      #0
    ret                        // No stack frame, so don't clear it.

L_foundNUL:
//  Compute the index of the NUL byte, and check if it occurs before maxlen
//  bytes into the vector.  If not, return maxlen.  Otherwise, return the
//  length of the string.
//
//  cmhi turns every non-zero byte into 0xff and leaves zero bytes as 0x00;
//  OR-ing with {0 ... 15} then leaves the lane index in exactly the NUL
//  lanes, so the unsigned minimum is the index of the first NUL.
    eor.16b   v1,      v1, v1
    cmhi.16b  v0,      v0, v1
    orr.16b   v0,      v0, v2
    uminv.16b b1,      v0
    fmov      w3,      s1      // index of NUL byte in vector
    sub       x0,      x2, x0  // index of vector in string
    cmp       x1,      x3      // x1 = bytes of maxlen left in this vector
    csel      x1,      x1, x3, cc // x1 < x3 (unsigned): maxlen, else NUL index
    add       x0,      x0, x1
    ClearFrameAndReturn

/*****************************************************************************
 *  strlen entrypoint                                                        *
 *****************************************************************************/

//  size_t strlen(const char *string)
//
//  In:     x0 = string
//  Out:    x0 = index of the first NUL byte in string
//  Uses:   x1-x3, v0-v2
//
//  Also reached by a direct branch from _strnlen when maxlen has its top
//  bit set; x1 is overwritten immediately, so the stale maxlen is harmless.
.align 4
_strlen:
    EstablishFrame
//  Load the 16-byte aligned vector containing the start of the string.
    and       x1,      x0, #-16
    ldr       q0,     [x1]
//  Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
//  byte once we identify one.  We don't use this vector until the very end
//  of the routine; it simply falls out naturally to load it now.  The
//  post-increment leaves x3 pointing at the all-zero vector.
    adr       x3,      L_masks
    ldr       q2,     [x3],#16
//  The aligned vector that we loaded to q0 contains the start of the string,
//  but if the string was not originally aligned, it also contains bytes
//  which precede the start of the string, and which may cause false positives
//  when we search for the terminating NUL.  We generate a mask to OR into the
//  vector using an unaligned load to prevent this.  The mask has non-zero
//  values only in those bytes which correspond to bytes preceding the start
//  of the string in the aligned vector load.
    and       x2,      x0, #0xf
    sub       x3,      x3, x2
    ldr       q1,     [x3]
    orr.16b   v0,      v0, v1
    b         1f

//  Main loop.  On each iteration we do the following:
//
//      q0 <-- next 16 aligned bytes of string
//      b1 <-- unsigned minimum byte in q0
//      if (b1 != 0) continue
//
//  Thus, we continue the loop until the 16 bytes we load contain a zero byte.
.align 4
0:  ldr       q0,     [x1, #16]!
1:  uminv.16b b1,      v0
    fmov      w2,      s1 // umov.b would be more natural, but requires 2 µops.
    cbnz      w2,      0b

//  A zero byte has been found.  The following registers contain values that
//  we need to compute the string's length:
//
//      x0      pointer to start of string
//      x1      pointer to vector containing terminating NUL byte
//      v0      vector containing terminating NUL byte
//      v2      {0, 1, 2, ... , 15}
//
//  We compute the index of the terminating NUL byte in the string (which is
//  precisely the length of the string) as follows:
//
//      vec <-- mask(v0 != 0) | v2
//      index <-- x1 - x0 + unsignedMinimum(vec)
    eor.16b   v1,      v1, v1
    cmhi.16b  v0,      v0, v1
    orr.16b   v0,      v0, v2
    uminv.16b b1,      v0
    fmov      w2,      s1
    sub       x0,      x1, x0
    add       x0,      x0, x2
    ClearFrameAndReturn