xref: /xnu-11417.140.69/osfmk/arm64/strnlen.s (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1/*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements the following function for the arm64 architecture:
29 *
30 *  size_t strnlen(const char *string, size_t maxlen);
31 *
 * The strnlen function returns either strlen(string) or maxlen, whichever
 * is smaller, without reading beyond the first maxlen characters of string.
34 */
35
36#include <arm64/asm.h>
37
//	Both entry points are global symbols.  _strnlen also branches directly to
//	_strlen when the top bit of maxlen is set.
.globl _strlen
.globl _strnlen
40
41/*****************************************************************************
42 *  Macros                                                                   *
43 *****************************************************************************/
44
//	EstablishFrame: open a frame record for the current routine.
//	  1. Runs ARM64_STACK_PROLOG, which is supplied by <arm64/asm.h> and is
//	     not defined in this file.
//	  2. Pushes the fp/lr pair, pre-decrementing sp by 16 bytes.
//	  3. Points fp at the record that was just pushed.
.macro EstablishFrame
	ARM64_STACK_PROLOG
	stp       fp, lr, [sp, #-16]!
	mov       fp,      sp
.endm
50
//	ClearFrameAndReturn: tear down the frame record opened by EstablishFrame.
//	Pops the fp/lr pair (post-incrementing sp by 16 bytes) and then runs
//	ARM64_STACK_EPILOG from <arm64/asm.h>.
//	NOTE(review): no explicit `ret` follows any use of this macro in this
//	file, so ARM64_STACK_EPILOG is presumably what performs the return --
//	confirm against <arm64/asm.h>.
.macro ClearFrameAndReturn
	ldp       fp, lr, [sp], #16
	ARM64_STACK_EPILOG
.endm
55
56/*****************************************************************************
57 *  Constants                                                                *
58 *****************************************************************************/
59
//	L_masks is a 32-byte table that the routines below read through two
//	different 16-byte windows:
//
//	  bytes  0..15   the lane indices 0x00, 0x01, ... , 0x0f (the .quad
//	                 constants are written for little-endian byte order)
//	  bytes 16..31   zero
//
//	A 16-byte load at L_masks yields the index vector {0, 1, ... , 15}.
//	A 16-byte load at (L_masks + 16 - k), with 0 <= k <= 15, yields k non-zero
//	bytes (indices 16-k .. 15) followed by 16-k zero bytes.  With
//	k = (string & 15) that is exactly the mask that must be ORed into the
//	first aligned vector so that bytes preceding the string can never look
//	like a NUL.
//
//	The table lives in .text next to the code because it is addressed with a
//	PC-relative `adr`.
.text
.align 5
L_masks:
.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
.quad 0x0000000000000000, 0x0000000000000000
65
66/*****************************************************************************
67 *  strnlen entrypoint                                                       *
68 *****************************************************************************/
69
//	size_t strnlen(const char *string, size_t maxlen)
//
//	In:    x0 = string, x1 = maxlen
//	Out:   x0 = strlen(string) or maxlen, whichever is smaller
//	Uses:  x1-x4, v0-v2 and the condition flags as scratch
//
//	Register roles once the frame has been established:
//	  x0   start of the string (needed until the result is formed)
//	  x1   remaining length budget, measured from the first byte of the
//	       vector currently held in v0
//	  x2   address of the 16-byte aligned vector currently held in v0
//	  x3   scratch
//	  x4   string & 15: number of bytes in the first vector that precede
//	       the string
//	  v2   {0, 1, ... , 15}
//
//	Every load from the string is a 16-byte aligned load.  A vector is loaded
//	only while the budget says at least one of its bytes lies within the
//	first maxlen bytes of the string, but the load itself always covers the
//	whole aligned vector.
_strnlen:
//	If maxlen == 0, return 0 without loading any data from the string.  If
//	maxlen is so large that it exceeds the size of any buffer that can be
//	allocated (its top bit is set), jump into the simpler strlen
//	implementation, which omits all length checks.  This is both faster and
//	lets us avoid some messy edge cases in the mainline.  Both early exits
//	are taken before the frame is established.
	ARM64_PROLOG
	tst       x1,      x1
	b.mi      _strlen
	b.eq      L_maxlenIsZero
	EstablishFrame
//	Load the 16-byte aligned vector containing the start of the string.
	and       x2,      x0, #-16
	ldr       q0,     [x2]
//	Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
//	byte once we identify one.  We don't use this vector until the very end
//	of the routine; it simply falls out naturally to load it now.  The
//	post-indexed load leaves x3 pointing at the zero half of L_masks.
	adr       x3,          L_masks
	ldr       q2,     [x3],#16
//	The aligned vector that we loaded to q0 contains the start of the string,
//	but if the string was not originally aligned, it also contains bytes
//	which precede the start of the string, and which may cause false positives
//	when we search for the terminating NUL.  We generate a mask to OR into the
//	vector using an unaligned load to prevent this.  The mask has non-zero
//	values only in those bytes which correspond to bytes preceding the start
//	of the string in the aligned vector load.
	and       x4,      x0, #0xf
	sub       x3,      x3, x4
	ldr       q1,     [x3]
	orr.16b   v0,      v0, v1
//	Adjust maxlen to account for bytes which precede the start of the string,
//	so that x1 is measured from the start of the aligned vector, and jump
//	into the main scanning loop.
	add       x1,      x1, x4
	b         1f

//	Main loop.  Identical to strlen, except that we also need to check that we
//	don't read more than maxlen bytes.  To that end, we decrement the budget
//	by 16 on each iteration, and only load another vector while the budget
//	was strictly greater than 16 (unsigned), i.e. while the next vector still
//	contains at least one byte that is within maxlen.
.align 4
0:	ldr       q0,     [x2, #16]!
1:	uminv.16b b1,      v0          // b1 = smallest byte in the vector
	fmov      w3,      s1
	cbz       w3,      L_foundNUL  // smallest byte is 0: vector has a NUL
	subs      x1,      x1, #16
	b.hi      0b

//	We exhausted maxlen bytes without finding a terminating NUL character, so
//	we need to return maxlen.  It is reconstructed as
//	(vector address - string start) + (budget before the last subtraction).
	sub       x0,      x2, x0
	add       x1,      x1, #16
	add       x0,      x0, x1
	ClearFrameAndReturn

L_maxlenIsZero:
	mov       x0,      #0
	ret                         // No stack frame, so don't clear it.

L_foundNUL:
//	Compute the index of the NUL byte, and check if it occurs before maxlen
//	bytes into the vector.  If not, return maxlen.  Otherwise, return the
//	length of the string.
//
//	cmhi turns every non-zero byte into 0xff and every NUL into 0x00; ORing
//	in {0, 1, ... , 15} leaves each NUL lane holding its own index, so the
//	unsigned minimum across the vector is the index of the first NUL.
	eor.16b   v1,      v1, v1
	cmhi.16b  v0,      v0, v1
	orr.16b   v0,      v0, v2
	uminv.16b b1,      v0
	fmov      w3,      s1      // index of NUL byte in vector
	sub       x0,      x2, x0  // index of vector in string
	cmp       x1,      x3      // compare remaining budget with NUL index
	csel      x1,      x1, x3, cc // budget < index: maxlen, else strlen
	add       x0,      x0, x1
	ClearFrameAndReturn
140
141/*****************************************************************************
142 *  strlen entrypoint                                                        *
143 *****************************************************************************/
144
//	strlen entry point.
//
//	In:    x0 = pointer to a NUL-terminated string
//	Out:   x0 = index of the terminating NUL byte, i.e. the string's length
//	Uses:  x1-x3, v0-v2 and the condition flags as scratch
//
//	Register roles:
//	  x0   start of the string (needed until the result is formed)
//	  x1   address of the 16-byte aligned vector currently held in v0
//	  x2   scratch (string & 15, then the minimum byte / NUL index)
//	  x3   scratch (address into L_masks)
//	  v2   {0, 1, ... , 15}
//
//	Also reached by a direct branch from _strnlen when maxlen has its top bit
//	set; that branch is taken before _strnlen establishes a frame.
//	NOTE(review): unlike _strnlen, this entry has no ARM64_PROLOG ahead of
//	EstablishFrame -- presumably ARM64_STACK_PROLOG covers whatever
//	ARM64_PROLOG provides; confirm against <arm64/asm.h>.
.align 4
_strlen:
	EstablishFrame
//	Load the 16-byte aligned vector containing the start of the string.
	and       x1,      x0, #-16
	ldr       q0,     [x1]
//	Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
//	byte once we identify one.  We don't use this vector until the very end
//	of the routine; it simply falls out naturally to load it now.  The
//	post-indexed load leaves x3 pointing at the zero half of L_masks.
	adr       x3,          L_masks
	ldr       q2,     [x3],#16
//	The aligned vector that we loaded to q0 contains the start of the string,
//	but if the string was not originally aligned, it also contains bytes
//	which precede the start of the string, and which may cause false positives
//	when we search for the terminating NUL.  We generate a mask to OR into the
//	vector using an unaligned load to prevent this.  The mask has non-zero
//	values only in those bytes which correspond to bytes preceding the start
//	of the string in the aligned vector load.
	and       x2,      x0, #0xf
	sub       x3,      x3, x2
	ldr       q1,     [x3]
	orr.16b   v0,      v0, v1
	b         1f

//	Main loop.  On each iteration we do the following:
//
//		q0 <-- next 16 aligned bytes of string
//		b1 <-- unsigned minimum byte in q0
//		if (b1 != 0) continue
//
//	Thus, we continue the loop until the 16 bytes we load contain a zero byte.
.align 4
0:	ldr       q0,     [x1, #16]!
1:	uminv.16b b1,      v0
	fmov      w2,      s1 // umov.b would be more natural, but requires 2 micro-ops.
	cbnz      w2,      0b

//	A zero byte has been found.  The following registers contain values that
//	we need to compute the string's length:
//
//		x0		pointer to start of string
//		x1		pointer to vector containing terminating NUL byte
//		v0		vector containing terminating NUL byte
//		v2		{0, 1, 2, ... , 15}
//
//	We compute the index of the terminating NUL byte in the string (which is
//	precisely the length of the string) as follows:
//
//		vec <-- mask(v0 != 0) | v2
//		index <-- x1 - x0 + unsignedMinimum(vec)
//
//	mask(v0 != 0) is 0xff in every non-zero lane and 0x00 in every NUL lane,
//	so after the OR each NUL lane holds its own index and every other lane
//	holds 0xff; the unsigned minimum is therefore the index of the first NUL.
	eor.16b   v1,      v1, v1
	cmhi.16b  v0,      v0, v1
	orr.16b   v0,      v0, v2
	uminv.16b b1,      v0
	fmov      w2,      s1
	sub       x0,      x1, x0
	add       x0,      x0, x2
	ClearFrameAndReturn
203