xref: /xnu-8019.80.24/osfmk/arm/strncmp.s (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1/*
2 * Copyright (c) 2010, 2011 Apple, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
//  Assembler setup: place the code in the text section, use the unified
//  ARM/Thumb assembler syntax, assemble what follows as 32-bit ARM
//  instructions, and make the _strncmp symbol visible to the linker.
29.text
30.syntax unified
31.code 32
32.globl _strncmp
33// int strncmp(const char *s1, const char *s2, size_t n);
34//
35// Returns zero if the two NUL-terminated strings s1 and s2 are equal up to
36// n characters.  Otherwise, returns the difference between the first two
37// characters that do not match, interpreted as unsigned integers.
38
//  ESTABLISH_FRAME saves r4-r7 and lr, sets r7 to sp + 12 (the address of the
//  saved r7 slot, with the saved lr in the word directly above it), and then
//  saves r8 and r10.  The word-by-word path of _strncmp uses r4-r6, r8, r10
//  and lr as working registers once this macro has run.
39#define ESTABLISH_FRAME        \
40	push   {r4-r7,lr}         ;\
41	add     r7,     sp, #12   ;\
42	push   {r8,r10}
//  CLEAR_FRAME undoes ESTABLISH_FRAME by popping the same register lists in
//  the reverse order.  This restores lr, so a later "bx lr" returns to the
//  caller.
43#define CLEAR_FRAME            \
44	pop    {r8,r10}            ;\
45	pop    {r4-r7,lr}
46
//  strncmp entry point together with the literal words it reads.
//
//  Register use, as read from the code below:
//      r0      pointer to the next character of s1
//      r1      pointer to the next character of s2 (rounded down to a word
//              boundary while the word-by-word path is running)
//      r2      remaining character budget n (inside the word-by-word path it
//              is reduced in advance by the s2 bytes that are already loaded)
//      r3, ip  the two characters or words being compared, or scratch
//      lr      0x01010101 inside the word-by-word path; the return address is
//              saved by ESTABLISH_FRAME and restored by CLEAR_FRAME
//      r10     8 * (s2 & 3), the right shift that drops already-used bytes
//      r6      32 - r10, the left shift that positions the next word's bytes
//      r8      most recently loaded aligned word of s2
//      r5      bytes of the previous s2 word that have not been compared yet
//      r4      s2 word reassembled from r5 and r8
//
//  The two words below sit immediately in front of _strncmp.  The second one,
//  0x01010101, is the byte mask fetched by "ldr lr, (_strncmp-4)".  The first
//  word is presumably padding that keeps _strncmp on the 8-byte boundary
//  requested by .align 3 (TODO confirm).
47.align 3
48.long 0, 0x01010101
49_strncmp:
50//  If n < 16, jump straight to the byte-by-byte comparison loop.
51	cmp     r2,         #16
52	blo     L_byteCompareLoop
53//  Load a character from each string and advance the pointers.  If the loaded
54//  characters are unequal or NUL, return their difference.
//  "cmp r3, #1" leaves the carry flag clear only when r3 is NUL, so the
//  conditional "cmphs" is skipped in that case and the flags still read
//  "not equal"; otherwise the flags come from comparing the two characters.
//  r2 is decremented for every character consumed here.
550:	ldrb    r3,    [r0],#1
56	ldrb    ip,    [r1],#1
57	sub     r2,         #1
58	cmp     r3,         #1
59	cmphs   r3,         ip
60	bne     L_earlyReturn
61//  If the address of the next character from s1 does not have word alignment,
62//  continue with the character-by-character comparison.  Otherwise, fall
63//  through into the word-by-word comparison path.
64	tst     r0,         #3
65	bne     0b
66
67//  We have not encountered a NUL or a mismatch, and s1 has word alignment.
68//  Establish a frame, since we're going to need additional registers anyway.
69	ESTABLISH_FRAME
//  lr has just been saved, so it can hold the 0x01010101 mask stored in the
//  word directly before the entry point.
70	ldr     lr,    (_strncmp-4)
71
72//  Word align s2, and place the remainder in r10.  Compute the right- and
73//  left-shifts to extract each word that we will compare to the other source
74//  from the aligned words that we load:
75//
76//      aligned s2        to be loaded on next iteration
77//      |   "true" s2     |
78//      v   v             v
79//      +---+---+---+---+ +---+---+---+---+
80//      | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 |
81//      +---+---+---+---+ +---+---+---+---+
82//          ^-----------------^
83//          to be compared on next iteration
//  r10 = 8 * (s2 & 3) is the right shift in bits; r6 = 32 - r10 is the
//  matching left shift.
84	and     r10,    r1, #3
85	bic     r1,     r1, #3
86	mov     r10,        r10, lsl #3
87	rsb     r6,     r10,#32
88
89//  Subtract the number of bytes of the initial word load from s2 that will
90//  actually be used from n.
//  That byte count is r6 >> 3, i.e. 4 - (s2 & 3).
91	sub     r2,     r2, r6, lsr #3
92
93//  Load the first aligned word of s2.  OR 0x01 into any bytes that precede the
94//  "true s2", to prevent our check for NUL from generating a false positive.
95//  Then check for NUL, and jump to the byte-by-byte comparison loop after
96//  unwinding the pointers if we encounter one.
//  (r8 - 0x01010101) & ~r8 & 0x80808080 is nonzero exactly when r8 contains a
//  zero byte.  When s2 is already word aligned r6 is 32, and the
//  register-specified shift "lr, lsr r6" then yields 0, so nothing is ORed in.
//  The mov that primes r5 does not update the flags, so the bne below still
//  acts on the result of the tst.
97	ldr     r8,    [r1],#4
98	orr     r8,     r8, lr, lsr r6
99	sub     r3,     r8, lr
100	bic     r3,     r3, r8
101	tst     r3,         lr, lsl #7
102	mov     r5,         r8, lsr r10
103	bne     L_unwindLoopPreload
104
105.align 3
106L_wordCompareLoop:
107//  If n < 4, abort the word compare loop before we load any more data.
108	subs    r2,     r2, #4
109	blo     L_nIsLessThanFour
110//  Load the next aligned word of s2 and check if it contains any NUL bytes.
111//  Load the next aligned word of s1, and extract the corresponding bytes from
112//  the two words of s2 loaded in this and the previous iteration of the loop.
113//  Compare these two words.
114//  If no NUL or mismatched words have been encountered, continue the loop.
115	ldr     r8,    [r1],#4
//  Both variants below leave the Z flag set when the new s2 word holds no NUL
//  byte.  The first variant uses a byte-wise unsigned saturating subtraction,
//  1 - byte, whose result is nonzero only in a lane where the byte is zero;
//  the second variant repeats the subtract/bic/tst test used before the loop.
116#if defined _ARM_ARCH_6
117    uqsub8  r3,     lr, r8
118    tst     r3,         r3
119    ldr     ip,    [r0],#4
120#else
121	sub     r3,     r8, lr
122	bic     r3,     r3, r8
123	ldr     ip,    [r0],#4
124	tst     r3,         lr, lsl #7
125#endif
//  r4 = leftover bytes of the previous s2 word | low bytes of the new word.
//  cmpeq runs only when no NUL was seen, so Z stays set only if the new s2
//  word is NUL-free and the s1 word equals r4.  r5 is then refreshed for the
//  next iteration; the mov leaves the flags alone for the beq.
126	orr     r4,     r5, r8, lsl r6
127	cmpeq   ip,         r4
128	mov     r5,         r8, lsr r10
129	beq     L_wordCompareLoop
130
131//  Either we have encountered a NUL, or we have found a mismatch between s1
132//  and s2.  Unwind the pointers and use a byte-by-byte comparison loop.
133	sub     r0,     r0, #4
134	sub     r1,     r1, #4
135L_nIsLessThanFour:
//  Undo the "subs r2, r2, #4" performed at the top of the loop.
136	add     r2,     r2, #4
137L_unwindLoopPreload:
//  Move r1 back from the word boundary to the s2 character that pairs with
//  the character at r0, give back to n the preloaded byte count that was
//  subtracted before the loop, and restore the saved registers.
138	sub     r1,     r1, r6, lsr #3
139	add     r2,     r2, r6, lsr #3
140	CLEAR_FRAME
141
142L_byteCompareLoop:
143//  If n-- == 0, we have exhausted the allowed number of comparisons, and need
144//  to return zero without additional loads.
//  movlo and bxlo execute only when the subtraction borrowed, i.e. r2 was 0.
145	subs    r2,     r2, #1
146	movlo   r0,         #0
147	bxlo    lr
148//  Load a character from each string and advance the pointers.  If the loaded
149//  characters are unequal or NUL, return their difference.
//  Same NUL-or-mismatch test as in the entry loop: the loop continues only
//  when r3 is not NUL and equals ip.
150	ldrb    r3,    [r0],#1
151	ldrb    ip,    [r1],#1
152	cmp     r3,         #1
153	cmpcs   r3,         ip
154	beq     L_byteCompareLoop
155
156L_earlyReturn:
157//  Return the difference between the last two characters loaded.
//  Both characters were loaded with ldrb, so each is a zero-extended byte.
158	sub     r0,     r3, ip
159	bx      lr
160