xref: /xnu-11215.41.3/osfmk/arm64/memcmp_zero.s (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1/*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements the following function for the arm64 architecture:
29 *
30 *  int memcmp_zero_ptr_aligned(const void *s, size_t n);
31 *
 * The memcmp_zero_ptr_aligned function checks whether the n bytes starting at
 * s are all zero. Both the address s and the size n must be pointer-aligned
 * (a multiple of 8 bytes on arm64).
 * Returns 0 if every byte is zero (or if n is 0), and 1 otherwise.
35 */
36
37/* this guard is used by tests */
38#ifdef __arm64__
39
40#include "asm.h"
41
42.globl _memcmp_zero_ptr_aligned
43
44/*****************************************************************************
45 *  Macros                                                                   *
46 *****************************************************************************/
47
//  EstablishFrame: function prologue.
//  Expands ARM64_STACK_PROLOG (defined outside this file, presumably by
//  asm.h), then pushes the caller's frame pointer (x29) and the return
//  address (x30) as a 16-byte pair and points x29 at that pair.
.macro EstablishFrame
	ARM64_STACK_PROLOG
	stp       x29, x30, [sp, #-16]!
	mov       x29,      sp
.endm
53
//  ClearFrameAndReturn: function epilogue, the mirror of EstablishFrame.
//  Pops the saved frame pointer (x29) and return address (x30), then expands
//  ARM64_STACK_EPILOG (defined outside this file). No explicit ret appears
//  anywhere in this file, so the return itself is presumably issued by
//  ARM64_STACK_EPILOG.
.macro ClearFrameAndReturn
	ldp       x29, x30, [sp], #16
	ARM64_STACK_EPILOG
.endm
58
59/*****************************************************************************
60 *  Constants                                                                *
61 *****************************************************************************/
62
63.text
64.align 5
65
66/*****************************************************************************
67 *  memcmp_zero_ptr_aligned entrypoint                                        *
68 *****************************************************************************/
69
_memcmp_zero_ptr_aligned:

//  int memcmp_zero_ptr_aligned(const void *s, size_t n)
//
//  In:   x0 = s, x1 = n.  Both are required to be multiples of 8.
//  Out:  x0 = 0 if all n bytes are zero (or n == 0), otherwise 1.
//  Uses: x1-x3, v0-v7 and the condition flags as scratch.
//
//  Written for <rdar://problem/59523721>, where a non-zero byte (memory
//  corruption) is expected to be rare.  There is therefore no early exit:
//  every byte is loaded and ORed into an accumulator, and the accumulator
//  is compared against zero once at the end.

	EstablishFrame
	cmp         x1,     #64
	b.lo        L_wordAtATime       // fewer than 64 bytes: no vector path

//  n >= 64.  Seed the four accumulators with the first 64 bytes of the
//  buffer, then move x0 up to the next 64-byte boundary.  Only the bytes
//  below that boundary count as consumed, so part of the seed block may be
//  read again by the aligned loop.  Re-reading is harmless because OR is
//  idempotent.
	ldp         q4, q5, [x0]
	ldp         q6, q7, [x0, #32]
	add         x1,     x0, x1      // x1 = s + n, one past the last byte
	add         x0,     x0, #64
	and         x0,     x0, #-64    // x0 = first 64-byte boundary above s
	sub         x1,     x1, x0      // x1 = bytes from the boundary to the end
	subs        x1,     x1, #64     // bias by -64: more than one block left?
	b.ls        L_lastBlock

L_alignedBlock:
//  Invariant on entry: x1 = (bytes remaining at x0) - 64, and x1 > 0.
	ldp         q0, q1, [x0]
	ldp         q2, q3, [x0, #32]
	add         x0,     x0, #64     // step to the next block
	subs        x1,     x1, #64     // still more than 64 bytes left?
	orr.16b     v4,     v4, v0      // vector ORs leave the flags untouched
	orr.16b     v5,     v5, v1
	orr.16b     v6,     v6, v2
	orr.16b     v7,     v7, v3
	b.hi        L_alignedBlock

L_lastBlock:
//  x1 + 64 bytes (between 0 and 64) are still untested.  Rather than deal
//  with a partial block piecemeal, read the final 64 bytes of the buffer,
//  ending exactly at s + n.  This may overlap data that has already been
//  accumulated, which is harmless.
	add         x0,     x0, x1      // x0 = s + n - 64
	ldp         q0, q1, [x0]
	ldp         q2, q3, [x0, #32]

//  OR-reduce the last block and the four accumulators down to one vector.
	orr.16b     v0,     v0, v1
	orr.16b     v2,     v2, v3
	orr.16b     v4,     v4, v5
	orr.16b     v6,     v6, v7
	orr.16b     v0,     v0, v2
	orr.16b     v4,     v4, v6
	orr.16b     v0,     v0, v4
	umaxv.16b   b0,     v0          // max byte is zero iff all 16 are zero
	umov        w0,     v0.b[0]     // move that byte to a GPR for testing
	cmp         w0,     #0
	cset        x0,     ne          // 1 if any byte was non-zero, else 0
	ClearFrameAndReturn

L_wordAtATime:
//  n < 64: OR the buffer together one 8-byte word at a time.  A zero length
//  skips the loop and reaches the common exit with x3 == 0, returning 0.
	mov         x3,     xzr         // x3 = running OR of every word
	cbz         x1,     L_wordsDone
L_nextWord:
	ldr         x2,     [x0], #8    // load a word and step past it
	subs        x1,     x1, #8      // count it off; any bytes left?
	orr         x3,     x3, x2      // plain ORR does not disturb the flags
	b.hi        L_nextWord
L_wordsDone:
	cmp         x3,     #0
	cset        x0,     ne          // 1 if any byte was non-zero, else 0
	ClearFrameAndReturn
144
145#endif // __arm64__
146