/*
 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 * This file implements the following function for the arm64 architecture:
 *
 *     int memcmp_zero_ptr_aligned(const void *s, size_t n);
 *
 * The memcmp_zero_ptr_aligned function checks whether the string s of n
 * bytes contains all zeros. The address and size of the string s must be
 * pointer-aligned (8-byte for arm64). It returns 0 if true and 1 otherwise;
 * it also returns 0 if n is 0.
 */
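/*
 * For reference, a minimal C sketch of the same contract (illustration only;
 * it is not part of this file's build, and the name
 * memcmp_zero_ptr_aligned_ref is ours). It mirrors the approach taken below:
 * no early out; every word is loaded and OR-ed into an accumulator that is
 * tested once at the end. Both s and n are assumed to be multiples of 8,
 * per the contract above.
 *
 *     #include <stddef.h>
 *     #include <stdint.h>
 *
 *     int
 *     memcmp_zero_ptr_aligned_ref(const void *s, size_t n)
 *     {
 *         const uint64_t *p = s;
 *         uint64_t acc = 0;       // collects any non-zero bits seen
 *
 *         for (size_t i = 0; i < n / 8; i++) {
 *             acc |= p[i];
 *         }
 *         return acc != 0;        // 0 if all zero (also when n == 0)
 *     }
 */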
/* this guard is used by tests */
#ifdef __arm64__

#include "asm.h"

.globl _memcmp_zero_ptr_aligned

/*****************************************************************************
 * Macros                                                                    *
 *****************************************************************************/

.macro EstablishFrame
    ARM64_STACK_PROLOG
    stp       fp, lr, [sp, #-16]!
    mov       fp, sp
.endm

.macro ClearFrameAndReturn
    ldp       fp, lr, [sp], #16
    ARM64_STACK_EPILOG
.endm

/*****************************************************************************
 * Constants                                                                 *
 *****************************************************************************/

.text
.align 5

/*****************************************************************************
 * memcmp_zero_ptr_aligned entrypoint                                        *
 *****************************************************************************/

_memcmp_zero_ptr_aligned:

// For the use case in <rdar://problem/59523721>, memory corruption should be
// rare, so the all-zeros check can stay simple: no early out is needed.
// We just load all the bytes and logically OR them together. If the result
// is still zero, all the bytes are zero.

    EstablishFrame
    cmp       x1, #64
    b.lo      L_sizeIsSmall

// Load the first 64 bytes, and compute the number of bytes to the
// first 64-byte aligned location. Even though we are going to test
// 64 bytes, only those preceding that 64-byte location "count" towards
// reducing the length of the buffer or advancing the pointers.
    mov       x2, x0            // copy the original addr
    add       x0, x0, #64
    and       x0, x0, #-64      // aligned addr
    ldp       q4, q5, [x2]
    ldp       q6, q7, [x2, #32]
    sub       x2, x0, x2        // bytes between original and aligned addr
    sub       x1, x1, x2        // update length
    subs      x1, x1, #64       // check if length > 64
    b.ls      L_cleanup

L_loop:
    ldp       q0, q1, [x0]
    ldp       q2, q3, [x0, #32]
    orr.16b   v4, v4, v0        // use orr to keep non-zero bytes
    orr.16b   v5, v5, v1
    orr.16b   v6, v6, v2
    orr.16b   v7, v7, v3
    add       x0, x0, #64       // advance pointer
    subs      x1, x1, #64       // check if length > 64
    b.hi      L_loop

L_cleanup:
// Between 0 and 64 more bytes need to be tested. The exact number of bytes
// to test is x1 + 64. Instead of using smaller conditional checks, we simply
// check 64 unaligned bytes from x0+x1. This load may overlap the previous
// one, but that is fine: OR-ing the same bytes in twice changes nothing.
    add       x0, x0, x1
    ldp       q0, q1, [x0]
    ldp       q2, q3, [x0, #32]
    orr.16b   v4, v4, v0        // use orr to keep non-zero bytes
    orr.16b   v5, v5, v1
    orr.16b   v6, v6, v2
    orr.16b   v7, v7, v3

    orr.16b   v4, v4, v5        // reduce four regs into two
    orr.16b   v6, v6, v7
    orr.16b   v4, v4, v6        // reduce two regs into one
    umaxv.16b b0, v4            // reduce 16 bytes into one
    umov      w0, v0.b[0]       // move byte to GPR for testing
    tst       w0, w0
    cset      x0, ne            // return 1 if non-zero, 0 otherwise
    ClearFrameAndReturn

L_sizeIsSmall:
    cbz       x1, L_sizeIsZero  // return zero if length is zero

    mov       x3, #0
0:  ldr       x2, [x0], #8
    orr       x3, x3, x2        // use orr to keep non-zero bytes
    subs      x1, x1, #8        // update length
    b.hi      0b

    tst       x3, x3
    cset      x0, ne            // return 1 if non-zero, 0 otherwise
    ClearFrameAndReturn

L_sizeIsZero:
    mov       x0, #0
    ClearFrameAndReturn

#endif // __arm64__
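/*
 * Usage sketch (hypothetical caller; the page_is_all_zero name and the
 * 4 KiB size are assumptions made for the example). Callers must pass a
 * pointer-aligned address and a size that is a multiple of the pointer
 * size, as documented in the header comment above.
 *
 *     #include <stdbool.h>
 *     #include <stddef.h>
 *
 *     extern int memcmp_zero_ptr_aligned(const void *s, size_t n);
 *
 *     static bool
 *     page_is_all_zero(const void *page)
 *     {
 *         return memcmp_zero_ptr_aligned(page, 4096) == 0;
 *     }
 */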