/*
 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
 *     const uint8_t *src2, const uint8_t *mask);
 *
 * This module implements fixed-length memory compare with mask routines,
 * used mainly by the Skywalk networking subsystem. Each routine is called
 * on every packet and therefore needs to be as efficient as possible.
 *
 * When used in the kernel, these routines save and restore XMM registers.
38 */ 39 40#ifndef KERNEL 41#ifndef LIBSYSCALL_INTERFACE 42#error "LIBSYSCALL_INTERFACE not defined" 43#endif /* !LIBSYSCALL_INTERFACE */ 44#endif /* !KERNEL */ 45 46#define src1 %rdi /* 1st arg */ 47#define src2 %rsi /* 2nd arg */ 48#define mask %rdx /* 3rd arg */ 49 50/* 51 * @abstract Compare 16-byte buffers src1 against src2, applying the byte 52 * masks to input data before comparison. 53 * 54 * @discussion 55 * Returns zero if the two buffers are identical after applying the byte 56 * masks, otherwise non-zero. 57 * 58 * @param src1 first 16-byte input buffer 59 * @param src2 second 16-byte input buffer 60 * @param byte_mask 16-byte byte mask applied before comparision 61 */ 62 .globl _os_memcmp_mask_16B 63 .text 64 .align 4 65_os_memcmp_mask_16B: 66 67 /* push callee-saved registers and set up base pointer */ 68 push %rbp 69 movq %rsp, %rbp 70 71#ifdef KERNEL 72 /* allocate stack space and save xmm regs */ 73 sub $2*16, %rsp 74 movdqa %xmm0, 0*16(%rsp) 75 movdqa %xmm1, 1*16(%rsp) 76#endif /* KERNEL */ 77 78 movdqu (src1), %xmm0 79 movdqu (src2), %xmm1 80 pxor %xmm0, %xmm1 81 movdqu (mask), %xmm0 82 pand %xmm1, %xmm0 83 xorq %rax, %rax 84 ptest %xmm0, %xmm0 85 setne %al 86 87#ifdef KERNEL 88 /* restore xmm regs and deallocate stack space */ 89 movdqa 0*16(%rsp), %xmm0 90 movdqa 1*16(%rsp), %xmm1 91 add $2*16, %rsp 92#endif /* KERNEL */ 93 94 /* restore callee-saved registers */ 95 pop %rbp 96 ret 97 98/* 99 * @abstract Compare 32-byte buffers src1 against src2, applying the byte 100 * masks to input data before comparison. 101 * 102 * @discussion 103 * Returns zero if the two buffers are identical after applying the byte 104 * masks, otherwise non-zero. 
105 * 106 * @param src1 first 32-byte input buffer 107 * @param src2 second 32-byte input buffer 108 * @param byte_mask 32-byte byte mask applied before comparision 109 */ 110 .globl _os_memcmp_mask_32B 111 .text 112 .align 4 113_os_memcmp_mask_32B: 114 115 /* push callee-saved registers and set up base pointer */ 116 push %rbp 117 movq %rsp, %rbp 118 119#ifdef KERNEL 120 /* allocate stack space and save xmm regs */ 121 sub $3*16, %rsp 122 movdqa %xmm0, 0*16(%rsp) 123 movdqa %xmm1, 1*16(%rsp) 124 movdqa %xmm2, 2*16(%rsp) 125#endif /* KERNEL */ 126 127 movdqu (src1), %xmm0 128 movdqu 0x10(src1), %xmm1 129 movdqu (src2), %xmm2 130 pxor %xmm0, %xmm2 131 movdqu 0x10(src2), %xmm0 132 pxor %xmm1, %xmm0 133 movdqu (mask), %xmm1 134 pand %xmm2, %xmm1 135 movdqu 0x10(mask), %xmm2 136 pand %xmm0, %xmm2 137 por %xmm1, %xmm2 138 xorq %rax, %rax 139 ptest %xmm2, %xmm2 140 setne %al 141 142#ifdef KERNEL 143 /* restore xmm regs and deallocate stack space */ 144 movdqa 0*16(%rsp), %xmm0 145 movdqa 1*16(%rsp), %xmm1 146 movdqa 2*16(%rsp), %xmm2 147 add $3*16, %rsp 148#endif /* KERNEL */ 149 150 /* restore callee-saved registers */ 151 pop %rbp 152 ret 153 154/* 155 * @abstract Compare 48-byte buffers src1 against src2, applying the byte 156 * masks to input data before comparison. 157 * 158 * @discussion 159 * Returns zero if the two buffers are identical after applying the byte 160 * masks, otherwise non-zero. 
 *
 * @param src1 first 48-byte input buffer
 * @param src2 second 48-byte input buffer
 * @param byte_mask 48-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_48B
	.text
	.align	4
_os_memcmp_mask_48B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/* allocate stack space and save xmm regs (rsp stays 16-aligned) */
	sub	$4*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
#endif /* KERNEL */

	/* compute (src1 ^ src2) & mask per 16-byte lane, OR lanes together */
	movdqu	(src1), %xmm0		/* xmm0 = src1[0..15] */
	movdqu	0x10(src1), %xmm1	/* xmm1 = src1[16..31] */
	movdqu	0x20(src1), %xmm2	/* xmm2 = src1[32..47] */
	movdqu	(src2), %xmm3
	pxor	%xmm0, %xmm3		/* xmm3 = diff of bytes 0..15 */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff of bytes 16..31 */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff of bytes 32..47 */
	movdqu	(mask), %xmm2
	pand	%xmm3, %xmm2		/* xmm2 = masked diff, bytes 0..15 */
	movdqu	0x10(mask), %xmm3
	pand	%xmm0, %xmm3		/* xmm3 = masked diff, bytes 16..31 */
	por	%xmm2, %xmm3		/* fold lanes 0 and 1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32..47 */
	por	%xmm3, %xmm0		/* xmm0 = all masked diffs combined */
	xorq	%rax, %rax		/* zero return value */
	ptest	%xmm0, %xmm0		/* ZF set iff no masked difference */
	setne	%al			/* return 0 if equal, 1 otherwise */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	add	$4*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret

/*
 * @abstract Compare 64-byte buffers src1 against src2, applying the byte
 * masks to input data before comparison.
 *
 * @discussion
 * Returns zero if the two buffers are identical after applying the byte
 * masks, otherwise non-zero.
 *
 * @param src1 first 64-byte input buffer
 * @param src2 second 64-byte input buffer
 * @param byte_mask 64-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_64B
	.text
	.align	4
_os_memcmp_mask_64B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/* allocate stack space and save xmm regs (rsp stays 16-aligned) */
	sub	$5*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
	movdqa	%xmm4, 4*16(%rsp)
#endif /* KERNEL */

	/* compute (src1 ^ src2) & mask per 16-byte lane, OR lanes together */
	movdqu	(src1), %xmm0		/* xmm0 = src1[0..15] */
	movdqu	0x10(src1), %xmm1	/* xmm1 = src1[16..31] */
	movdqu	0x20(src1), %xmm2	/* xmm2 = src1[32..47] */
	movdqu	0x30(src1), %xmm3	/* xmm3 = src1[48..63] */
	movdqu	(src2), %xmm4
	pxor	%xmm0, %xmm4		/* xmm4 = diff of bytes 0..15 */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff of bytes 16..31 */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff of bytes 32..47 */
	movdqu	0x30(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = diff of bytes 48..63 */
	movdqu	(mask), %xmm3
	pand	%xmm4, %xmm3		/* xmm3 = masked diff, bytes 0..15 */
	movdqu	0x10(mask), %xmm4
	pand	%xmm0, %xmm4		/* xmm4 = masked diff, bytes 16..31 */
	por	%xmm3, %xmm4		/* fold lanes 0 and 1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32..47 */
	movdqu	0x30(mask), %xmm1
	pand	%xmm2, %xmm1		/* xmm1 = masked diff, bytes 48..63 */
	por	%xmm0, %xmm1		/* fold lanes 2 and 3 */
	por	%xmm4, %xmm1		/* xmm1 = all masked diffs combined */
	xorq	%rax, %rax		/* zero return value */
	ptest	%xmm1, %xmm1		/* ZF set iff no masked difference */
	setne	%al			/* return 0 if equal, 1 otherwise */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	movdqa	4*16(%rsp), %xmm4
	add	$5*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret

/*
 * @abstract Compare 80-byte buffers src1 against src2, applying the byte
 * masks to input data before comparison.
 *
 * @discussion
 * Returns zero if the two buffers are identical after applying the byte
 * masks, otherwise non-zero.
 *
 * @param src1 first 80-byte input buffer
 * @param src2 second 80-byte input buffer
 * @param byte_mask 80-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_80B
	.text
	.align	4
_os_memcmp_mask_80B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/* allocate stack space and save xmm regs (rsp stays 16-aligned) */
	sub	$6*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
	movdqa	%xmm4, 4*16(%rsp)
	movdqa	%xmm5, 5*16(%rsp)
#endif /* KERNEL */

	/* compute (src1 ^ src2) & mask per 16-byte lane, OR lanes together */
	movdqu	(src1), %xmm0		/* xmm0 = src1[0..15] */
	movdqu	0x10(src1), %xmm1	/* xmm1 = src1[16..31] */
	movdqu	0x20(src1), %xmm2	/* xmm2 = src1[32..47] */
	movdqu	0x30(src1), %xmm3	/* xmm3 = src1[48..63] */
	movdqu	0x40(src1), %xmm4	/* xmm4 = src1[64..79] */
	movdqu	(src2), %xmm5
	pxor	%xmm0, %xmm5		/* xmm5 = diff of bytes 0..15 */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = diff of bytes 16..31 */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = diff of bytes 32..47 */
	movdqu	0x30(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = diff of bytes 48..63 */
	movdqu	0x40(src2), %xmm3
	pxor	%xmm4, %xmm3		/* xmm3 = diff of bytes 64..79 */
	movdqu	(mask), %xmm4
	pand	%xmm5, %xmm4		/* xmm4 = masked diff, bytes 0..15 */
	movdqu	0x10(mask), %xmm5
	pand	%xmm0, %xmm5		/* xmm5 = masked diff, bytes 16..31 */
	por	%xmm4, %xmm5		/* fold lanes 0 and 1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked diff, bytes 32..47 */
	movdqu	0x30(mask), %xmm4
	pand	%xmm2, %xmm4		/* xmm4 = masked diff, bytes 48..63 */
	por	%xmm0, %xmm4		/* fold lanes 2 and 3 */
	movdqu	0x40(mask), %xmm1
	pand	%xmm3, %xmm1		/* xmm1 = masked diff, bytes 64..79 */
	por	%xmm5, %xmm4		/* fold in lanes 0+1 */
	por	%xmm1, %xmm4		/* xmm4 = all masked diffs combined */
	xorq	%rax, %rax		/* zero return value */
	ptest	%xmm4, %xmm4		/* ZF set iff no masked difference */
	setne	%al			/* return 0 if equal, 1 otherwise */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	movdqa	4*16(%rsp), %xmm4
	movdqa	5*16(%rsp), %xmm5
	add	$6*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret