/* xref: /xnu-8792.61.2/bsd/dev/i386/cpu_memcmp_mask.s (revision 42e220869062b56f8d7d0726fd4c88954f87902c) */
/*
 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28
/*
 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
 *     const uint8_t *src2, const uint8_t *mask);
 *
 * This module implements fixed-length memory compare with mask routines,
 * used mainly by the Skywalk networking subsystem.  Each routine is called
 * on every packet and therefore needs to be as efficient as possible.
 *
 * When used in the kernel, these routines save and restore XMM registers.
 */
39
/*
 * Userland builds of this file are only legal from inside Libsyscall;
 * fail loudly if pulled in from anywhere else.
 */
#ifndef KERNEL
#ifndef LIBSYSCALL_INTERFACE
#error "LIBSYSCALL_INTERFACE not defined"
#endif /* !LIBSYSCALL_INTERFACE */
#endif /* !KERNEL */

/* Symbolic names for the System V AMD64 integer argument registers. */
#define	src1		%rdi	/* 1st arg */
#define	src2		%rsi	/* 2nd arg */
#define	mask		%rdx	/* 3rd arg */
49
/*
 *  @abstract Compare 16-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  In:    %rdi = src1, %rsi = src2, %rdx = mask (all 16-byte buffers)
 *  Out:   %rax = 0 on match, 1 on mismatch
 *  Clobb: %rax, flags (kernel builds spill/restore %xmm0-%xmm1)
 *
 *  @param src1 first 16-byte input buffer
 *  @param src2 second 16-byte input buffer
 *  @param byte_mask 16-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_16B
	.text
	.align	4
_os_memcmp_mask_16B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Kernel code may not clobber live FP state: spill the XMM
	 * registers we use.  After the push above %rsp is 16-byte
	 * aligned, so the aligned movdqa stores are safe.
	 */
	sub	$2*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
#endif /* KERNEL */

	movdqu	(src1), %xmm0		/* xmm0 = src1[0..15] (unaligned load) */
	movdqu	(src2), %xmm1		/* xmm1 = src2[0..15] */
	pxor	%xmm0, %xmm1		/* xmm1 = src1 ^ src2; non-zero bytes differ */
	movdqu	(mask), %xmm0		/* xmm0 = mask[0..15] */
	pand	%xmm1, %xmm0		/* keep only differences the mask selects */
	xorl	%eax, %eax		/* rax = 0 (32-bit xor zero-extends) */
	ptest	%xmm0, %xmm0		/* ZF = 1 iff masked difference is all-zero */
	setne	%al			/* return 0 on match, 1 on mismatch */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	add	$2*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret
97
/*
 *  @abstract Compare 32-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  In:    %rdi = src1, %rsi = src2, %rdx = mask (all 32-byte buffers)
 *  Out:   %rax = 0 on match, 1 on mismatch
 *  Clobb: %rax, flags (kernel builds spill/restore %xmm0-%xmm2)
 *
 *  @param src1 first 32-byte input buffer
 *  @param src2 second 32-byte input buffer
 *  @param byte_mask 32-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_32B
	.text
	.align	4
_os_memcmp_mask_32B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Spill the XMM registers we use; %rsp is 16-byte aligned
	 * after the push above, so movdqa stores are safe.
	 */
	sub	$3*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
#endif /* KERNEL */

	/*
	 * Per 16-byte lane: diff = src1 ^ src2, then diff &= mask.
	 * OR the masked lane diffs together and test for all-zero.
	 */
	movdqu	(src1), %xmm0		/* xmm0 = src1 lane 0 */
	movdqu	0x10(src1), %xmm1	/* xmm1 = src1 lane 1 */
	movdqu	(src2), %xmm2
	pxor	%xmm0, %xmm2		/* xmm2 = lane-0 diff */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = lane-1 diff */
	movdqu	(mask), %xmm1
	pand	%xmm2, %xmm1		/* xmm1 = masked lane-0 diff */
	movdqu	0x10(mask), %xmm2
	pand	%xmm0, %xmm2		/* xmm2 = masked lane-1 diff */
	por	%xmm1, %xmm2		/* xmm2 = combined masked diff */
	xorl	%eax, %eax		/* rax = 0 (32-bit xor zero-extends) */
	ptest	%xmm2, %xmm2		/* ZF = 1 iff all masked diffs are zero */
	setne	%al			/* return 0 on match, 1 on mismatch */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	add	$3*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret
153
/*
 *  @abstract Compare 48-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  In:    %rdi = src1, %rsi = src2, %rdx = mask (all 48-byte buffers)
 *  Out:   %rax = 0 on match, 1 on mismatch
 *  Clobb: %rax, flags (kernel builds spill/restore %xmm0-%xmm3)
 *
 *  @param src1 first 48-byte input buffer
 *  @param src2 second 48-byte input buffer
 *  @param byte_mask 48-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_48B
	.text
	.align	4
_os_memcmp_mask_48B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Spill the XMM registers we use; %rsp is 16-byte aligned
	 * after the push above, so movdqa stores are safe.
	 */
	sub	$4*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
#endif /* KERNEL */

	/*
	 * Per 16-byte lane: diff = src1 ^ src2, then diff &= mask.
	 * Registers are recycled as lane results are folded together.
	 */
	movdqu	(src1), %xmm0
	movdqu	0x10(src1), %xmm1
	movdqu	0x20(src1), %xmm2
	movdqu	(src2), %xmm3
	pxor	%xmm0, %xmm3		/* xmm3 = lane-0 diff */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = lane-1 diff */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = lane-2 diff */
	movdqu	(mask), %xmm2
	pand	%xmm3, %xmm2		/* xmm2 = masked lane-0 diff */
	movdqu	0x10(mask), %xmm3
	pand	%xmm0, %xmm3		/* xmm3 = masked lane-1 diff */
	por	%xmm2, %xmm3		/* xmm3 = lanes 0|1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked lane-2 diff */
	por	%xmm3, %xmm0		/* xmm0 = lanes 0|1|2 */
	xorl	%eax, %eax		/* rax = 0 (32-bit xor zero-extends) */
	ptest	%xmm0, %xmm0		/* ZF = 1 iff all masked diffs are zero */
	setne	%al			/* return 0 on match, 1 on mismatch */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	add	$4*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret
217
/*
 *  @abstract Compare 64-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  In:    %rdi = src1, %rsi = src2, %rdx = mask (all 64-byte buffers)
 *  Out:   %rax = 0 on match, 1 on mismatch
 *  Clobb: %rax, flags (kernel builds spill/restore %xmm0-%xmm4)
 *
 *  @param src1 first 64-byte input buffer
 *  @param src2 second 64-byte input buffer
 *  @param byte_mask 64-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_64B
	.text
	.align	4
_os_memcmp_mask_64B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Spill the XMM registers we use; %rsp is 16-byte aligned
	 * after the push above, so movdqa stores are safe.
	 */
	sub	$5*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
	movdqa	%xmm4, 4*16(%rsp)
#endif /* KERNEL */

	/*
	 * Per 16-byte lane: diff = src1 ^ src2, then diff &= mask.
	 * Registers are recycled as lane results are folded together.
	 */
	movdqu	(src1), %xmm0
	movdqu	0x10(src1), %xmm1
	movdqu	0x20(src1), %xmm2
	movdqu	0x30(src1), %xmm3
	movdqu	(src2), %xmm4
	pxor	%xmm0, %xmm4		/* xmm4 = lane-0 diff */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = lane-1 diff */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = lane-2 diff */
	movdqu	0x30(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = lane-3 diff */
	movdqu	(mask), %xmm3
	pand	%xmm4, %xmm3		/* xmm3 = masked lane-0 diff */
	movdqu	0x10(mask), %xmm4
	pand	%xmm0, %xmm4		/* xmm4 = masked lane-1 diff */
	por	%xmm3, %xmm4		/* xmm4 = lanes 0|1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked lane-2 diff */
	movdqu	0x30(mask), %xmm1
	pand	%xmm2, %xmm1		/* xmm1 = masked lane-3 diff */
	por	%xmm0, %xmm1		/* xmm1 = lanes 2|3 */
	por	%xmm4, %xmm1		/* xmm1 = lanes 0|1|2|3 */
	xorl	%eax, %eax		/* rax = 0 (32-bit xor zero-extends) */
	ptest	%xmm1, %xmm1		/* ZF = 1 iff all masked diffs are zero */
	setne	%al			/* return 0 on match, 1 on mismatch */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	movdqa	4*16(%rsp), %xmm4
	add	$5*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret
289
/*
 *  @abstract Compare 80-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  In:    %rdi = src1, %rsi = src2, %rdx = mask (all 80-byte buffers)
 *  Out:   %rax = 0 on match, 1 on mismatch
 *  Clobb: %rax, flags (kernel builds spill/restore %xmm0-%xmm5)
 *
 *  @param src1 first 80-byte input buffer
 *  @param src2 second 80-byte input buffer
 *  @param byte_mask 80-byte byte mask applied before comparison
 */
	.globl	_os_memcmp_mask_80B
	.text
	.align	4
_os_memcmp_mask_80B:

	/* push callee-saved registers and set up base pointer */
	push	%rbp
	movq	%rsp, %rbp

#ifdef KERNEL
	/*
	 * Spill the XMM registers we use; %rsp is 16-byte aligned
	 * after the push above, so movdqa stores are safe.
	 */
	sub	$6*16, %rsp
	movdqa	%xmm0, 0*16(%rsp)
	movdqa	%xmm1, 1*16(%rsp)
	movdqa	%xmm2, 2*16(%rsp)
	movdqa	%xmm3, 3*16(%rsp)
	movdqa	%xmm4, 4*16(%rsp)
	movdqa	%xmm5, 5*16(%rsp)
#endif /* KERNEL */

	/*
	 * Per 16-byte lane: diff = src1 ^ src2, then diff &= mask.
	 * Registers are recycled as lane results are folded together.
	 */
	movdqu	(src1), %xmm0
	movdqu	0x10(src1), %xmm1
	movdqu	0x20(src1), %xmm2
	movdqu	0x30(src1), %xmm3
	movdqu	0x40(src1), %xmm4
	movdqu	(src2), %xmm5
	pxor	%xmm0, %xmm5		/* xmm5 = lane-0 diff */
	movdqu	0x10(src2), %xmm0
	pxor	%xmm1, %xmm0		/* xmm0 = lane-1 diff */
	movdqu	0x20(src2), %xmm1
	pxor	%xmm2, %xmm1		/* xmm1 = lane-2 diff */
	movdqu	0x30(src2), %xmm2
	pxor	%xmm3, %xmm2		/* xmm2 = lane-3 diff */
	movdqu	0x40(src2), %xmm3
	pxor	%xmm4, %xmm3		/* xmm3 = lane-4 diff */
	movdqu	(mask), %xmm4
	pand	%xmm5, %xmm4		/* xmm4 = masked lane-0 diff */
	movdqu	0x10(mask), %xmm5
	pand	%xmm0, %xmm5		/* xmm5 = masked lane-1 diff */
	por	%xmm4, %xmm5		/* xmm5 = lanes 0|1 */
	movdqu	0x20(mask), %xmm0
	pand	%xmm1, %xmm0		/* xmm0 = masked lane-2 diff */
	movdqu	0x30(mask), %xmm4
	pand	%xmm2, %xmm4		/* xmm4 = masked lane-3 diff */
	por	%xmm0, %xmm4		/* xmm4 = lanes 2|3 */
	movdqu	0x40(mask), %xmm1
	pand	%xmm3, %xmm1		/* xmm1 = masked lane-4 diff */
	por	%xmm5, %xmm4		/* xmm4 = lanes 0|1|2|3 */
	por	%xmm1, %xmm4		/* xmm4 = lanes 0|1|2|3|4 */
	xorl	%eax, %eax		/* rax = 0 (32-bit xor zero-extends) */
	ptest	%xmm4, %xmm4		/* ZF = 1 iff all masked diffs are zero */
	setne	%al			/* return 0 on match, 1 on mismatch */

#ifdef KERNEL
	/* restore xmm regs and deallocate stack space */
	movdqa	0*16(%rsp), %xmm0
	movdqa	1*16(%rsp), %xmm1
	movdqa	2*16(%rsp), %xmm2
	movdqa	3*16(%rsp), %xmm3
	movdqa	4*16(%rsp), %xmm4
	movdqa	5*16(%rsp), %xmm5
	add	$6*16, %rsp
#endif /* KERNEL */

	/* restore callee-saved registers */
	pop	%rbp
	ret
369