xref: /xnu-11215.41.3/bsd/dev/arm/cpu_memcmp_mask.s (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1/*
2 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
31 *     const uint8_t *src2, const uint8_t *mask);
32 *
33 * This module implements fixed-length memory compare with mask routines,
34 * used mainly by the Skywalk networking subsystem.  Each routine is called
35 * on every packet and therefore needs to be as efficient as possible.
36 *
37 * When used in the kernel, these routines save and restore vector registers.
38 */
39
40#ifdef KERNEL
41#include "../../../osfmk/arm/arch.h"
42#include "../../../osfmk/arm64/proc_reg.h"
43
44#if __ARM_VFP__ < 3
45#error "Unsupported: __ARM_VFP__ < 3"
46#endif /* __ARM_VFP__ < 3 */
47#else /* !KERNEL */
48#ifndef LIBSYSCALL_INTERFACE
49#error "LIBSYSCALL_INTERFACE not defined"
50#endif /* !LIBSYSCALL_INTERFACE */
51#endif /* !KERNEL */
52
53#define	src1		r0	/* 1st arg: pointer to first input buffer; r0 also receives the result */
54#define	src2		r1	/* 2nd arg: pointer to second input buffer */
55#define	mask		r2	/* 3rd arg: pointer to the byte mask */
56
/*
 *  @abstract Masked equality test over two 16-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask over all 16 bytes.  The result is zero
 *  when no masked bit differs between the two buffers, and non-zero
 *  otherwise.
 *
 *  KERNEL builds push q0-q2 on entry and pop them before returning; other
 *  builds leave q0-q2 modified.
 *
 *  @param src1 first 16-byte input buffer
 *  @param src2 second 16-byte input buffer
 *  @param mask 16-byte byte mask applied before comparison
 */
	.syntax	unified
	.globl _os_memcmp_mask_16B
	.text
	.align	4
_os_memcmp_mask_16B:

#ifdef KERNEL
	vpush		{q0-q2}			/* preserve every vector register used below */
#endif /* KERNEL */

	vld1.8		{q0}, [src1]		/* q0 = src1[0..15] */
	vld1.8		{q1}, [src2]		/* q1 = src2[0..15] */
	vld1.8		{q2}, [mask]		/* q2 = mask[0..15] */
	veor		q0, q0, q1		/* bits that differ between the inputs */
	vand		q0, q0, q2		/* discard bits the mask does not select */
	vorr		d2, d0, d1		/* fold 128 bits down to 64 */
	vpmax.u32	d0, d2, d2		/* d0[0] = larger of the two 32-bit lanes */
	vmov.32		r0, d0[0]		/* return value: 0 iff nothing differed */

#ifdef KERNEL
	vpop		{q0-q2}			/* r0 already holds the result */
#endif /* KERNEL */

	bx		lr
93
/*
 *  @abstract Compare 32-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  Only q0-q3 are used: once the XOR has consumed the src2 data, q2-q3 are
 *  reloaded with the mask.  q4-q7 (d8-d15) are callee-saved under the ARM
 *  procedure call standard, and non-KERNEL builds of this routine do not
 *  save vector registers, so they must not be touched here.  KERNEL builds
 *  save and restore q0-q3 around the body.
 *
 *  @param src1 first 32-byte input buffer
 *  @param src2 second 32-byte input buffer
 *  @param mask 32-byte byte mask applied before comparison
 */
	.syntax	unified
	.globl _os_memcmp_mask_32B
	.text
	.align	4
_os_memcmp_mask_32B:

#ifdef KERNEL
	vpush		{q0-q3}			/* preserve every vector register used below */
#endif /* KERNEL */

	vld1.8		{q0, q1}, [src1]	/* q0:q1 = src1[0..31] */
	vld1.8		{q2, q3}, [src2]	/* q2:q3 = src2[0..31] */
	veor		q0, q0, q2		/* differing bits, bytes 0..15 */
	veor		q1, q1, q3		/* differing bits, bytes 16..31 */
	vld1.8		{q2, q3}, [mask]	/* src2 data is dead; reuse q2:q3 for the mask */
	vand		q0, q0, q2
	vand		q1, q1, q3
	vorr		q0, q0, q1		/* q0 = OR of all masked differences */
	vorr.u32	d2, d0, d1		/* fold 128 bits down to 64 */
	vpmax.u32	d0, d2, d2		/* d0[0] = larger of the two 32-bit lanes */
	vmov.u32	r0, d0[0]		/* return value: 0 iff nothing differed */

#ifdef KERNEL
	vpop		{q0-q3}			/* r0 already holds the result */
#endif /* KERNEL */

	bx		lr
133
/*
 *  @abstract Compare 48-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  The buffers are processed as a 32-byte chunk followed by a 16-byte tail,
 *  reusing registers as soon as their contents are dead, so only q0-q3 are
 *  used.  q4-q7 (d8-d15) are callee-saved under the ARM procedure call
 *  standard, and non-KERNEL builds of this routine do not save vector
 *  registers, so they must not be touched here.  KERNEL builds save and
 *  restore q0-q3 around the body.
 *
 *  The three pointer registers are advanced by post-increment addressing;
 *  r0 is overwritten with the result before returning.
 *
 *  @param src1 first 48-byte input buffer
 *  @param src2 second 48-byte input buffer
 *  @param mask 48-byte byte mask applied before comparison
 */
	.syntax	unified
	.globl _os_memcmp_mask_48B
	.text
	.align	4
_os_memcmp_mask_48B:

#ifdef KERNEL
	vpush		{q0-q3}			/* preserve every vector register used below */
#endif /* KERNEL */

	/* bytes 0..31 */
	vld1.8		{q0, q1}, [src1]!
	vld1.8		{q2, q3}, [src2]!
	veor		q0, q0, q2
	veor		q1, q1, q3
	vld1.8		{q2, q3}, [mask]!	/* src2 data is dead; reuse q2:q3 for the mask */
	vand		q0, q0, q2
	vand		q1, q1, q3
	vorr		q0, q0, q1		/* q0 accumulates masked differences; q1 is free */

	/* bytes 32..47 */
	vld1.8		{q1}, [src1]
	vld1.8		{q2}, [src2]
	vld1.8		{q3}, [mask]
	veor		q1, q1, q2
	vand		q1, q1, q3
	vorr		q0, q0, q1

	vorr.u32	d2, d0, d1		/* fold 128 bits down to 64 */
	vpmax.u32	d0, d2, d2		/* d0[0] = larger of the two 32-bit lanes */
	vmov.u32	r0, d0[0]		/* return value: 0 iff nothing differed */

#ifdef KERNEL
	vpop		{q0-q3}			/* r0 already holds the result */
#endif /* KERNEL */

	bx		lr
181
/*
 *  @abstract Compare 64-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  The buffers are processed as two 32-byte chunks.  Registers holding src2
 *  data are reloaded with the mask once the XOR has consumed them, so only
 *  q0-q3 and q8-q9 are used.  q4-q7 (d8-d15) are callee-saved under the ARM
 *  procedure call standard, and non-KERNEL builds of this routine do not
 *  save vector registers, so they must not be touched here.  KERNEL builds
 *  save and restore q0-q3 and q8-q9 around the body.
 *
 *  The three pointer registers are advanced by post-increment addressing;
 *  r0 is overwritten with the result before returning.
 *
 *  @param src1 first 64-byte input buffer
 *  @param src2 second 64-byte input buffer
 *  @param mask 64-byte byte mask applied before comparison
 */
	.syntax	unified
	.globl _os_memcmp_mask_64B
	.text
	.align	4
_os_memcmp_mask_64B:

#ifdef KERNEL
	vpush		{q0-q3}			/* preserve every vector register used below */
	vpush		{q8-q9}
#endif /* KERNEL */

	/* bytes 0..31: masked differences end up in q0:q1 */
	vld1.8		{q0, q1}, [src1]!
	vld1.8		{q2, q3}, [src2]!
	veor		q0, q0, q2
	veor		q1, q1, q3
	vld1.8		{q2, q3}, [mask]!	/* src2 data is dead; reuse q2:q3 for the mask */
	vand		q0, q0, q2
	vand		q1, q1, q3

	/* bytes 32..63: masked differences end up in q2:q3 */
	vld1.8		{q2, q3}, [src1]
	vld1.8		{q8, q9}, [src2]
	veor		q2, q2, q8
	veor		q3, q3, q9
	vld1.8		{q8, q9}, [mask]	/* src2 data is dead; reuse q8:q9 for the mask */
	vand		q2, q2, q8
	vand		q3, q3, q9

	/* OR everything together into q0 */
	vorr		q0, q0, q2
	vorr		q1, q1, q3
	vorr		q0, q0, q1
	vorr.u32	d2, d0, d1		/* fold 128 bits down to 64 */
	vpmax.u32	d0, d2, d2		/* d0[0] = larger of the two 32-bit lanes */
	vmov.u32	r0, d0[0]		/* return value: 0 iff nothing differed */

#ifdef KERNEL
	vpop		{q8-q9}			/* restore in reverse push order */
	vpop		{q0-q3}
#endif /* KERNEL */

	bx		lr
232
/*
 *  @abstract Compare 80-byte buffers src1 against src2, applying the byte
 *  masks to input data before comparison.
 *
 *  @discussion
 *  Returns zero if the two buffers are identical after applying the byte
 *  masks, otherwise non-zero.
 *
 *  The buffers are processed as two 32-byte chunks followed by a 16-byte
 *  tail.  Registers are reused as soon as their contents are dead, so only
 *  q0-q3 and q8-q9 are used.  q4-q7 (d8-d15) are callee-saved under the ARM
 *  procedure call standard, and non-KERNEL builds of this routine do not
 *  save vector registers, so they must not be touched here.  KERNEL builds
 *  save and restore q0-q3 and q8-q9 around the body.
 *
 *  The three pointer registers are advanced by post-increment addressing;
 *  r0 is overwritten with the result before returning.
 *
 *  @param src1 first 80-byte input buffer
 *  @param src2 second 80-byte input buffer
 *  @param mask 80-byte byte mask applied before comparison
 */
	.syntax	unified
	.globl _os_memcmp_mask_80B
	.text
	.align	4
_os_memcmp_mask_80B:

#ifdef KERNEL
	vpush		{q0-q3}			/* preserve every vector register used below */
	vpush		{q8-q9}
#endif /* KERNEL */

	/* bytes 0..31: masked differences end up in q0:q1 */
	vld1.8		{q0, q1}, [src1]!
	vld1.8		{q2, q3}, [src2]!
	veor		q0, q0, q2
	veor		q1, q1, q3
	vld1.8		{q2, q3}, [mask]!	/* src2 data is dead; reuse q2:q3 for the mask */
	vand		q0, q0, q2
	vand		q1, q1, q3

	/* bytes 32..63: computed in q2:q3, then merged into q0:q1 */
	vld1.8		{q2, q3}, [src1]!
	vld1.8		{q8, q9}, [src2]!
	veor		q2, q2, q8
	veor		q3, q3, q9
	vld1.8		{q8, q9}, [mask]!	/* src2 data is dead; reuse q8:q9 for the mask */
	vand		q2, q2, q8
	vand		q3, q3, q9
	vorr		q0, q0, q2
	vorr		q1, q1, q3		/* q2:q3 are free again */

	/* bytes 64..79 */
	vld1.8		{q2}, [src1]
	vld1.8		{q3}, [src2]
	veor		q2, q2, q3
	vld1.8		{q3}, [mask]		/* src2 data is dead; reuse q3 for the mask */
	vand		q2, q2, q3

	/* OR everything together into q0 */
	vorr		q0, q0, q1
	vorr		q0, q0, q2
	vorr.u32	d2, d0, d1		/* fold 128 bits down to 64 */
	vpmax.u32	d0, d2, d2		/* d0[0] = larger of the two 32-bit lanes */
	vmov.u32	r0, d0[0]		/* return value: 0 iff nothing differed */

#ifdef KERNEL
	vpop		{q8-q9}			/* restore in reverse push order */
	vpop		{q0-q3}
#endif /* KERNEL */

	bx		lr
289