xref: /xnu-11417.140.69/bsd/dev/arm64/cpu_memcmp_mask.s (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1/*
2 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
31 *     const uint8_t *src2, const uint8_t *mask);
32 *
33 * This module implements fixed-length memory compare with mask routines,
34 * used mainly by the Skywalk networking subsystem.  Each routine is called
35 * on every packet and therefore needs to be as efficient as possible.
36 *
37 * ARM64 kernel mode -- just like user mode -- no longer requires saving
38 * the vector registers, since it's done by the exception handler code.
39 */
40
41#ifdef KERNEL
42#include <arm64/asm.h>
43#else
44#ifndef LIBSYSCALL_INTERFACE
45#error "LIBSYSCALL_INTERFACE not defined"
46#endif /* !LIBSYSCALL_INTERFACE */
47#endif /* KERNEL */
48
/*
 * Symbolic names for the three pointer arguments shared by every routine
 * in this file.
 */
#define	src1	x0	/* 1st arg: first input buffer */
#define	src2	x1	/* 2nd arg: second input buffer */
#define	mask	x2	/* 3rd arg: byte mask */
52
/*
 *  @abstract Masked equality test over two 16-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask across all 16 bytes and returns the
 *  largest resulting byte.  The return value is therefore zero when the
 *  two buffers agree in every bit selected by the mask, and non-zero
 *  otherwise.
 *
 *  @param src1 first 16-byte input buffer
 *  @param src2 second 16-byte input buffer
 *  @param mask 16-byte byte mask applied before comparison
 */
	.text
	.p2align	4
	.globl	_os_memcmp_mask_16B
_os_memcmp_mask_16B:
#ifdef KERNEL
	ARM64_PROLOG
#endif /* KERNEL */
	ld1	{v0.16b}, [src1]
	ld1	{v1.16b}, [src2]
	ld1	{v2.16b}, [mask]

	eor	v0.16b, v0.16b, v1.16b	/* bits that differ */
	and	v0.16b, v0.16b, v2.16b	/* ... restricted to the mask */

	/*
	 * Collapse the vector to its largest byte.  The scalar write to b0
	 * clears the remaining bits of v0, so the 32-bit lane read below
	 * yields that byte zero-extended.
	 */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
81
/*
 *  @abstract Masked equality test over two 32-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask across all 32 bytes, ORs the two
 *  16-byte halves together and returns the largest resulting byte.  The
 *  return value is therefore zero when the two buffers agree in every
 *  bit selected by the mask, and non-zero otherwise.
 *
 *  @param src1 first 32-byte input buffer
 *  @param src2 second 32-byte input buffer
 *  @param mask 32-byte byte mask applied before comparison
 */
	.text
	.p2align	4
	.globl	_os_memcmp_mask_32B
_os_memcmp_mask_32B:
#ifdef KERNEL
	ARM64_PROLOG
#endif /* KERNEL */
	ld1	{v0.16b, v1.16b}, [src1]
	ld1	{v2.16b, v3.16b}, [src2]
	ld1	{v4.16b, v5.16b}, [mask]

	/* bytes 0..15 */
	eor	v0.16b, v0.16b, v2.16b
	and	v0.16b, v0.16b, v4.16b
	/* bytes 16..31 */
	eor	v1.16b, v1.16b, v3.16b
	and	v1.16b, v1.16b, v5.16b

	/* merge both halves so that one reduction covers all 32 bytes */
	orr	v0.16b, v0.16b, v1.16b

	/*
	 * Collapse the vector to its largest byte.  The scalar write to b0
	 * clears the remaining bits of v0, so the 32-bit lane read below
	 * yields that byte zero-extended.
	 */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
113
/*
 *  @abstract Masked equality test over two 48-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask across all 48 bytes, ORs the three
 *  16-byte chunks together and returns the largest resulting byte.  The
 *  return value is therefore zero when the two buffers agree in every
 *  bit selected by the mask, and non-zero otherwise.
 *
 *  @param src1 first 48-byte input buffer
 *  @param src2 second 48-byte input buffer
 *  @param mask 48-byte byte mask applied before comparison
 */
	.text
	.p2align	4
	.globl	_os_memcmp_mask_48B
_os_memcmp_mask_48B:
#ifdef KERNEL
	ARM64_PROLOG
#endif /* KERNEL */
	ld1	{v0.16b, v1.16b, v2.16b}, [src1]
	ld1	{v3.16b, v4.16b, v5.16b}, [src2]
	ld1	{v16.16b, v17.16b, v18.16b}, [mask]

	/* bytes 0..15 */
	eor	v0.16b, v0.16b, v3.16b
	and	v0.16b, v0.16b, v16.16b
	/* bytes 16..31, folded into the running result in v0 */
	eor	v1.16b, v1.16b, v4.16b
	and	v1.16b, v1.16b, v17.16b
	orr	v0.16b, v0.16b, v1.16b
	/* bytes 32..47, folded into the running result in v0 */
	eor	v2.16b, v2.16b, v5.16b
	and	v2.16b, v2.16b, v18.16b
	orr	v0.16b, v0.16b, v2.16b

	/*
	 * Collapse the vector to its largest byte.  The scalar write to b0
	 * clears the remaining bits of v0, so the 32-bit lane read below
	 * yields that byte zero-extended.
	 */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
148
/*
 *  @abstract Masked equality test over two 64-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask across all 64 bytes, ORs the four
 *  16-byte chunks together and returns the largest resulting byte.  The
 *  return value is therefore zero when the two buffers agree in every
 *  bit selected by the mask, and non-zero otherwise.
 *
 *  @param src1 first 64-byte input buffer
 *  @param src2 second 64-byte input buffer
 *  @param mask 64-byte byte mask applied before comparison
 */
	.text
	.p2align	4
	.globl	_os_memcmp_mask_64B
_os_memcmp_mask_64B:
#ifdef KERNEL
	ARM64_PROLOG
#endif /* KERNEL */
	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [src1]
	ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [src2]
	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [mask]

	/* lower 32 bytes: masked difference accumulated in v0 */
	eor	v0.16b, v0.16b, v4.16b
	and	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v5.16b
	and	v1.16b, v1.16b, v17.16b
	orr	v0.16b, v0.16b, v1.16b

	/* upper 32 bytes: masked difference accumulated in v2 */
	eor	v2.16b, v2.16b, v6.16b
	and	v2.16b, v2.16b, v18.16b
	eor	v3.16b, v3.16b, v7.16b
	and	v3.16b, v3.16b, v19.16b
	orr	v2.16b, v2.16b, v3.16b

	/* merge both halves so that one reduction covers all 64 bytes */
	orr	v0.16b, v0.16b, v2.16b

	/*
	 * Collapse the vector to its largest byte.  The scalar write to b0
	 * clears the remaining bits of v0, so the 32-bit lane read below
	 * yields that byte zero-extended.
	 */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
186
/*
 *  @abstract Masked equality test over two 80-byte buffers.
 *
 *  @discussion
 *  Evaluates (src1 ^ src2) & mask across all 80 bytes, ORs the five
 *  16-byte chunks together and returns the largest resulting byte.  The
 *  return value is therefore zero when the two buffers agree in every
 *  bit selected by the mask, and non-zero otherwise.
 *
 *  Each buffer is fetched as a 64-byte load followed by a 16-byte load;
 *  the first load post-increments the pointer register by 64, so the
 *  argument registers are modified on the way through.
 *
 *  @param src1 first 80-byte input buffer
 *  @param src2 second 80-byte input buffer
 *  @param mask 80-byte byte mask applied before comparison
 */
	.text
	.p2align	4
	.globl	_os_memcmp_mask_80B
_os_memcmp_mask_80B:
#ifdef KERNEL
	ARM64_PROLOG
#endif /* KERNEL */
	/* src1 -> v0..v4 */
	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [src1], #64
	ld1	{v4.16b}, [src1]
	/* src2 -> v16..v20 */
	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [src2], #64
	ld1	{v20.16b}, [src2]
	/* mask -> v21..v25 */
	ld1	{v21.16b, v22.16b, v23.16b, v24.16b}, [mask], #64
	ld1	{v25.16b}, [mask]

	/* bytes 0..31: masked difference accumulated in v0 */
	eor	v0.16b, v0.16b, v16.16b
	and	v0.16b, v0.16b, v21.16b
	eor	v1.16b, v1.16b, v17.16b
	and	v1.16b, v1.16b, v22.16b
	orr	v0.16b, v0.16b, v1.16b

	/* bytes 32..63: masked difference accumulated in v2 */
	eor	v2.16b, v2.16b, v18.16b
	and	v2.16b, v2.16b, v23.16b
	eor	v3.16b, v3.16b, v19.16b
	and	v3.16b, v3.16b, v24.16b
	orr	v2.16b, v2.16b, v3.16b

	/* bytes 64..79 */
	eor	v4.16b, v4.16b, v20.16b
	and	v4.16b, v4.16b, v25.16b

	/* merge all partial results so one reduction covers all 80 bytes */
	orr	v0.16b, v0.16b, v2.16b
	orr	v0.16b, v0.16b, v4.16b

	/*
	 * Collapse the vector to its largest byte.  The scalar write to b0
	 * clears the remaining bits of v0, so the 32-bit lane read below
	 * yields that byte zero-extended.
	 */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
230