xref: /xnu-11215.41.3/bsd/dev/arm64/cpu_memcmp_mask.s (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1/*
2 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * extern int os_memcmp_mask_{16,32,48,64,80}B(const uint8_t *src1,
31 *     const uint8_t *src2, const uint8_t *mask);
32 *
33 * This module implements fixed-length memory compare with mask routines,
34 * used mainly by the Skywalk networking subsystem.  Each routine is called
35 * on every packet and therefore needs to be as efficient as possible.
36 *
37 * ARM64 kernel mode -- just like user mode -- no longer requires saving
38 * the vector registers, since it's done by the exception handler code.
39 */
40
41#ifndef KERNEL
42#ifndef LIBSYSCALL_INTERFACE
43#error "LIBSYSCALL_INTERFACE not defined"
44#endif /* !LIBSYSCALL_INTERFACE */
45#endif /* !KERNEL */
46
47#define	src1		x0	/* 1st arg */
48#define	src2		x1	/* 2nd arg */
49#define	mask		x2	/* 3rd arg */
50
/*
 *  @abstract Masked equality test of two 16-byte buffers.
 *
 *  @discussion
 *  Forms (src1 XOR src2) AND mask across all 16 bytes.  The result is zero
 *  when the buffers agree in every bit selected by the mask, and non-zero
 *  otherwise.  The non-zero value carries no ordering information; callers
 *  should only test it against zero.
 *
 *  Registers written: v0-v2, w0.
 *
 *  @param src1 first 16-byte input buffer
 *  @param src2 second 16-byte input buffer
 *  @param mask 16-byte mask; only bits set here take part in the comparison
 */
	.text
	.align	4
	.globl	_os_memcmp_mask_16B
_os_memcmp_mask_16B:
	/* Fetch both inputs and the mask. */
	ld1	{v0.16b}, [src1]
	ld1	{v1.16b}, [src2]
	ld1	{v2.16b}, [mask]

	/* v0 = (src1 ^ src2) & mask: a non-zero byte marks a masked mismatch. */
	eor	v0.16b, v0.16b, v1.16b
	and	v0.16b, v0.16b, v2.16b

	/* Fold to the largest byte; it is zero only when every byte is zero. */
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
77
/*
 *  @abstract Masked equality test of two 32-byte buffers.
 *
 *  @discussion
 *  Forms (src1 XOR src2) AND mask across all 32 bytes, handled as two
 *  16-byte lanes that are ORed together before the final reduction.  The
 *  result is zero when the buffers agree in every bit selected by the mask,
 *  and non-zero otherwise.  The non-zero value carries no ordering
 *  information; callers should only test it against zero.
 *
 *  Registers written: v0-v5, w0.
 *
 *  @param src1 first 32-byte input buffer
 *  @param src2 second 32-byte input buffer
 *  @param mask 32-byte mask; only bits set here take part in the comparison
 */
	.text
	.align	4
	.globl	_os_memcmp_mask_32B
_os_memcmp_mask_32B:
	/* v0-v1 = src1, v2-v3 = src2, v4-v5 = mask. */
	ld1	{v0.16b, v1.16b}, [src1]
	ld1	{v2.16b, v3.16b}, [src2]
	ld1	{v4.16b, v5.16b}, [mask]

	/* Lane 0: masked difference of bytes 0-15. */
	eor	v0.16b, v0.16b, v2.16b
	and	v0.16b, v0.16b, v4.16b

	/* Lane 1: masked difference of bytes 16-31. */
	eor	v1.16b, v1.16b, v3.16b
	and	v1.16b, v1.16b, v5.16b

	/* Merge the lanes, then fold to a single byte for the return value. */
	orr	v0.16b, v0.16b, v1.16b
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
107
/*
 *  @abstract Masked equality test of two 48-byte buffers.
 *
 *  @discussion
 *  Forms (src1 XOR src2) AND mask across all 48 bytes, handled as three
 *  16-byte lanes that are ORed together before the final reduction.  The
 *  result is zero when the buffers agree in every bit selected by the mask,
 *  and non-zero otherwise.  The non-zero value carries no ordering
 *  information; callers should only test it against zero.
 *
 *  Registers written: v0-v5, v16-v18, w0.
 *
 *  @param src1 first 48-byte input buffer
 *  @param src2 second 48-byte input buffer
 *  @param mask 48-byte mask; only bits set here take part in the comparison
 */
	.text
	.align	4
	.globl	_os_memcmp_mask_48B
_os_memcmp_mask_48B:
	/* v0-v2 = src1, v3-v5 = src2, v16-v18 = mask. */
	ld1	{v0.16b, v1.16b, v2.16b}, [src1]
	ld1	{v3.16b, v4.16b, v5.16b}, [src2]
	ld1	{v16.16b, v17.16b, v18.16b}, [mask]

	/* Lane 0: masked difference of bytes 0-15. */
	eor	v0.16b, v0.16b, v3.16b
	and	v0.16b, v0.16b, v16.16b

	/* Lane 1: masked difference of bytes 16-31. */
	eor	v1.16b, v1.16b, v4.16b
	and	v1.16b, v1.16b, v17.16b

	/* Lane 2: masked difference of bytes 32-47. */
	eor	v2.16b, v2.16b, v5.16b
	and	v2.16b, v2.16b, v18.16b

	/* Accumulate all lanes into v0, then fold to a single byte. */
	orr	v0.16b, v0.16b, v1.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
140
/*
 *  @abstract Masked equality test of two 64-byte buffers.
 *
 *  @discussion
 *  Forms (src1 XOR src2) AND mask across all 64 bytes, handled as four
 *  16-byte lanes.  The lanes are ORed pairwise (0|1 and 2|3) and the two
 *  halves are then ORed together before the final reduction.  The result is
 *  zero when the buffers agree in every bit selected by the mask, and
 *  non-zero otherwise.  The non-zero value carries no ordering information;
 *  callers should only test it against zero.
 *
 *  Registers written: v0-v7, v16-v19, w0.
 *
 *  @param src1 first 64-byte input buffer
 *  @param src2 second 64-byte input buffer
 *  @param mask 64-byte mask; only bits set here take part in the comparison
 */
	.text
	.align	4
	.globl	_os_memcmp_mask_64B
_os_memcmp_mask_64B:
	/* v0-v3 = src1, v4-v7 = src2, v16-v19 = mask. */
	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [src1]
	ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [src2]
	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [mask]

	/* Lower half: masked difference of bytes 0-31, merged into v0. */
	eor	v0.16b, v0.16b, v4.16b
	and	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v5.16b
	and	v1.16b, v1.16b, v17.16b
	orr	v0.16b, v0.16b, v1.16b

	/* Upper half: masked difference of bytes 32-63, merged into v2. */
	eor	v2.16b, v2.16b, v6.16b
	and	v2.16b, v2.16b, v18.16b
	eor	v3.16b, v3.16b, v7.16b
	and	v3.16b, v3.16b, v19.16b
	orr	v2.16b, v2.16b, v3.16b

	/* Join the halves, then fold to a single byte for the return value. */
	orr	v0.16b, v0.16b, v2.16b
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
176
/*
 *  @abstract Masked equality test of two 80-byte buffers.
 *
 *  @discussion
 *  Forms (src1 XOR src2) AND mask across all 80 bytes, handled as five
 *  16-byte lanes.  Each buffer is read as a 64-byte group followed by a
 *  16-byte tail; the 64-byte loads post-increment their base register so
 *  the tail load can reuse it.  The result is zero when the buffers agree
 *  in every bit selected by the mask, and non-zero otherwise.  The non-zero
 *  value carries no ordering information; callers should only test it
 *  against zero.
 *
 *  Registers written: v0-v4, v16-v25, x0-x2 (x1 and x2 are left advanced
 *  by 64; x0 holds the result).
 *
 *  @param src1 first 80-byte input buffer
 *  @param src2 second 80-byte input buffer
 *  @param mask 80-byte mask; only bits set here take part in the comparison
 */
	.text
	.align	4
	.globl	_os_memcmp_mask_80B
_os_memcmp_mask_80B:
	/* First 64 bytes: v0-v3 = src1, v16-v19 = src2, v21-v24 = mask. */
	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [src1], #64
	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [src2], #64
	ld1	{v21.16b, v22.16b, v23.16b, v24.16b}, [mask], #64

	/* Last 16 bytes: v4 = src1, v20 = src2, v25 = mask. */
	ld1	{v4.16b}, [src1]
	ld1	{v20.16b}, [src2]
	ld1	{v25.16b}, [mask]

	/* Masked difference of bytes 0-31, merged into v0. */
	eor	v0.16b, v0.16b, v16.16b
	and	v0.16b, v0.16b, v21.16b
	eor	v1.16b, v1.16b, v17.16b
	and	v1.16b, v1.16b, v22.16b
	orr	v0.16b, v0.16b, v1.16b

	/* Masked difference of bytes 32-63, merged into v2. */
	eor	v2.16b, v2.16b, v18.16b
	and	v2.16b, v2.16b, v23.16b
	eor	v3.16b, v3.16b, v19.16b
	and	v3.16b, v3.16b, v24.16b
	orr	v2.16b, v2.16b, v3.16b

	/* Masked difference of bytes 64-79, kept in v4. */
	eor	v4.16b, v4.16b, v20.16b
	and	v4.16b, v4.16b, v25.16b

	/* Accumulate everything into v0, then fold to a single byte. */
	orr	v0.16b, v0.16b, v2.16b
	orr	v0.16b, v0.16b, v4.16b
	umaxv	b0, v0.16b
	mov	w0, v0.s[0]

	ret
218