/* xref: /xnu-10063.121.3/osfmk/corecrypto/ccmode_gcm_gf_mult.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa) */
1 /* Copyright (c) (2011,2014,2015,2018,2019,2021,2023) Apple Inc. All rights reserved.
2  *
3  * corecrypto is licensed under Apple Inc.’s Internal Use License Agreement (which
4  * is contained in the License.txt file distributed with corecrypto) and only to
5  * people who accept that license. IMPORTANT:  Any license rights granted to you by
6  * Apple Inc. (if any) are limited to internal use within your organization only on
7  * devices and computers you own or control, for the sole purpose of verifying the
8  * security characteristics and correct functioning of the Apple Software.  You may
9  * not, directly or indirectly, redistribute the Apple Software or any portions thereof.
10  *
11  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
12  *
13  * This file contains Original Code and/or Modifications of Original Code
14  * as defined in and that are subject to the Apple Public Source License
15  * Version 2.0 (the 'License'). You may not use this file except in
16  * compliance with the License. The rights granted to you under the License
17  * may not be used to create, or enable the creation or redistribution of,
18  * unlawful or unlicensed copies of an Apple operating system, or to
19  * circumvent, violate, or enable the circumvention or violation of, any
20  * terms of an Apple operating system software license agreement.
21  *
22  * Please obtain a copy of the License at
23  * http://www.opensource.apple.com/apsl/ and read it before using this file.
24  *
25  * The Original Code and all software distributed under the License are
26  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
27  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
28  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
29  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
30  * Please see the License for the specific language governing rights and
31  * limitations under the License.
32  *
33  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
34  */
35 
36 #include <corecrypto/cc_config.h>
37 #include "ccmode_internal.h"
38 #include "ccn_internal.h"
39 
40 
41 #if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED
42 
43 // Binary multiplication, x * y = (r_hi << 64) | r_lo.
44 static void
bmul64(uint64_t x,uint64_t y,uint64_t * r_hi,uint64_t * r_lo)45 bmul64(uint64_t x, uint64_t y, uint64_t *r_hi, uint64_t *r_lo)
46 {
47 	cc_dunit x1, x2, x3, x4, x5;
48 	cc_dunit y1, y2, y3, y4, y5;
49 	cc_dunit r, z;
50 
51 	const cc_unit m1 = 0x1084210842108421;
52 	const cc_unit m2 = 0x2108421084210842;
53 	const cc_unit m3 = 0x4210842108421084;
54 	const cc_unit m4 = 0x8421084210842108;
55 	const cc_unit m5 = 0x0842108421084210;
56 
57 	x1 = x & m1;
58 	y1 = y & m1;
59 	x2 = x & m2;
60 	y2 = y & m2;
61 	x3 = x & m3;
62 	y3 = y & m3;
63 	x4 = x & m4;
64 	y4 = y & m4;
65 	x5 = x & m5;
66 	y5 = y & m5;
67 
68 	z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2);
69 	r = z & (((cc_dunit)m2 << 64) | m1);
70 	z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3);
71 	r |= z & (((cc_dunit)m3 << 64) | m2);
72 	z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4);
73 	r |= z & (((cc_dunit)m4 << 64) | m3);
74 	z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5);
75 	r |= z & (((cc_dunit)m5 << 64) | m4);
76 	z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1);
77 	r |= z & (((cc_dunit)m1 << 64) | m5);
78 
79 	*r_hi = (uint64_t)(r >> 64);
80 	*r_lo = (uint64_t)r;
81 }
82 
83 void
ccmode_gcm_gf_mult_64(const unsigned char * a,const unsigned char * b,unsigned char * c)84 ccmode_gcm_gf_mult_64(const unsigned char *a, const unsigned char *b, unsigned char *c)
85 {
86 	cc_unit a_lo, a_hi, b_lo, b_hi;
87 	cc_unit z0_lo, z0_hi, z1_lo, z1_hi, z2_lo, z2_hi;
88 	cc_dunit z_hi, z_lo;
89 
90 	a_lo = cc_load64_be(a + 8);;
91 	a_hi = cc_load64_be(a);
92 
93 	b_lo = cc_load64_be(b + 8);
94 	b_hi = cc_load64_be(b);
95 
96 	// Binary Karatsuba multiplication z = a * b.
97 	bmul64(a_lo, b_lo, &z0_hi, &z0_lo);
98 	bmul64(a_hi, b_hi, &z2_hi, &z2_lo);
99 	bmul64(a_hi ^ a_lo, b_hi ^ b_lo, &z1_hi, &z1_lo);
100 	z1_hi ^= z2_hi ^ z0_hi;
101 	z1_lo ^= z2_lo ^ z0_lo;
102 	z_hi = ((cc_dunit)z2_hi << 64) | (z2_lo ^ z1_hi);
103 	z_lo = (((cc_dunit)z0_hi << 64) | z0_lo) ^ (((cc_dunit)z1_lo) << 64);
104 
105 	// Shift left by one to get reflected(a * b).
106 	z_hi = (z_hi << 1) | (z_lo >> 127);
107 	z_lo <<= 1;
108 
109 	// Reduce.
110 	z_lo ^= (z_lo << 126) ^ (z_lo << 121);
111 	z_hi ^= z_lo ^ (z_lo >> 1) ^ (z_lo >> 2) ^ (z_lo >> 7);
112 
113 	cc_store64_be((cc_unit)z_hi, c + 8);
114 	cc_store64_be((cc_unit)(z_hi >> 64), c);
115 }
116 
117 #endif
118 
// Binary (carry-less) multiplication, x * y = (r_hi << 32) | r_lo.
//
// The product is computed with ordinary integer multiplications only: no
// lookup tables and no branches on the operand values.
//
// Each mask keeps every fourth bit of a 32-bit word, so a masked operand has
// at most 8 bits set, each followed by three zero bits. Multiplying two masked
// operands puts at most 8 partial products on any one bit position; that
// count fits in the 4-bit slot starting at that position, so carries never
// reach the next position of interest. The low bit of each slot is therefore
// the XOR (carry-less sum) of the partial products, and the final masks
// discard the accumulated carry bits.
//
// x, y   Operands.
// r_hi   Receives the upper 32 bits of the 64-bit product.
// r_lo   Receives the lower 32 bits of the 64-bit product.
static void
bmul32(uint32_t x, uint32_t y, uint32_t *r_hi, uint32_t *r_lo)
{
	uint32_t x0, x1, x2, x3;
	uint32_t y0, y1, y2, y3;
	uint64_t z, z0, z1, z2, z3;

	const uint32_t m1 = 0x11111111;
	const uint32_t m2 = 0x22222222;
	const uint32_t m4 = 0x44444444;
	const uint32_t m8 = 0x88888888;

	// Split both operands into four interleaved "every fourth bit" slices.
	x0 = x & m1;
	x1 = x & m2;
	x2 = x & m4;
	x3 = x & m8;
	y0 = y & m1;
	y1 = y & m2;
	y2 = y & m4;
	y3 = y & m8;

	// zN gathers the slice pairs whose bit offsets sum to N modulo 4.
	z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^ ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1);
	z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^ ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2);
	z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^ ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3);
	z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^ ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0);

	// Keep only the low bit of every slot and merge the four residues.
	z0 &= ((uint64_t)m1 << 32) | m1;
	z1 &= ((uint64_t)m2 << 32) | m2;
	z2 &= ((uint64_t)m4 << 32) | m4;
	z3 &= ((uint64_t)m8 << 32) | m8;
	z = z0 | z1 | z2 | z3;

	*r_hi = (uint32_t)(z >> 32);
	*r_lo = (uint32_t)z;
}
155 
// Binary multiplication of two 64-bit operands supplied as 32-bit halves,
// x = (x_h << 32) | x_l and y = (y_h << 32) | y_l. One Karatsuba step over
// bmul32() yields the 128-bit product as four 32-bit words, r[0] being the
// most significant and r[3] the least significant.
static void
bmul64_by_halves(uint32_t x_h, uint32_t x_l, uint32_t y_h, uint32_t y_l, uint32_t r[4])
{
	uint32_t hh_h, hh_l, ll_h, ll_l, mid_h, mid_l;

	bmul32(x_h, y_h, &hh_h, &hh_l);
	bmul32(x_l, y_l, &ll_h, &ll_l);
	bmul32(x_h ^ x_l, y_h ^ y_l, &mid_h, &mid_l);

	// Middle term: remove the high and low products from the combined one.
	mid_h ^= hh_h ^ ll_h;
	mid_l ^= hh_l ^ ll_l;

	// The middle term is added at a 32-bit offset, so it overlaps the two
	// inner words only.
	r[0] = hh_h;
	r[1] = hh_l ^ mid_h;
	r[2] = ll_h ^ mid_l;
	r[3] = ll_l;
}

// GCM field multiplication, c = a * b, using only 32x32 -> 64 bit multiplies.
//
// a, b   16-byte inputs, each read as four 32-bit words with cc_load32_be()
//        (bytes 0..3 form the most significant word).
// c      16-byte output. Both inputs are fully loaded before c is written.
void
ccmode_gcm_gf_mult_32(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
	uint32_t a_hi_h, a_hi_l, a_lo_h, a_lo_l;
	uint32_t b_hi_h, b_hi_l, b_lo_h, b_lo_l;

	// 128-bit partial products as four 32-bit words, index 0 most significant.
	uint32_t z0[4], z1[4], z2[4];

	uint64_t z_hi_h, z_hi_l, z_lo_h, z_lo_l;

	a_lo_l = cc_load32_be(a + 12);
	a_lo_h = cc_load32_be(a + 8);
	a_hi_l = cc_load32_be(a + 4);
	a_hi_h = cc_load32_be(a);

	b_lo_l = cc_load32_be(b + 12);
	b_lo_h = cc_load32_be(b + 8);
	b_hi_l = cc_load32_be(b + 4);
	b_hi_h = cc_load32_be(b);

	// Binary Karatsuba multiplication z = a * b.

	// z0 = a_lo * b_lo, z2 = a_hi * b_hi, z1 = (a_hi ^ a_lo) * (b_hi ^ b_lo).
	bmul64_by_halves(a_lo_h, a_lo_l, b_lo_h, b_lo_l, z0);
	bmul64_by_halves(a_hi_h, a_hi_l, b_hi_h, b_hi_l, z2);
	bmul64_by_halves(a_hi_h ^ a_lo_h, a_hi_l ^ a_lo_l,
	    b_hi_h ^ b_lo_h, b_hi_l ^ b_lo_l, z1);

	// Another round of Karatsuba for a 128-bit result: turn z1 into the
	// middle term by removing the low and high products.
	for (unsigned i = 0; i < 4; i++) {
		z1[i] ^= z0[i] ^ z2[i];
	}

	// Assemble the 256-bit product as four 64-bit words, with the middle
	// term overlapping the two inner words.
	z_hi_h = ((uint64_t)z2[0] << 32) | z2[1];
	z_hi_l = (((uint64_t)z2[2] << 32) | z2[3]) ^ (((uint64_t)z1[0] << 32) | z1[1]);
	z_lo_h = (((uint64_t)z0[0] << 32) | z0[1]) ^ (((uint64_t)z1[2] << 32) | z1[3]);
	z_lo_l = ((uint64_t)z0[2] << 32) | z0[3];

	// Shift left by one to get reflected(a * b).
	z_hi_h = (z_hi_h << 1) | (z_hi_l >> 63);
	z_hi_l = (z_hi_l << 1) | (z_lo_h >> 63);
	z_lo_h = (z_lo_h << 1) | (z_lo_l >> 63);
	z_lo_l <<= 1;

	// Reduce: fold the low 128 bits (z_lo_h:z_lo_l) into the high 128 bits.
	// This is a 128-bit fold by shifts of 1, 2 and 7 written out on 64-bit
	// words; the left shifts by 63, 62 and 57 carry the bits that cross
	// from one 64-bit word into the word below it.
	z_lo_h ^= (z_lo_l << 62) ^ (z_lo_l << 57);
	z_hi_h ^= z_lo_h ^ (z_lo_h >> 1) ^ (z_lo_h >> 2) ^ (z_lo_h >> 7);
	z_hi_l ^= z_lo_l ^ (z_lo_l >> 1) ^ (z_lo_l >> 2) ^ (z_lo_l >> 7);
	z_hi_l ^= (z_lo_h << 63) ^ (z_lo_h << 62) ^ (z_lo_h << 57);

	cc_store64_be(z_hi_l, c + 8);
	cc_store64_be(z_hi_h, c);
}
239 
// GCM field multiplication entry point, c = a * b.
//
// Selects the implementation at compile time: the 64-bit variant when the
// build has 8-byte units and a double-width unit type
// (CCN_UNIT_SIZE == 8 and CC_DUNIT_SUPPORTED), otherwise the 32-bit variant.
// Both variants read 16 bytes from each of a and b and write 16 bytes to c.
void
ccmode_gcm_gf_mult(const unsigned char *a, const unsigned char *b, unsigned char *c)
{
#if (CCN_UNIT_SIZE == 8) && CC_DUNIT_SUPPORTED
	ccmode_gcm_gf_mult_64(a, b, c);
#else
	ccmode_gcm_gf_mult_32(a, b, c);
#endif
}
249