1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// func memHashAES(p unsafe.Pointer, h, s uintptr) uintptr
//
// memHashAES hashes the s bytes at p together with the seed h using the
// AESENC instruction and returns the low 64 bits of the resulting 128-bit
// state.
//
// Registers (ABIInternal):
//	AX: p on entry, hash value on return
//	BX: h, the hash seed
//	CX: s, the length in bytes
TEXT ·memHashAES<ABIInternal>(SB),NOSPLIT,$0-32
	// Build the seed vector.
	MOVQ	BX, X0				// low 64 bits: per-table hash seed
	PINSRW	$4, CX, X0			// word 4: low 16 bits of the length
	PSHUFHW	$0, X0, X0			// copy that word into all four high words
	MOVO	X0, X1				// keep an unscrambled copy for extra seeds
	PXOR	·aeskeysched(SB), X0		// mix in the per-process seed
	AESENC	X0, X0				// scramble it with one AES round

	// Pick a code path by length.
	CMPQ	CX, $16
	JB	len0to15
	JEQ	len16
	CMPQ	CX, $32
	JBE	len17to32
	CMPQ	CX, $64
	JBE	len33to64
	CMPQ	CX, $128
	JBE	len65to128
	JMP	len129plus

len0to15:
	TESTQ	CX, CX
	JZ	len0

	// Bits 4-11 of p+16 are all zero exactly when those bits of p are
	// all ones. In that case a 16-byte load starting at p could run
	// across a 4 KiB page boundary, so take the careful path.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JZ	pageend

	// Safe to load 16 bytes starting at p; masks<>[CX] then clears
	// the bytes beyond the first CX.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX				// (AX)(CX*8) below then steps in 16-byte entries
	LEAQ	masks<>(SB), AX
	PAND	(AX)(CX*8), X1

mix1:
	PXOR	X0, X1				// data ^ scrambled seed
	AESENC	X1, X1				// three scrambling rounds
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX				// result: low 64 bits of X1
	RET

pageend:
	// AX holds p+16 here, so -32(AX)(CX*1) is p+CX-16: load the 16 bytes
	// that end at the last data byte. The data then sits in the top CX
	// bytes of X1; PSHUFB with shifts<>[CX] moves it down to the bottom
	// and zeroes the remaining bytes.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX				// (AX)(CX*8) below then steps in 16-byte entries
	LEAQ	shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	mix1

len0:
	// No data: the hash is the scrambled seed after one more round.
	AESENC	X0, X0
	MOVQ	X0, AX				// result: low 64 bits of X0
	RET

len16:
	// Exactly one 16-byte block.
	MOVOU	(AX), X1
	JMP	mix1

len17to32:
	// First and last 16 bytes of the data; they overlap when s < 32.
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// Derive a second seed from the unscrambled copy.
	PXOR	·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// Seed each lane, then run three rounds on both.
	PXOR	X0, X2
	PXOR	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// Fold the lanes together.
	PXOR	X3, X2
	MOVQ	X2, AX				// result: low 64 bits of X2
	RET

len33to64:
	// Derive three more seeds (X1-X3) from the unscrambled copy, each
	// with a different 16-byte slice of aeskeysched.
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// First 32 and last 32 bytes of the data (overlapping when s < 64).
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// Seed the four lanes.
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// Round 1.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Round 2.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Round 3.
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Fold the four lanes into X4.
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX				// result: low 64 bits of X4
	RET

len65to128:
	// Derive seven more seeds (X1-X7) from the unscrambled copy, each
	// with a different 16-byte slice of aeskeysched.
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// First 64 and last 64 bytes of the data (overlapping when s < 128).
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// Seed the eight lanes.
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// Round 1.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Round 2.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Round 3.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Fold the eight lanes into X8.
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 was used as scratch; it must be zero again on return.
	PXOR	X15, X15
	MOVQ	X8, AX				// result: low 64 bits of X8
	RET

len129plus:
	// Derive seven more seeds (X1-X7) from the unscrambled copy, each
	// with a different 16-byte slice of aeskeysched.
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	·aeskeysched+16(SB), X1
	PXOR	·aeskeysched+32(SB), X2
	PXOR	·aeskeysched+48(SB), X3
	PXOR	·aeskeysched+64(SB), X4
	PXOR	·aeskeysched+80(SB), X5
	PXOR	·aeskeysched+96(SB), X6
	PXOR	·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// Initialise the state from the final 128 bytes of the data. This
	// tail may overlap the last block consumed by the loop below.
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// Seed the eight lanes.
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// CX = (s-1)/128: the number of 128-byte blocks to consume from the
	// front of the data. It is at least 1 because s >= 129 here.
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN	$16
blockloop:
	// One round on the state alone.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Another round with the next 128 data bytes as the round-key
	// operand, which XORs the data into the state. X0-X7 are free for
	// this because the seeds have already been applied.
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX			// advance to the next block
	DECQ	CX
	JNZ	blockloop

	// Finish with three more rounds on every lane.
	// Round 1.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	// Round 2.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	// Round 3.
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// Fold the eight lanes into X8.
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 was used as scratch; it must be zero again on return.
	PXOR	X15, X15
	MOVQ	X8, AX				// result: low 64 bits of X8
	RET
346
// masks<> is a table of sixteen 16-byte AND masks used to get rid of data in
// the high part of a register. Entry n lives at byte offset n*16 and has its
// low n bytes set to 0xff and all remaining bytes zero, so a PAND with entry n
// keeps the low n bytes and clears the rest. Each entry is written as two
// 8-byte words, low word first.
DATA masks<>+0x00(SB)/8, $0x0000000000000000 // n = 0
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff // n = 1
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff // n = 2
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff // n = 3
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff // n = 4
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff // n = 5
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff // n = 6
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff // n = 7
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff // n = 8
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff // n = 9
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff // n = 10
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff // n = 11
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff // n = 12
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff // n = 13
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff // n = 14
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff // n = 15
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
// 16 entries * 16 bytes, read-only.
GLOBL masks<>(SB),RODATA,$256
381
// shifts<> is a table of sixteen 16-byte control vectors for PSHUFB. They move
// data down from the high bytes of the register to the low bytes of the
// register; the index n (entry at byte offset n*16) is how many bytes to move.
// In entry n the low n control bytes select source bytes 16-n through 15, and
// every other control byte is 0xff, for which PSHUFB writes a zero byte.
// Entry 0 is all zero; memHashAES handles length 0 without a table lookup.
// Each entry is written as two 8-byte words, low word first.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000 // n = 0
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f // n = 1
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e // n = 2
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d // n = 3
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c // n = 4
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b // n = 5
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a // n = 6
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09 // n = 7
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908 // n = 8
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807 // n = 9
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706 // n = 10
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605 // n = 11
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504 // n = 12
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403 // n = 13
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302 // n = 14
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201 // n = 15
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
// 16 entries * 16 bytes, read-only.
GLOBL shifts<>(SB),RODATA,$256
418
// checkMasksAndShiftsAlignment reports, as a one-byte result in AX, whether
// both masks<> and shifts<> start on a 16-byte boundary. memHashAES uses
// entries of both tables directly as memory operands of PAND and PSHUFB.
TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
	LEAQ	shifts<>(SB), BX
	LEAQ	masks<>(SB), AX
	ORQ	BX, AX				// a low bit is clear only if clear in both addresses
	TESTQ	$15, AX				// any of the low four bits set?
	SETEQ	AX				// 1 if both tables are 16-byte aligned, else 0
	RET
427
View as plain text