Text file src/internal/chacha8rand/chacha8_riscv64.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "asm_riscv64.h"
     6  #include "go_asm.h"
     7  #include "textflag.h"
     8  
     9  // TODO(mzh): use Zvkb if possible
    10  
         // QR performs one ChaCha quarter round on vector registers A, B, C, D,
         // operating on all four blocks (one per lane) at once:
         //
         //	a += b; d ^= a; d = rotl32(d, 16)
         //	c += d; b ^= c; b = rotl32(b, 12)
         //	a += b; d ^= a; d = rotl32(d, 8)
         //	c += d; b ^= c; b = rotl32(b, 7)
         //
         // Without the Zvkb extension there is no vector rotate instruction, so
         // each rotl32(x, n) is synthesized as (x << n) | (x >> (32-n)) using a
         // shift pair plus XOR. Clobbers V28-V31 as scratch (one per rotate).
         // No inline comments below: a trailing comment would swallow the `\`
         // line continuations.
     11  #define QR(A, B, C, D) \
     12  	VADDVV	A, B, A \
     13  	VXORVV	D, A, D \
     14  	VSLLVI	$16, D, V28 \
     15  	VSRLVI	$16, D, D \
     16  	VXORVV	V28, D, D \
     17  	VADDVV	D, C, C  \
     18  	VXORVV	C, B, B \
     19  	VSLLVI	$12, B, V29 \
     20  	VSRLVI	$20, B, B \
     21  	VXORVV	V29, B, B \
     22  	VADDVV	B, A, A  \
     23  	VXORVV	A, D, D \
     24  	VSLLVI	$8, D, V30 \
     25  	VSRLVI	$24, D, D \
     26  	VXORVV	V30, D, D \
     27  	VADDVV	D, C, C  \
     28  	VXORVV	C, B, B \
     29  	VSLLVI	$7, B, V31 \
     30  	VSRLVI	$25, B, B \
     31  	VXORVV	V31, B, B
    32  
     33  // block runs four ChaCha8 block transformations using four elements in each V register.
         //
         // Each lane i (0..3) of a vector register holds the same state word for a
         // different block; the four blocks use counters counter+0 .. counter+3.
         // Register layout during the rounds (one ChaCha state row per group):
         //	V0-V3	constant row ("expand 32-byte k")
         //	V4-V11	seed (key) rows
         //	V12	per-block counter, V13-V15 nonce (always zero here)
         //	V20-V27	saved copy of the seed rows for the feed-forward add
         //	V28-V31	scratch, clobbered by QR
         // Results are stored to blocks as [16][4]uint32: for each of the 16 state
         // words, its four per-block values are contiguous.
     34  // func block(seed *[8]uint32, blocks *[16][4]uint32, counter uint32)
     35  TEXT ·block<ABIInternal>(SB), NOSPLIT, $0
     36  	// seed in X10
     37  	// blocks in X11
     38  	// counter in X12
     39  
     40  #ifndef hasV
         	// No compile-time V guarantee: check the CPU feature bit at runtime
         	// and fall back to the pure-Go implementation without it.
     41  	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X13
     42  	BNEZ	X13, vector_chacha8
     43  	JMP	·block_generic<ABIInternal>(SB)
     44  #endif
     45  
     46  vector_chacha8:
     47  	// At least VLEN >= 128
     48  	VSETIVLI	$4, E32, M1, TA, MA, X0	// VL=4, SEW=32: one block per lane
     49  	// Load initial constants into top row.
     50  	MOV $·chachaConst(SB), X14
     51  	VLSSEG4E32V	(X14), X0, V0 // V0, V1, V2, V3 = const row; stride X0(=0) broadcasts const[i] to all lanes of Vi
     52  	VLSSEG8E32V	(X10), X0, V4 // V4 ... V11, seed; likewise each seed word broadcast across its register's lanes
     53  	VIDV	V12		// V12 = {0, 1, 2, 3} (lane indices)
     54  	VADDVX	X12, V12, V12		// counter: lane i gets counter+i
     55  
     56  	// Clear all nonces.
     57  	VXORVV	V13, V13, V13
     58  	VXORVV	V14, V14, V14
     59  	VXORVV	V15, V15, V15
     60  
     61  	// Copy initial state.
         	// Save seed rows V4-V11 into V20-V27 for the feed-forward addition
         	// after the rounds.
     62  	VMV4RV V4, V20
     63  	VMV4RV V8, V24
     64  
     65  	MOV	$4, X15	// 4 double rounds = 8 ChaCha rounds
     66  	PCALIGN	$16
     67  loop:
         	// Column round: quarter rounds down the four columns.
     68  	QR(V0, V4, V8, V12)
     69  	QR(V1, V5, V9, V13)
     70  	QR(V2, V6, V10, V14)
     71  	QR(V3, V7, V11, V15)
     72  
         	// Diagonal round: quarter rounds along the four diagonals.
     73  	QR(V0, V5, V10, V15)
     74  	QR(V1, V6, V11, V12)
     75  	QR(V2, V7, V8, V13)
     76  	QR(V3, V4, V9, V14)
     77  
     78  	SUB	$1, X15
     79  	BNEZ	X15, loop
     80  
         	// Feed-forward: add the saved seed back into the key rows only.
         	// The constant row (V0-V3) and counter/nonce rows (V12-V15) are
         	// stored below without this addition — the chacha8rand convention,
         	// matching ·block_generic rather than standard ChaCha output.
     81  	VADDVV	V20, V4, V4
     82  	VADDVV	V21, V5, V5
     83  	VADDVV	V22, V6, V6
     84  	VADDVV	V23, V7, V7
     85  	VADDVV	V24, V8, V8
     86  	VADDVV	V25, V9, V9
     87  	VADDVV	V26, V10, V10
     88  	VADDVV	V27, V11, V11
     89  
         	// Store the 16 state rows to blocks as [16][4]uint32,
         	// 16 bytes (4 lanes x 4 bytes) per row.
     90  	VSE32V	V0, (X11); ADD $16, X11;
     91  	VSE32V	V1, (X11); ADD $16, X11;
     92  	VSE32V	V2, (X11); ADD $16, X11;
     93  	VSE32V	V3, (X11); ADD $16, X11;
     94  	VSE32V	V4, (X11); ADD $16, X11;
     95  	VSE32V	V5, (X11); ADD $16, X11;
     96  	VSE32V	V6, (X11); ADD $16, X11;
     97  	VSE32V	V7, (X11); ADD $16, X11;
     98  	VSE32V	V8, (X11); ADD $16, X11;
     99  	VSE32V	V9, (X11); ADD $16, X11;
    100  	VSE32V	V10, (X11); ADD $16, X11;
    101  	VSE32V	V11, (X11); ADD $16, X11;
    102  	VSE32V	V12, (X11); ADD $16, X11;
    103  	VSE32V	V13, (X11); ADD $16, X11;
    104  	VSE32V	V14, (X11); ADD $16, X11;
    105  	VSE32V	V15, (X11); ADD $16, X11;
    106  
    107  	RET
   108  
         // chachaConst holds the four ChaCha constant words "expa" "nd 3" "2-by"
         // "te k" as little-endian uint32s. Note only the first 16 of the 32
         // reserved bytes are initialized; the stride-0 segment load in ·block
         // reads exactly these 16 bytes.
    109  GLOBL	·chachaConst(SB), NOPTR|RODATA, $32
    110  DATA	·chachaConst+0x00(SB)/4, $0x61707865
    111  DATA	·chachaConst+0x04(SB)/4, $0x3320646e
    112  DATA	·chachaConst+0x08(SB)/4, $0x79622d32
    113  DATA	·chachaConst+0x0c(SB)/4, $0x6b206574
   114  

View as plain text