Text file src/internal/bytealg/compare_riscv64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
// func Compare(a, b []byte) int
//
// Register-ABI entry: the two slice headers occupy X10-X15. We only
// need base and length of each; the caps are ignored. Shuffle b's
// base/len down so the register layout matches cmpstring's
// (X10=a_base, X11=a_len, X12=b_base, X13=b_len), then tail-jump to
// the shared routine, which returns the result in X10 directly to
// our caller.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	// Note: X13 must be copied out before it is overwritten by X14.
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)
    18  
// func cmpstring(a, b string) int
//
// String headers are (base, len) pairs, so the register-ABI layout
// already matches what compare<> expects — no shuffling needed.
// Tail-jump: compare<> returns the result in X10 to our caller.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)
    25  
// compare is the shared kernel for Compare and cmpstring: lexicographic
// byte comparison. It scans min(len(a), len(b)) bytes for the first
// difference; if none is found, the shorter operand sorts first.
//
// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// return value in X10 (-1/0/1)
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the overlapping bytes are trivially equal,
	// so the result is decided by the lengths alone.
	BEQ	X10, X12, cmp_len

	// X5 = min(a_len, b_len) = number of bytes left to compare.
	MIN	X11, X13, X5
	BEQZ	X5, cmp_len

	// Short inputs aren't worth the alignment preamble; compare
	// them bytewise in unrolled groups.
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	// X7 = 8 - (X10 & 7), the byte count needed to align; deduct it
	// from the remaining count up front.
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	BLT	X5, X6, compare16
compare32:
	// Both pointers are 8-byte aligned here: compare 32 bytes per
	// iteration as four 64-bit words.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	// Fewer than 32 bytes remain; see if one 16-byte step fits.
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	// Bytewise tail (also the whole path for short or mutually
	// misaligned inputs): 8 independent byte loads per iteration to
	// keep the loads pipelined, then pairwise comparisons.
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	// Final 0-3 bytes, one at a time.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
	// Walk a 0xff mask up from the low byte; RISC-V is little-endian,
	// so the low byte of the word is the lowest memory address, i.e.
	// the first byte in lexicographic order. The words differ, so the
	// loop always terminates before the mask shifts out.
cmp8b:
	MOV	$0xff, X19
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

	// cmp1a..cmp1h: one differing byte pair, held in a distinct
	// register pair per unrolled slot. Each computes
	// X5 = (a_byte < b_byte), X6 = (b_byte < a_byte) unsigned,
	// and cmp_ret turns that into -1/0/1.
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

	// All compared bytes matched: order by length instead.
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	// X10 = X6 - X5: -1 if a < b, 0 if equal, +1 if a > b.
	SUB	X5, X6, X10
	RET
   219  

View as plain text