// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"
7
// func Compare(a, b []byte) int
//
// Incoming ABIInternal register assignment for two slice arguments:
//	X10 = a_base
//	X11 = a_len
//	X12 = a_cap (unused)
//	X13 = b_base (want in X12)
//	X14 = b_len  (want in X13)
//	X15 = b_cap (unused)
// Shift b's base/length down over the unused a_cap slot so the shared
// kernel sees (X10, X11) = a and (X12, X13) = b, then tail-jump.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)
18
// func cmpstring(a, b string) int
//
// Two string arguments already arrive in exactly the layout the shared
// kernel expects, so no register shuffling is needed:
//	X10 = a_base
//	X11 = a_len
//	X12 = b_base
//	X13 = b_len
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	JMP	compare<>(SB)
25
// compare<> is the comparison kernel shared by Compare and cmpstring.
//
// On entry:
//	X10 points to start of a
//	X11 length of a
//	X12 points to start of b
//	X13 length of b
// On exit:
//	X10 = -1 if a < b, 0 if a == b, +1 if a > b
//
// Strategy: compare min(len(a), len(b)) bytes; at the first differing
// byte the result is the unsigned comparison of those bytes, otherwise
// the shorter operand sorts first (length tiebreak at cmp_len).
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the common prefix is trivially equal, so the
	// result depends only on the lengths.
	BEQ	X10, X12, cmp_len

	// X5 = number of bytes to compare = min(len(a), len(b)).
	MIN	X11, X13, X5
	BEQZ	X5, cmp_len

	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X0, X7		// X7 = -(a_base mod 8)
	ADD	$8, X7, X7		// X7 = 8 - (a_base mod 8), bytes until aligned
	SUB	X7, X5, X5		// charge the alignment bytes against the count
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	BLT	X5, X6, compare16
compare32:
	// Compare 32 bytes per iteration as four 8-byte words.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	// Compare 8 bytes per iteration with byte loads (no alignment needed).
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	// Byte-at-a-time tail for the final 0-3 bytes.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	// Walk the $0xff mask up from the least significant byte: on a
	// little-endian target the low byte of the register is the byte at
	// the lowest address, i.e. the first byte in memory order.
	MOV	$0xff, X19
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

// cmp1a-cmp1h: a differing byte pair is already isolated in a register
// pair; compute sign(a_byte - b_byte) via two unsigned set-less-thans.
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	// Common prefix equal: fall back to comparing the lengths.
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5		// X5 = 1 if a > b
	SLTU	X8, X9, X6		// X6 = 1 if a < b
cmp_ret:
	SUB	X5, X6, X10		// X10 = X6 - X5 = -1, 0, or +1
	RET
219