Text file src/internal/bytealg/index_arm64.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // func Index(a, b []byte) int
     9  // input (a and b each arrive as a ptr/len/cap triple; neither cap is read):
    10  //   R0: a ptr (haystack)
    11  //   R1: a len (haystack)
    12  //   R2: a cap (haystack) (unused; overwritten with b ptr below)
    13  //   R3: b ptr (needle)
    14  //   R4: b len (needle) (2 <= len <= 32)
    15  //   R5: b cap (needle) (unused)
    16  // return:
    17  //   R0: result (whatever IndexString, which this function branches to, leaves in R0)
    18  TEXT ·Index<ABIInternal>(SB),NOSPLIT,$0-56
    19  	MOVD	R3, R2	// R2 = b ptr; must happen before R3 is overwritten on the next line
    20  	MOVD	R4, R3	// R3 = b len; R0..R3 now match IndexString's documented inputs
    21  	B	·IndexString<ABIInternal>(SB)	// plain branch, not a call: IndexString's RET returns for us
    22  
    23  // func IndexString(a, b string) int
    24  // input:
    25  //   R0: a ptr (haystack)
    26  //   R1: a len (haystack)
    27  //   R2: b ptr (needle)
    28  //   R3: b len (needle) (2 <= len <= 32)
    29  // return:
    30  //   R0: result: byte offset in a of the first match of b, or -1 if there is none
    31  TEXT ·IndexString<ABIInternal>(SB),NOSPLIT,$0-40
    32  	// The main idea is to load 'sep' (the needle b) into register(s) once,
    33  	// to avoid reloading it from memory again and again
    34  	// for subsequent substring comparisons.
    35  	SUB	R3, R1, R4	// R4 = len(a) - len(sep)
    36  	// R4 contains the start of the last substring for comparison
    37  	ADD	R0, R4, R4	// R4 = a ptr + len(a) - len(sep)
    38  	ADD	$1, R0, R8	// R8 = a ptr + 1; every loop post-increments R0, so 'found' uses R0 - R8
    39  
    40  	CMP	$8, R3
    41  	BHI	greater_8	// len(sep) > 8
    42  	TBZ	$3, R3, len_2_7	// len(sep) <= 8 here, so bit 3 clear means len(sep) < 8
    43  len_8:
    44  	// R5 contains 8-byte of sep
    45  	MOVD	(R2), R5
    46  loop_8:
    47  	// R6 contains substring for comparison
    48  	CMP	R4, R0
    49  	BHI	not_found	// R0 is past the last possible start position
    50  	MOVD.P	1(R0), R6	// load 8 bytes at R0, then R0 += 1
    51  	CMP	R5, R6
    52  	BNE	loop_8	// mismatch: try the next start position
    53  	B	found
    54  len_2_7:
    55  	TBZ	$2, R3, len_2_3	// bit 2 clear: len(sep) is 2 or 3
    56  	TBZ	$1, R3, len_4_5	// bit 1 clear: len(sep) is 4 or 5
    57  	TBZ	$0, R3, len_6	// bit 0 clear: len(sep) is 6; otherwise fall through with 7
    58  len_7:
    59  	// R5 and R6 contain 7-byte of sep
    60  	MOVWU	(R2), R5	// sep bytes 0..3
    61  	// 1-byte overlap with R5
    62  	MOVWU	3(R2), R6	// sep bytes 3..6
    63  loop_7:
    64  	CMP	R4, R0
    65  	BHI	not_found	// R0 is past the last possible start position
    66  	MOVWU.P	1(R0), R3	// load 4 bytes at R0, then R0 += 1 (R3 is scratch from here on; len(sep) is no longer needed)
    67  	CMP	R5, R3
    68  	BNE	loop_7
    69  	MOVWU	2(R0), R3	// R0 is already one past the candidate start, so this is candidate bytes 3..6
    70  	CMP	R6, R3
    71  	BNE	loop_7
    72  	B	found
    73  len_6:
    74  	// R5 and R6 contain 6-byte of sep
    75  	MOVWU	(R2), R5	// sep bytes 0..3
    76  	MOVHU	4(R2), R6	// sep bytes 4..5
    77  loop_6:
    78  	CMP	R4, R0
    79  	BHI	not_found	// R0 is past the last possible start position
    80  	MOVWU.P	1(R0), R3	// load 4 bytes at R0, then R0 += 1 (R3 reused as scratch)
    81  	CMP	R5, R3
    82  	BNE	loop_6
    83  	MOVHU	3(R0), R3	// candidate bytes 4..5 (R0 is already one past the candidate start)
    84  	CMP	R6, R3
    85  	BNE	loop_6
    86  	B	found
    87  len_4_5:
    88  	TBZ	$0, R3, len_4	// bit 0 clear: len(sep) is 4; otherwise fall through with 5
    89  len_5:
    90  	// R5 and R7 contain 5-byte of sep
    91  	MOVWU	(R2), R5	// sep bytes 0..3
    92  	MOVBU	4(R2), R7	// sep byte 4
    93  loop_5:
    94  	CMP	R4, R0
    95  	BHI	not_found	// R0 is past the last possible start position
    96  	MOVWU.P	1(R0), R3	// load 4 bytes at R0, then R0 += 1 (R3 reused as scratch)
    97  	CMP	R5, R3
    98  	BNE	loop_5
    99  	MOVBU	3(R0), R3	// candidate byte 4 (R0 is already one past the candidate start)
   100  	CMP	R7, R3
   101  	BNE	loop_5
   102  	B	found
   103  len_4:
   104  	// R5 contains 4-byte of sep
   105  	MOVWU	(R2), R5
   106  loop_4:
   107  	CMP	R4, R0
   108  	BHI	not_found	// R0 is past the last possible start position
   109  	MOVWU.P	1(R0), R6	// load 4 bytes at R0, then R0 += 1
   110  	CMP	R5, R6
   111  	BNE	loop_4
   112  	B	found
   113  len_2_3:
   114  	TBZ	$0, R3, len_2	// bit 0 clear: len(sep) is 2; otherwise fall through with 3
   115  len_3:
   116  	// R6 and R7 contain 3-byte of sep
   117  	MOVHU	(R2), R6	// sep bytes 0..1
   118  	MOVBU	2(R2), R7	// sep byte 2
   119  loop_3:
   120  	CMP	R4, R0
   121  	BHI	not_found	// R0 is past the last possible start position
   122  	MOVHU.P	1(R0), R3	// load 2 bytes at R0, then R0 += 1 (R3 reused as scratch)
   123  	CMP	R6, R3
   124  	BNE	loop_3
   125  	MOVBU	1(R0), R3	// candidate byte 2 (R0 is already one past the candidate start)
   126  	CMP	R7, R3
   127  	BNE	loop_3
   128  	B	found
   129  len_2:
   130  	// R5 contains 2-byte of sep
   131  	MOVHU	(R2), R5
   132  loop_2:
   133  	CMP	R4, R0
   134  	BHI	not_found	// R0 is past the last possible start position
   135  	MOVHU.P	1(R0), R6	// load 2 bytes at R0, then R0 += 1
   136  	CMP	R5, R6
   137  	BNE	loop_2	// on a match, fall through into found
   138  found:
   139  	SUB	R8, R0, R0	// R0 = (match start + 1) - (a ptr + 1) = offset of the match in a
   140  	RET
   141  not_found:
   142  	MOVD	$-1, R0	// no match: return -1
   143  	RET
   144  greater_8:
   145  	SUB	$9, R3, R11	// len(sep) - 9, offset of R0 for last 8 bytes (R0 is one past the candidate start when used)
   146  	CMP	$16, R3
   147  	BHI	greater_16	// len(sep) > 16
   148  len_9_16:
   149  	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep; R2 += 8
   150  	SUB	$16, R3, R7	// len(sep) - 16, offset of R2 (already advanced by 8) for last 8 bytes
   151  	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep (overlaps R5 when len(sep) < 16)
   152  loop_9_16:
   153  	// search the first 8 bytes first
   154  	CMP	R4, R0
   155  	BHI	not_found	// R0 is past the last possible start position
   156  	MOVD.P	1(R0), R7	// load 8 bytes at R0, then R0 += 1 (R7 reused as scratch)
   157  	CMP	R5, R7
   158  	BNE	loop_9_16
   159  	MOVD	(R0)(R11), R7	// last 8 bytes of the candidate
   160  	CMP	R6, R7		// compare the last 8 bytes
   161  	BNE	loop_9_16
   162  	B	found
   163  greater_16:
   164  	CMP	$24, R3
   165  	BHI	len_25_32	// len(sep) > 24
   166  len_17_24:
   167  	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep; R2 += 16
   168  	SUB	$24, R3, R10		// len(sep) - 24, offset of R2 (already advanced by 16) for last 8 bytes
   169  	MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep
   170  loop_17_24:
   171  	// search the first 16 bytes first
   172  	CMP	R4, R0
   173  	BHI	not_found	// R0 is past the last possible start position
   174  	MOVD.P	1(R0), R10	// candidate bytes 0..7, then R0 += 1 (R10 reused as scratch)
   175  	CMP	R5, R10
   176  	BNE	loop_17_24
   177  	MOVD	7(R0), R10	// candidate bytes 8..15 (R0 is already one past the candidate start)
   178  	CMP	R6, R10
   179  	BNE	loop_17_24
   180  	MOVD	(R0)(R11), R10	// last 8 bytes of the candidate
   181  	CMP	R7, R10		// compare the last 8 bytes
   182  	BNE	loop_17_24
   183  	B	found
   184  len_25_32:
   185  	LDP.P	16(R2), (R5, R6)	// sep bytes 0..15; R2 += 16
   186  	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep; R2 += 8
   187  	SUB	$32, R3, R12	// len(sep) - 32, offset of R2 (already advanced by 24) for last 8 bytes
   188  	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
   189  loop_25_32:
   190  	// search the first 24 bytes first
   191  	CMP	R4, R0
   192  	BHI	not_found	// R0 is past the last possible start position
   193  	MOVD.P	1(R0), R12	// candidate bytes 0..7, then R0 += 1 (R12 reused as scratch)
   194  	CMP	R5, R12
   195  	BNE	loop_25_32
   196  	MOVD	7(R0), R12	// candidate bytes 8..15 (R0 is already one past the candidate start)
   197  	CMP	R6, R12
   198  	BNE	loop_25_32
   199  	MOVD	15(R0), R12	// candidate bytes 16..23
   200  	CMP	R7, R12
   201  	BNE	loop_25_32
   202  	MOVD	(R0)(R11), R12	// last 8 bytes of the candidate
   203  	CMP	R10, R12	// compare the last 8 bytes
   204  	BNE	loop_25_32
   205  	B	found
   206  

View as plain text