Source file test/codegen/memcombine.go

     1  // asmcheck
     2  
     3  // Copyright 2018 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package codegen
     8  
     9  import (
    10  	"encoding/binary"
    11  	"runtime"
    12  )
    13  
    14  // ------------- //
    15  //    Loading    //
    16  // ------------- //
    17  
     18  func load_le64(b []byte) uint64 {
         	// Little-endian uint64 read from the front of b. The checks below expect
         	// one 64-bit load on each listed architecture; a pattern prefixed with -
         	// must not appear in the generated code.
     19  	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
     20  	// s390x:`MOVDBR\s\(.*\),`
     21  	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
     22  	// loong64:`MOVV\s\(R[0-9]+\),`
     23  	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
     24  	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
     25  	return binary.LittleEndian.Uint64(b)
     26  }
    27  
     28  func load_le64_idx(b []byte, idx int) uint64 {
         	// Little-endian uint64 read from b[idx:]. The checks expect one 64-bit
         	// load; on amd64, s390x, arm64 and loong64 the pattern also requires a
         	// base+index memory operand.
     29  	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
     30  	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
     31  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
     32  	// loong64:`MOVV\s\(R[0-9]+\)\(R[0-9]+\),`
     33  	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
     34  	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
     35  	return binary.LittleEndian.Uint64(b[idx:])
     36  }
    37  
     38  func load_le32(b []byte) uint32 {
         	// Little-endian uint32 read from the front of b. The checks expect one
         	// 32-bit load; the negative patterns reject byte/halfword loads and ORs.
     39  	// amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
     40  	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
     41  	// s390x:`MOVWBR\s\(.*\),`
     42  	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
     43  	// loong64:`MOVWU\s\(R[0-9]+\),`
     44  	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
     45  	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
     46  	return binary.LittleEndian.Uint32(b)
     47  }
    48  
    49  func load_le32_idx(b []byte, idx int) uint32 {
    50  	// amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
    51  	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
    52  	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
    53  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
    54  	// loong64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`
    55  	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
    56  	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s'
    57  	return binary.LittleEndian.Uint32(b[idx:])
    58  }
    59  
     60  func load_le16(b []byte) uint16 {
         	// Little-endian uint16 read from the front of b. The checks expect one
         	// 16-bit load and, where a negative pattern is listed, no byte loads.
     61  	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
     62  	// ppc64le:`MOVHZ\s`,-`MOVBZ`
     63  	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
     64  	// loong64:`MOVHU\s\(R[0-9]+\),`
     65  	// s390x:`MOVHBR\s\(.*\),`
     66  	// ppc64:`MOVHBR\s`,-`MOVBZ`
     67  	return binary.LittleEndian.Uint16(b)
     68  }
    69  
     70  func load_le16_idx(b []byte, idx int) uint16 {
         	// Little-endian uint16 read from b[idx:]. The checks expect one 16-bit
         	// load; arm64, loong64 and s390x additionally require a base+index operand.
     71  	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
     72  	// ppc64le:`MOVHZ\s`,-`MOVBZ`
     73  	// ppc64:`MOVHBR\s`,-`MOVBZ`
     74  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
     75  	// loong64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`
     76  	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
     77  	return binary.LittleEndian.Uint16(b[idx:])
     78  }
    79  
     80  func load_be64(b []byte) uint64 {
         	// Big-endian uint64 read from the front of b. Each directive names the
         	// instruction(s) expected for the combined 64-bit read (for example BSWAPQ
         	// or MOVBEQ on amd64, REV plus MOVD on arm64) and rejects narrower loads.
     81  	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
     82  	// amd64/v3:`MOVBEQ`
     83  	// s390x:`MOVD\s\(.*\),`
     84  	// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
     85  	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
     86  	// ppc64:`MOVD`,-`MOV[BHW]Z`
     87  	return binary.BigEndian.Uint64(b)
     88  }
    89  
     90  func load_be64_idx(b []byte, idx int) uint64 {
         	// Big-endian uint64 read from b[idx:]. Same expectations as load_be64,
         	// with a base+index memory operand required on amd64/v3, s390x and arm64.
     91  	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
     92  	// amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
     93  	// s390x:`MOVD\s\(.*\)\(.*\*1\),`
     94  	// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
     95  	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
     96  	// ppc64:`MOVD`,-`MOV[BHW]Z`
     97  	return binary.BigEndian.Uint64(b[idx:])
     98  }
    99  
   100  func load_be32(b []byte) uint32 {
   101  	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
   102  	// amd64/v3: `MOVBEL`
   103  	// s390x:`MOVWZ\s\(.*\),`
   104  	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
   105  	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
   106  	// ppc64:`MOVWZ`,-MOV[BH]Z`
   107  	return binary.BigEndian.Uint32(b)
   108  }
   109  
   110  func load_be32_idx(b []byte, idx int) uint32 {
   111  	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
   112  	// amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
   113  	// s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
   114  	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
   115  	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
   116  	// ppc64:`MOVWZ`,-MOV[BH]Z`
   117  	return binary.BigEndian.Uint32(b[idx:])
   118  }
   119  
    120  func load_be16(b []byte) uint16 {
         	// Big-endian uint16 read from the front of b. The checks expect one
         	// 16-bit read (ROLW $8 on amd64, REV16W plus MOVHU on arm64) and no
         	// byte loads, ORs or shifts where a negative pattern is listed.
    121  	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
    122  	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
    123  	// ppc64le:`MOVHBR`,-`MOVBZ`
    124  	// ppc64:`MOVHZ`,-`MOVBZ`
    125  	// s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
    126  	return binary.BigEndian.Uint16(b)
    127  }
   128  
    129  func load_be16_idx(b []byte, idx int) uint16 {
         	// Big-endian uint16 read from b[idx:]. Same expectations as load_be16,
         	// with a base+index memory operand required on arm64 and s390x.
    130  	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
    131  	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
    132  	// ppc64le:`MOVHBR`,-`MOVBZ`
    133  	// ppc64:`MOVHZ`,-`MOVBZ`
    134  	// s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
    135  	return binary.BigEndian.Uint16(b[idx:])
    136  }
   137  
    138  func load_le_byte2_uint16(s []byte) uint16 {
         	// Hand-written little-endian uint16 built as s[0] | s[1]<<8. The checks
         	// expect the two byte loads to be merged into one 16-bit load.
    139  	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
    140  	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
    141  	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
    142  	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
    143  	// ppc64:`MOVHBR`,-`MOVBZ`
    144  	return uint16(s[0]) | uint16(s[1])<<8
    145  }
   146  
    147  func load_le_byte2_uint16_inv(s []byte) uint16 {
         	// Same value as load_le_byte2_uint16 with the OR operands written in the
         	// opposite order; the checks are identical.
    148  	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
    149  	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
    150  	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
    151  	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
    152  	// ppc64:`MOVHBR`,-`MOVBZ`
    153  	return uint16(s[1])<<8 | uint16(s[0])
    154  }
   155  
   156  func load_le_byte4_uint32(s []byte) uint32 {
   157  	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
   158  	// 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   159  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
   160  	// ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
   161  	// ppc64:`MOVWBR`,-MOV[BH]Z`
   162  	return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
   163  }
   164  
    165  func load_le_byte4_uint32_inv(s []byte) uint32 {
         	// Little-endian uint32 from s[0] through s[3] with the highest byte written
         	// first; the checks expect one 32-bit load and no byte or halfword loads.
    166  	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
    167  	// ppc64le:`MOVWZ`,-`MOV[BH]Z`
    168  	// ppc64:`MOVWBR`,-`MOV[BH]Z`
    169  	return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
    170  }
   171  
   172  func load_le_byte8_uint64(s []byte) uint64 {
   173  	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
   174  	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
   175  	// ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
   176  	// ppc64:`MOVDBR`,-`MOVW[WHB]Z`
   177  	return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
   178  }
   179  
    180  func load_le_byte8_uint64_inv(s []byte) uint64 {
         	// Little-endian uint64 from s[0] through s[7] with the highest byte written
         	// first; the checks expect one 64-bit load and no narrower loads.
    181  	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
    182  	// ppc64le:`MOVD`,-`MOV[WHB]Z`
    183  	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
    184  	return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
    185  }
   186  
    187  func load_be_byte2_uint16(s []byte) uint16 {
         	// Hand-written big-endian uint16 built as s[0]<<8 | s[1]. The checks
         	// expect one 16-bit load (with REV16W on arm64 or ROLW on amd64) and no
         	// byte loads.
    188  	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
    189  	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
    190  	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
    191  	// ppc64:`MOVHZ`,-`MOVBZ`
    192  	return uint16(s[0])<<8 | uint16(s[1])
    193  }
   194  
    195  func load_be_byte2_uint16_inv(s []byte) uint16 {
         	// Same value as load_be_byte2_uint16 with the OR operands written in the
         	// opposite order; the checks are identical.
    196  	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
    197  	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
    198  	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
    199  	// ppc64:`MOVHZ`,-`MOVBZ`
    200  	return uint16(s[1]) | uint16(s[0])<<8
    201  }
   202  
    203  func load_be_byte4_uint32(s []byte) uint32 {
         	// Hand-written big-endian uint32 from s[0] through s[3], most significant
         	// byte first; the checks expect one 32-bit load and no byte or halfword loads.
    204  	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
    205  	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
    206  	// ppc64:`MOVWZ`,-`MOV[HB]Z`
    207  	return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
    208  }
   209  
    210  func load_be_byte4_uint32_inv(s []byte) uint32 {
         	// Big-endian uint32 from s[0] through s[3] with the least significant byte
         	// written first; the checks expect one 32-bit load (BSWAPL or MOVBEL on
         	// amd64) and no byte or halfword loads.
    211  	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
    212  	// amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
    213  	// amd64/v3: `MOVBEL`
    214  	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
    215  	// ppc64:`MOVWZ`,-`MOV[HB]Z`
    216  	return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
    217  }
   218  
    219  func load_be_byte8_uint64(s []byte) uint64 {
         	// Hand-written big-endian uint64 from s[0] through s[7], most significant
         	// byte first; the checks expect one 64-bit load and no narrower loads.
    220  	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
    221  	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
    222  	// ppc64:`MOVD`,-`MOV[WHB]Z`
    223  	return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
    224  }
   225  
    226  func load_be_byte8_uint64_inv(s []byte) uint64 {
         	// Big-endian uint64 from s[0] through s[7] with the least significant byte
         	// written first; the checks expect one 64-bit load (BSWAPQ or MOVBEQ on
         	// amd64) and no narrower loads.
    227  	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
    228  	// amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
    229  	// amd64/v3: `MOVBEQ`
    230  	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
    231  	// ppc64:`MOVD`,-`MOV[BHW]Z`
    232  	return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
    233  }
   234  
    235  func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
         	// Little-endian uint16 from s[idx] and s[idx+1]. The checks expect one
         	// 16-bit load, with a base+index operand on arm64, 386 and amd64.
    236  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
    237  	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
    238  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
    239  	// ppc64le:`MOVHZ`,-`MOVBZ`
    240  	// ppc64:`MOVHBR`,-`MOVBZ`
    241  	return uint16(s[idx]) | uint16(s[idx+1])<<8
    242  }
   243  
    244  func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
         	// Same value as load_le_byte2_uint16_idx with the OR operands written in
         	// the opposite order; the checks are identical.
    245  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
    246  	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
    247  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
    248  	// ppc64le:`MOVHZ`,-`MOVBZ`
    249  	// ppc64:`MOVHBR`,-`MOVBZ`
    250  	return uint16(s[idx+1])<<8 | uint16(s[idx])
    251  }
   252  
    253  func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
         	// Little-endian uint32 from s[idx] through s[idx+3], lowest byte first; the
         	// checks expect one base+index 32-bit load and no byte or halfword loads.
    254  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
    255  	// amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
    256  	return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
    257  }
   258  
    259  func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
         	// Little-endian uint32 from s[idx] through s[idx+3] with the highest byte
         	// written first; only arm64 is checked (one base+index 32-bit load).
    260  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
    261  	return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
    262  }
   263  
    264  func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
         	// Little-endian uint64 from s[idx] through s[idx+7], lowest byte first; the
         	// checks expect one base+index 64-bit load and no narrower loads.
    265  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
    266  	// amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
    267  	return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
    268  }
   269  
    270  func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
         	// Little-endian uint64 from s[idx] through s[idx+7] with the highest byte
         	// written first; only arm64 is checked (one base+index 64-bit load).
    271  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
    272  	return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
    273  }
   274  
    275  func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
         	// Big-endian uint16 from s[idx] and s[idx+1]; the checks expect one
         	// base+index 16-bit load (plus REV16W on arm64) and no byte loads.
    276  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
    277  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
    278  	return uint16(s[idx])<<8 | uint16(s[idx+1])
    279  }
   280  
    281  func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
         	// Same value as load_be_byte2_uint16_idx with the OR operands written in
         	// the opposite order; the checks are identical.
    282  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
    283  	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
    284  	return uint16(s[idx+1]) | uint16(s[idx])<<8
    285  }
   286  
    287  func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
         	// Big-endian uint32 from s[idx] through s[idx+3]; the arm64 check expects
         	// one base+index 32-bit load followed by REVW.
    288  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
    289  	return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
    290  }
   291  
    292  func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
         	// Big-endian uint64 from s[idx] through s[idx+7]; the arm64 check expects
         	// one base+index 64-bit load followed by REV.
    293  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
    294  	return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
    295  }
   296  
    297  func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
         	// Little-endian uint16 from s[idx<<1] and s[(idx<<1)+1]; the arm64 check
         	// expects one 16-bit load whose index register is shifted left by 1.
    298  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
    299  	return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
    300  }
   301  
    302  func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
         	// Same value as load_le_byte2_uint16_idx2 with the OR operands written in
         	// the opposite order; the arm64 check is identical.
    303  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
    304  	return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
    305  }
   306  
    307  func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
         	// Little-endian uint32 from the four bytes starting at s[idx<<2]; the arm64
         	// check expects one 32-bit load whose index register is shifted left by 2.
    308  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
    309  	return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
    310  }
   311  
    312  func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
         	// Same value as load_le_byte4_uint32_idx4 with the highest byte written
         	// first; the arm64 check is identical.
    313  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
    314  	return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
    315  }
   316  
    317  func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
         	// Little-endian uint64 from the eight bytes starting at s[idx<<3]; the arm64
         	// check expects one 64-bit load whose index register is shifted left by 3.
    318  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
    319  	return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
    320  }
   321  
    322  func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
         	// Same value as load_le_byte8_uint64_idx8 with the highest byte written
         	// first; the arm64 check is identical.
    323  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
    324  	return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
    325  }
   326  
    327  func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
         	// Big-endian uint16 from s[idx<<1] and s[(idx<<1)+1]; the arm64 check
         	// expects one 16-bit load with the index shifted left by 1, plus REV16W.
    328  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
    329  	return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
    330  }
   331  
    332  func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
         	// Same value as load_be_byte2_uint16_idx2 with the OR operands written in
         	// the opposite order; the arm64 check is identical.
    333  	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
    334  	return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
    335  }
   336  
    337  func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
         	// Big-endian uint32 from the four bytes starting at s[idx<<2]; the arm64
         	// check expects one 32-bit load with the index shifted left by 2, plus REVW.
    338  	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
    339  	return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
    340  }
   341  
    342  func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
         	// Big-endian uint64 from the eight bytes starting at s[idx<<3]; the arm64
         	// check expects one 64-bit load with the index shifted left by 3, plus REV.
    343  	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
    344  	return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
    345  }
   346  
   347  // Some tougher cases for the memcombine pass.
   348  
    349  func reassoc_load_uint32(b []byte) uint32 {
         	// Little-endian uint32 from b[0] through b[3] with the ORs grouped into two
         	// parenthesized pairs; the amd64 check still expects a single 32-bit load.
    350  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
    351  	return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
    352  }
   353  
    354  func extrashift_load_uint32(b []byte) uint32 {
         	// Every byte is shifted 2 bits further than in a plain little-endian read;
         	// the amd64 check expects one 32-bit load plus a left shift by 2.
    355  	// amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
    356  	return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
    357  }
   358  
    359  func outoforder_load_uint32(b []byte) uint32 {
         	// Little-endian uint32 from b[0] through b[3] with the b[2] term written
         	// before the b[1] term; the amd64 check expects a single 32-bit load.
    360  	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
    361  	return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
    362  }
   363  
    364  func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
         	// ORs x and y with a little-endian uint32 read from b; the amd64 check
         	// expects an ORL with a memory operand and no byte or word loads.
    365  	// amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
    366  	return x | binary.LittleEndian.Uint32(b) | y
    367  	// TODO: Note that
    368  	//   x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
    369  	// doesn't work because it associates in a way that memcombine can't detect it.
    370  }
   371  
   372  // Check load combining across function calls.
   373  
    374  func fcall_byte(a [2]byte) [2]byte {
         	// Self-recursive with no base case, so this is only useful as compiler
         	// input. The [2]byte value is passed through two nested calls and the
         	// amd64 check on that line expects a MOVW.
    375  	return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
    376  }
   377  
    378  func fcall_uint16(a [2]uint16) [2]uint16 {
         	// Self-recursive with no base case, so this is only useful as compiler
         	// input. The [2]uint16 value is passed through two nested calls and the
         	// amd64 check on that line expects a MOVL.
    379  	return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
    380  }
   381  
    382  func fcall_uint32(a [2]uint32) [2]uint32 {
         	// Self-recursive with no base case, so this is only useful as compiler
         	// input. The [2]uint32 value is passed through two nested calls and the
         	// amd64 check on that line expects a MOVQ.
    383  	return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
    384  }
   385  
   386  // We want to merge load+op in the first function, but not in the
   387  // second. See Issue 19595.
    388  func load_op_merge(p, q *int) {
         	// Loads *p once and adds it to *q. The amd64 check sits on the load line
         	// and expects an ADDQ whose source is a memory operand.
    389  	x := *p // amd64:`ADDQ\t\(`
    390  	*q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
    391  }
    392  func load_op_no_merge(p, q *int) {
         	// Loads *p once before a 10-iteration loop that adds it to *q. The amd64
         	// check inside the loop expects an ADDQ whose source is a register.
    393  	x := *p
    394  	for i := 0; i < 10; i++ {
    395  		*q += x // amd64:`ADDQ\t[A-Z]`
    396  	}
    397  }
   398  
    399  func load_op_in_loop(a []int) int {
         	// Returns the sum of the elements of a. The amd64 check expects the element
         	// load to be folded into an ADDQ with a base+index*8 memory operand.
    400  	r := 0
    401  	for _, x := range a {
    402  		// amd64:`ADDQ\t\([A-Z]+\)\([A-Z]+\*8\), [A-Z]+`
    403  		r += x
    404  	}
    405  	return r
    406  }
   407  
   408  // Make sure offsets are folded into loads and stores.
    409  func offsets_fold(_, a [20]byte) (b [20]byte) {
         	// Copies the 20-byte array a into the named result b. The arm64 check
         	// expects MOVD loads from a+offset(FP) and MOVD stores to b+offset(FP).
    410  	// arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)`
    411  	b = a
    412  	return
    413  }
   414  
   415  // Make sure we don't put pointers in SSE registers across safe
   416  // points.
   417  
    418  func safe_point(p, q *[2]*int) {
         	// Reads both pointers from p, calls runtime.GC, then writes them to q. The
         	// amd64 checks forbid MOVUPS on both the load line and the store line.
    419  	a, b := p[0], p[1] // amd64:-`MOVUPS`
    420  	runtime.GC()
    421  	q[0], q[1] = a, b // amd64:-`MOVUPS`
    422  }
   423  
   424  // ------------- //
   425  //    Storing    //
   426  // ------------- //
   427  
   428  func store_le64(b []byte, x uint64) {
   429  	// amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
   430  	// arm64:`MOVD`,-`MOV[WBH]`
   431  	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
   432  	// ppc64:`MOVDBR`,-MOVB\s`
   433  	// s390x:`MOVDBR\s.*\(.*\)$`
   434  	binary.LittleEndian.PutUint64(b, x)
   435  }
   436  
    437  func store_le64_idx(b []byte, x uint64, idx int) {
         	// Little-endian store of x into b[idx:]. The checks expect one 64-bit
         	// store, with a base+index operand on amd64, arm64 and s390x.
    438  	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
    439  	// arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
    440  	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
    441  	// ppc64:`MOVDBR`,-`MOVBZ`
    442  	// s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
    443  	binary.LittleEndian.PutUint64(b[idx:], x)
    444  }
   445  
    446  func store_le64_idx2(dst []byte, d, length, offset int) []byte {
         	// Reads a little-endian uint64 from dst[d-offset:] and writes it to the
         	// start of dst[d:d+length], then returns dst. The amd64 check expects one
         	// base+index 64-bit store and no shifts.
    447  	a := dst[d : d+length]
    448  	b := dst[d-offset:]
    449  	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
    450  	binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b))
    451  	return dst
    452  }
   453  
    454  func store_le64_idx_const(b []byte, idx int) {
         	// Stores the constant 123 as a little-endian uint64 at b[idx:]; the amd64
         	// check expects a single MOVQ of the immediate to a base+index address.
    455  	// amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
    456  	binary.LittleEndian.PutUint64(b[idx:], 123)
    457  }
   458  
    459  func store_le64_load(b []byte, x *[8]byte) {
         	// Copies the eight bytes of x into b through a little-endian load and
         	// store. The b[8] access up front panics unless len(b) is at least 9. The
         	// checks reject narrower moves and, on s390x, byte-reversed W/H moves.
    460  	_ = b[8]
    461  	// amd64:-`MOV[BWL]`
    462  	// arm64:-`MOV[BWH]`
    463  	// ppc64le:`MOVD\s`,-`MOV[BWH]Z`
    464  	// ppc64:`MOVDBR`
    465  	// s390x:-`MOVB`,-`MOV[WH]BR`
    466  	binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
    467  }
   468  
    469  func store_le32(b []byte, x uint32) {
         	// Little-endian store of x into the first four bytes of b; the checks
         	// expect one 32-bit store on each listed architecture.
    470  	// amd64:`MOVL\s`
    471  	// arm64:`MOVW`,-`MOV[BH]`
    472  	// ppc64le:`MOVW\s`
    473  	// ppc64:`MOVWBR`
    474  	// s390x:`MOVWBR\s.*\(.*\)$`
    475  	binary.LittleEndian.PutUint32(b, x)
    476  }
   477  
    478  func store_le32_idx(b []byte, x uint32, idx int) {
         	// Little-endian store of x into b[idx:]; the checks expect one 32-bit
         	// store, with a base+index operand on arm64 and s390x.
    479  	// amd64:`MOVL\s`
    480  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
    481  	// ppc64le:`MOVW\s`
    482  	// ppc64:`MOVWBR`
    483  	// s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
    484  	binary.LittleEndian.PutUint32(b[idx:], x)
    485  }
   486  
   487  func store_le32_idx_const(b []byte, idx int) {
   488  	// amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
   489  	// ppc64x:`MOVW\s`,-MOV[HB]`
   490  	binary.LittleEndian.PutUint32(b[idx:], 123)
   491  }
   492  
    493  func store_le16(b []byte, x uint16) {
         	// Little-endian store of x into the first two bytes of b; the checks
         	// expect one 16-bit store on each listed architecture.
    494  	// amd64:`MOVW\s`
    495  	// arm64:`MOVH`,-`MOVB`
    496  	// ppc64le:`MOVH\s`
    497  	// ppc64:`MOVHBR`
    498  	// s390x:`MOVHBR\s.*\(.*\)$`
    499  	binary.LittleEndian.PutUint16(b, x)
    500  }
   501  
    502  func store_le16_idx(b []byte, x uint16, idx int) {
         	// Little-endian store of x into b[idx:]; the checks expect one 16-bit
         	// store, with a base+index operand on arm64 and s390x.
    503  	// amd64:`MOVW\s`
    504  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
    505  	// ppc64le:`MOVH\s`
    506  	// ppc64:`MOVHBR\s`
    507  	// s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
    508  	binary.LittleEndian.PutUint16(b[idx:], x)
    509  }
   510  
    511  func store_le16_idx_const(b []byte, idx int) {
         	// Stores the constant 123 as a little-endian uint16 at b[idx:]; the amd64
         	// check expects a single MOVW of the immediate to a base+index address.
    512  	// amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
    513  	// ppc64x:`MOVH\s`
    514  	binary.LittleEndian.PutUint16(b[idx:], 123)
    515  }
   516  
    517  func store_be64(b []byte, x uint64) {
         	// Big-endian store of x into the first eight bytes of b. Each directive
         	// names the instruction(s) expected for the combined 64-bit write and,
         	// where listed, rejects shifts and narrower stores.
    518  	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
    519  	// amd64/v3: `MOVBEQ`
    520  	// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
    521  	// ppc64le:`MOVDBR`
    522  	// ppc64:`MOVD\s`
    523  	// s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
    524  	binary.BigEndian.PutUint64(b, x)
    525  }
   526  
    527  func store_be64_idx(b []byte, x uint64, idx int) {
         	// Big-endian store of x into b[idx:]. Same expectations as store_be64, with
         	// a base+index memory operand required on amd64/v3, arm64 and s390x.
    528  	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
    529  	// amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
    530  	// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
    531  	// ppc64le:`MOVDBR`
    532  	// ppc64:`MOVD\s`
    533  	// s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
    534  	binary.BigEndian.PutUint64(b[idx:], x)
    535  }
   536  
    537  func store_be32(b []byte, x uint32) {
         	// Big-endian store of x into the first four bytes of b. Each directive
         	// names the instruction(s) expected for the combined 32-bit write and,
         	// where listed, rejects shifts and narrower stores.
    538  	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
    539  	// amd64/v3:`MOVBEL`
    540  	// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
    541  	// ppc64le:`MOVWBR`
    542  	// ppc64:`MOVW\s`
    543  	// s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
    544  	binary.BigEndian.PutUint32(b, x)
    545  }
   546  
    547  func store_be64_load(b, x *[8]byte) {
         	// Big-endian load from x immediately stored big-endian into b. The checks
         	// forbid REV on arm64 and BSWAPQ on amd64 (presumably because the two
         	// byte reversals cancel out).
    548  	// arm64:-`REV`
    549  	// amd64:-`BSWAPQ`
    550  	binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:]))
    551  }
   552  
    553  func store_be32_load(b, x *[8]byte) {
         	// Big-endian 32-bit load from x immediately stored big-endian into b. The
         	// checks forbid REVW on arm64 and BSWAPL on amd64 (presumably because the
         	// two byte reversals cancel out).
    554  	// arm64:-`REVW`
    555  	// amd64:-`BSWAPL`
    556  	binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:]))
    557  }
   558  
    559  func store_be32_idx(b []byte, x uint32, idx int) {
         	// Big-endian store of x into b[idx:]. Same expectations as store_be32, with
         	// a base+index memory operand required on amd64/v3, arm64 and s390x.
    560  	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
    561  	// amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
    562  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
    563  	// ppc64le:`MOVWBR`
    564  	// ppc64:`MOVW\s`
    565  	// s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
    566  	binary.BigEndian.PutUint32(b[idx:], x)
    567  }
   568  
    569  func store_be16(b []byte, x uint16) {
         	// Big-endian store of x into the first two bytes of b. Each directive
         	// names the instruction(s) expected for the combined 16-bit write and,
         	// where listed, rejects shifts, rotates and byte stores.
    570  	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
    571  	// amd64/v3:`MOVBEW`,-`ROLW`
    572  	// arm64:`MOVH`,`REV16W`,-`MOVB`
    573  	// ppc64le:`MOVHBR`
    574  	// ppc64:`MOVH\s`
    575  	// s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
    576  	binary.BigEndian.PutUint16(b, x)
    577  }
   578  
    579  func store_be16_idx(b []byte, x uint16, idx int) {
         	// Big-endian store of x into b[idx:]. Same expectations as store_be16, with
         	// a base+index memory operand required on amd64/v3, arm64 and s390x.
    580  	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
    581  	// amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
    582  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
    583  	// ppc64le:`MOVHBR`
    584  	// ppc64:`MOVH\s`
    585  	// s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
    586  	binary.BigEndian.PutUint16(b[idx:], x)
    587  }
   588  
    589  func store_le_byte_2(b []byte, val uint16) {
         	// Writes val little-endian into b[1] and b[2] one byte at a time. The b[2]
         	// access up front does the bounds check once. The checks expect the two
         	// byte stores to merge into one 16-bit store (at offset 1 where spelled out).
    590  	_ = b[2]
    591  	// arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
    592  	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
    593  	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
    594  	// ppc64le:`MOVH\s`,-`MOVB`
    595  	// ppc64:`MOVHBR`,-`MOVB`
    596  	b[1], b[2] = byte(val), byte(val>>8)
    597  }
   598  
    599  func store_le_byte_2_inv(b []byte, val uint16) {
         	// Same bytes as store_le_byte_2, but the assignment lists b[2] before b[1];
         	// the checks still expect a single 16-bit store and no byte stores.
    600  	_ = b[2]
    601  	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
    602  	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
    603  	// ppc64le:`MOVH\s`,-`MOVB`
    604  	// ppc64:`MOVHBR`,-`MOVB`
    605  	b[2], b[1] = byte(val>>8), byte(val)
    606  }
   607  
    608  func store_le_byte_4(b []byte, val uint32) {
         	// Writes val little-endian into b[1] through b[4] one byte at a time; the
         	// checks expect the four byte stores to merge into one 32-bit store.
    609  	_ = b[4]
    610  	// arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
    611  	// 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
    612  	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
    613  	// ppc64le:`MOVW\s`
    614  	// ppc64:`MOVWBR\s`
    615  	b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
    616  }
   617  
    618  func store_le_byte_8(b []byte, val uint64) {
         	// Writes val little-endian into b[1] through b[8] one byte at a time; the
         	// checks expect the eight byte stores to merge into one 64-bit store.
    619  	_ = b[8]
    620  	// arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
    621  	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
    622  	// ppc64le:`MOVD\s`,-`MOVW`
    623  	// ppc64:`MOVDBR\s`
    624  	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
    625  }
   626  
    627  func store_be_byte_2(b []byte, val uint16) {
         	// Writes val big-endian into b[1] and b[2] one byte at a time; the checks
         	// expect one 16-bit store (REV16W plus MOVH on arm64, MOVBEW on amd64/v3).
    628  	_ = b[2]
    629  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
    630  	// amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
    631  	// amd64/v3: `MOVBEW`
    632  	// ppc64le:`MOVHBR`
    633  	// ppc64:`MOVH\s`
    634  	b[1], b[2] = byte(val>>8), byte(val)
    635  }
   636  
    637  func store_be_byte_4(b []byte, val uint32) {
         	// Writes val big-endian into b[1] through b[4] one byte at a time; the
         	// checks expect one 32-bit store (REVW plus MOVW on arm64, MOVBEL on amd64/v3).
    638  	_ = b[4]
    639  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
    640  	// amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
    641  	// amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)`
    642  	// ppc64le:`MOVWBR`
    643  	// ppc64:`MOVW\s`
    644  	b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    645  }
   646  
    647  func store_be_byte_8(b []byte, val uint64) {
         	// Writes val big-endian into b[1] through b[8] one byte at a time; the
         	// checks expect one 64-bit store (REV plus MOVD on arm64, MOVBEQ on amd64/v3).
    648  	_ = b[8]
    649  	// arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
    650  	// amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
    651  	// amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL`
    652  	// ppc64le:`MOVDBR`
    653  	// ppc64:`MOVD`
    654  	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    655  }
   656  
    657  func store_le_byte_2_idx(b []byte, idx int, val uint16) {
         	// Writes val little-endian into b[idx] and b[idx+1], assigning the high
         	// byte first. The first statement bounds-checks both indices. The checks
         	// expect one 16-bit store (base+index on arm64 and 386).
    658  	_, _ = b[idx+0], b[idx+1]
    659  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
    660  	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
    661  	// ppc64le:`MOVH\s`
    662  	// ppc64:`MOVHBR`
    663  	b[idx+1], b[idx+0] = byte(val>>8), byte(val)
    664  }
   665  
    666  func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
         	// Same bytes as store_le_byte_2_idx, but the low byte is assigned first;
         	// the checks expect one 16-bit store on 386, ppc64le and ppc64.
    667  	_, _ = b[idx+0], b[idx+1]
    668  	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
    669  	// ppc64le:`MOVH\s`
    670  	// ppc64:`MOVHBR`
    671  	b[idx+0], b[idx+1] = byte(val), byte(val>>8)
    672  }
   673  
    674  func store_le_byte_4_idx(b []byte, idx int, val uint32) {
         	// Writes val little-endian into b[idx] through b[idx+3], assigning the
         	// highest byte first; the checks expect one 32-bit store.
    675  	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
    676  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
    677  	// ppc64le:`MOVW\s`
    678  	// ppc64:`MOVWBR`
    679  	b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    680  }
   681  
    682  func store_be_byte_2_idx(b []byte, idx int, val uint16) {
         	// Writes val big-endian into b[idx] and b[idx+1]; the checks expect one
         	// 16-bit store (REV16W plus a base+index MOVH on arm64).
    683  	_, _ = b[idx+0], b[idx+1]
    684  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
    685  	// ppc64le:`MOVHBR`
    686  	// ppc64:`MOVH\s`
    687  	b[idx+0], b[idx+1] = byte(val>>8), byte(val)
    688  }
   689  
    690  func store_be_byte_4_idx(b []byte, idx int, val uint32) {
         	// Writes val big-endian into b[idx] through b[idx+3]; the checks expect one
         	// 32-bit store (REVW plus a base+index MOVW on arm64).
    691  	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
    692  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
    693  	// ppc64le:`MOVWBR`
    694  	// ppc64:`MOVW\s`
    695  	b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    696  }
   697  
    698  func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
         	// Writes val big-endian into b[idx<<1] and b[(idx<<1)+1]; the arm64 check
         	// expects REV16W plus one 16-bit store with the index shifted left by 1.
    699  	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
    700  	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
    701  	// ppc64le:`MOVHBR`
    702  	// ppc64:`MOVH\s`
    703  	b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
    704  }
   705  
    706  func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
         	// Writes val little-endian into b[idx<<1] and b[(idx<<1)+1], high byte
         	// assigned first; the arm64 check expects one 16-bit store with the index
         	// shifted left by 1.
    707  	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
    708  	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
    709  	// ppc64le:`MOVH\s`
    710  	// ppc64:`MOVHBR`
    711  	b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
    712  }
   713  
    714  func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
         	// Writes val big-endian into the four bytes starting at b[idx<<2]; the arm64
         	// check expects REVW plus one 32-bit store with the index shifted left by 2.
    715  	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
    716  	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
    717  	// ppc64le:`MOVWBR`
    718  	// ppc64:`MOVW\s`
    719  	b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    720  }
   721  
    722  func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
         	// Writes val little-endian into the four bytes starting at b[idx<<2],
         	// highest byte assigned first; the arm64 check expects one 32-bit store
         	// with the index shifted left by 2.
    723  	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
    724  	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
    725  	// ppc64le:`MOVW\s`
    726  	// ppc64:`MOVWBR`
    727  	b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
    728  }
   729  
   730  // ------------- //
   731  //    Zeroing    //
   732  // ------------- //
   733  
   734  // Check that zero stores are combined into larger stores
   735  
    736  func zero_byte_2(b1, b2 []byte) {
         	// Zeroes the first two bytes of b1 (ascending index order) and of b2
         	// (descending index order). Each pair of byte stores is expected to merge
         	// into one 16-bit zero store.
    737  	// bounds checks to guarantee safety of writes below
    738  	_, _ = b1[1], b2[1]
    739  	// arm64:"MOVH\tZR",-"MOVB"
    740  	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
    741  	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
    742  	// ppc64x:`MOVH\s`
    743  	b1[0], b1[1] = 0, 0
    744  	// arm64:"MOVH\tZR",-"MOVB"
    745  	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
    746  	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
    747  	// ppc64x:`MOVH`
    748  	b2[1], b2[0] = 0, 0
    749  }
   750  
// zero_byte_4 clears the first four bytes of b1 in ascending order and of b2
// in the shuffled order 2, 3, 1, 0. Each group is expected to become one
// 32-bit zero store. The shuffled group is only checked on arm64 and ppc64x;
// the ascending group is also checked on amd64 and 386.
func zero_byte_4(b1, b2 []byte) {
	// Bounds checks for the highest index written in each slice.
	_, _ = b1[3], b2[3]
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVW\s`
	b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// ppc64x:`MOVW\s`
	b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
}
   762  
// zero_byte_8 clears b[0] through b[7] using two four-element assignments.
// The arm64 check on the first assignment expects a MOVD of ZR and forbids
// the narrower MOVB, MOVH and MOVW stores.
func zero_byte_8(b []byte) {
	// Bounds check for the highest index written below.
	_ = b[7]
	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
}
   768  
// zero_byte_16 clears b[0] through b[15] using four four-element assignments.
// The arm64 check on the first assignment expects an STP and forbids the
// narrower MOVB, MOVH and MOVW stores.
func zero_byte_16(b []byte) {
	// Bounds check for the highest index written below.
	_ = b[15]
	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
	b[8], b[9], b[10], b[11] = 0, 0, 0, 0
	b[12], b[13], b[14], b[15] = 0, 0, 0, 0
}
   776  
// zero_byte_30 overwrites the 30-byte array pointed to by a with its zero
// value. The arm64 check expects an STP and forbids MOVB, MOVH and MOVW.
func zero_byte_30(a *[30]byte) {
	*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
}
   780  
// zero_byte_39 overwrites the 39-byte array pointed to by a with its zero
// value. The arm64 check expects a MOVD and forbids MOVB, MOVH and MOVW, even
// though 39 is not a multiple of eight.
func zero_byte_39(a *[39]byte) {
	*a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
}
   784  
// zero_byte_2_idx clears the two bytes of b starting at the variable index
// idx. The checks expect one halfword store: on arm64 a MOVH of ZR through a
// register-indexed address with no MOVB, and on ppc64x a MOVH.
func zero_byte_2_idx(b []byte, idx int) {
	// Bounds checks for both elements, done ahead of the writes below.
	_, _ = b[idx+0], b[idx+1]
	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
	// ppc64x:`MOVH\s`
	b[idx+0], b[idx+1] = 0, 0
}
   791  
// zero_byte_2_idx2 clears the two bytes of b starting at offset idx<<1.
// The checks expect one halfword store: on arm64 a MOVH of ZR using a <<1
// scaled index with no MOVB, and on ppc64x a MOVH.
func zero_byte_2_idx2(b []byte, idx int) {
	// Bounds checks for both elements, done ahead of the writes below.
	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
	// ppc64x:`MOVH\s`
	b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
}
   798  
// zero_uint16_2 clears the first two uint16 elements of h1 (ascending order)
// and of h2 (descending order). Each pair is expected to become one 32-bit
// zero store on arm64, amd64, 386 and ppc64x; arm64 additionally forbids MOVB
// and MOVH.
func zero_uint16_2(h1, h2 []uint16) {
	// Bounds checks for the highest index written in each slice.
	_, _ = h1[1], h2[1]
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVW\s`
	h1[0], h1[1] = 0, 0
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVW`
	h2[1], h2[0] = 0, 0
}
   812  
// zero_uint16_4 clears the first four uint16 elements of h1 in ascending
// order and of h2 in the shuffled order 2, 3, 1, 0. Each group is expected to
// become one 64-bit zero store. The shuffled group is only checked on arm64
// and ppc64x; the ascending group is also checked on amd64.
func zero_uint16_4(h1, h2 []uint16) {
	// Bounds checks for the highest index written in each slice.
	_, _ = h1[3], h2[3]
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVD\s`
	h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// ppc64x:`MOVD\s`
	h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
}
   823  
// zero_uint16_8 clears h[0] through h[7] (16 bytes) using two four-element
// assignments. The arm64 check on the first assignment expects an STP and
// forbids MOVB and MOVH.
func zero_uint16_8(h []uint16) {
	// Bounds check for the highest index written below.
	_ = h[7]
	h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
	h[4], h[5], h[6], h[7] = 0, 0, 0, 0
}
   829  
// zero_uint32_2 clears the first two uint32 elements of w1 (ascending order)
// and of w2 (descending order). Each pair is expected to become one 64-bit
// zero store on arm64, amd64 and ppc64x; arm64 additionally forbids MOVB,
// MOVH and MOVW.
func zero_uint32_2(w1, w2 []uint32) {
	// Bounds checks for the highest index written in each slice.
	_, _ = w1[1], w2[1]
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVD\s`
	w1[0], w1[1] = 0, 0
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
	// ppc64x:`MOVD\s`
	w2[1], w2[0] = 0, 0
}
   841  
// zero_uint32_4 clears the first four uint32 elements (16 bytes) of w1 in
// ascending order and of w2 in the shuffled order 2, 3, 1, 0. The arm64 check
// on each assignment expects an STP and forbids MOVB and MOVH.
func zero_uint32_4(w1, w2 []uint32) {
	// Bounds checks for the highest index written in each slice.
	_, _ = w1[3], w2[3]
	w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
	w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
}
   847  
// zero_uint64_2 clears the first two uint64 elements of d1 (ascending order)
// and of d2 (descending order). The arm64 check on each assignment expects an
// STP and forbids MOVB and MOVH.
func zero_uint64_2(d1, d2 []uint64) {
	// Bounds checks for the highest index written in each slice.
	_, _ = d1[1], d2[1]
	d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
	d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
}
   853  
// loadstore copies the four bytes of *q into *p through four locals.
// Both the loads and the stores are expected to be combined: amd64 wants a
// MOVL and no MOVB for each side, arm64 wants a MOVWU load with no MOVBU and
// a MOVW store with no MOVB.
func loadstore(p, q *[4]uint8) {
	// amd64:"MOVL",-"MOVB"
	// arm64:"MOVWU",-"MOVBU"
	x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
	// amd64:"MOVL",-"MOVB"
	// arm64:"MOVW",-"MOVB"
	p[0], p[1], p[2], p[3] = x0, x1, x2, x3
}
   862  
// S1 is a struct of two adjacent int16 fields, used by loadstore2 to check
// that accesses to both fields are merged into a single 32-bit access.
type S1 struct {
	a, b int16
}
   866  
// loadstore2 copies both int16 fields of *p into *q through two locals.
// The field loads are expected to merge into one 32-bit load (amd64 MOVL with
// no MOVWLZX, arm64 MOVWU with no MOVH) and the field stores into one 32-bit
// store (amd64 MOVL with no MOVW, arm64 MOVW with no MOVH).
func loadstore2(p, q *S1) {
	// amd64:"MOVL",-"MOVWLZX"
	// arm64:"MOVWU",-"MOVH"
	a, b := p.a, p.b
	// amd64:"MOVL",-"MOVW"
	// arm64:"MOVW",-"MOVH"
	q.a, q.b = a, b
}
   875  
// wideStore zeroes p[0] and p[1] and checks that the two 8-byte stores are
// merged into one 16-byte store: MOVUPS on amd64 and STP on arm64, with no
// separate MOVQ or MOVD. The checks on the second statement forbid all of
// those opcodes there, i.e. the merged store is expected to be attributed to
// the first statement only.
func wideStore(p *[8]uint64) {
	// Explicit nil check up front; presumably this keeps an implicit nil
	// check from landing between the two stores (TODO confirm).
	if p == nil {
		return
	}

	// amd64:"MOVUPS",-"MOVQ"
	// arm64:"STP",-"MOVD"
	p[0] = 0
	// amd64:-"MOVUPS",-"MOVQ"
	// arm64:-"STP",-"MOVD"
	p[1] = 0
}
   888  
// wideStore2 stores the register values x and y into p[0] and p[1] and checks
// that s390x merges them into a single STMG attributed to the first
// statement; the second statement must show neither STMG nor MOVD.
func wideStore2(p *[8]uint64, x, y uint64) {
	// Explicit nil check up front; presumably this keeps an implicit nil
	// check from landing between the two stores (TODO confirm).
	if p == nil {
		return
	}

	// s390x:"STMG"
	p[0] = x
	// s390x:-"STMG",-"MOVD"
	p[1] = y
}
   899  
// store32le writes the low 32 bits of x to p.a and the high 32 bits to p.b.
// The checks expect amd64, arm64 and ppc64le to emit one 64-bit store of x
// (MOVQ or MOVD) attributed to the first statement, with no 32-bit stores and
// no right shift on either statement.
func store32le(p *struct{ a, b uint32 }, x uint64) {
	// amd64:"MOVQ",-"MOVL",-"SHRQ"
	// arm64:"MOVD",-"MOVW",-"LSR"
	// ppc64le:"MOVD",-"MOVW",-"SRD"
	p.a = uint32(x)
	// amd64:-"MOVL",-"SHRQ"
	// arm64:-"MOVW",-"LSR"
	// ppc64le:-"MOVW",-"SRD"
	p.b = uint32(x >> 32)
}
// store32be writes the high 32 bits of x to p.a and the low 32 bits to p.b.
// The checks expect ppc64 and s390x to emit one 64-bit MOVD of x attributed
// to the first statement, with no MOVW and no SRD shift on either statement.
// On arm64 the check expects an STPW pair store on the first statement and
// none on the second.
func store32be(p *struct{ a, b uint32 }, x uint64) {
	// arm64:"STPW"
	// ppc64:"MOVD",-"MOVW",-"SRD"
	// s390x:"MOVD",-"MOVW",-"SRD"
	p.a = uint32(x >> 32)
	// arm64:-"STPW"
	// ppc64:-"MOVW",-"SRD"
	// s390x:-"MOVW",-"SRD"
	p.b = uint32(x)
}
// store16le writes the low 16 bits of x to p.a and the high 16 bits to p.b.
// The checks expect amd64, arm64 and ppc64le to emit one 32-bit store of x
// (MOVL or MOVW) attributed to the first statement, with no 16-bit stores and
// no shift or bitfield extract (SHRL, UBFX, SRW) on either statement.
func store16le(p *struct{ a, b uint16 }, x uint32) {
	// amd64:"MOVL",-"MOVW",-"SHRL"
	// arm64:"MOVW",-"MOVH",-"UBFX"
	// ppc64le:"MOVW",-"MOVH",-"SRW"
	p.a = uint16(x)
	// amd64:-"MOVW",-"SHRL"
	// arm64:-"MOVH",-"UBFX"
	// ppc64le:-"MOVH",-"SRW"
	p.b = uint16(x >> 16)
}
// store16be writes the high 16 bits of x to p.a and the low 16 bits to p.b.
// The checks expect ppc64 and s390x to emit one 32-bit MOVW of x attributed
// to the first statement, with no MOVH and no SRW shift on either statement.
func store16be(p *struct{ a, b uint16 }, x uint32) {
	// ppc64:"MOVW",-"MOVH",-"SRW"
	// s390x:"MOVW",-"MOVH",-"SRW"
	p.a = uint16(x >> 16)
	// ppc64:-"MOVH",-"SRW"
	// s390x:-"MOVH",-"SRW"
	p.b = uint16(x)
}
   938  
// storeBoolConst sets two adjacent bool fields to true. The checks expect the
// two constant byte stores to merge into one 16-bit store: MOVW on amd64 and
// MOVH on arm64, with no MOVB on either.
func storeBoolConst(p *struct{ a, b bool }) {
	// amd64:"MOVW",-"MOVB"
	// arm64:"MOVH",-"MOVB"
	p.a = true
	p.b = true
}
// issue66413 stores constants into four adjacent one-byte fields of mixed
// types (byte, bool, bool, int8). The checks expect the four stores to merge
// into one 32-bit store: MOVL on amd64 and MOVW on arm64, with no MOVB.
// The name presumably refers to Go issue 66413 (not verifiable from here).
func issue66413(p *struct {
	a byte
	b bool
	c bool
	d int8
}) {
	// amd64:"MOVL",-"MOVB"
	// arm64:"MOVW",-"MOVB"
	p.a = 31
	p.b = false
	p.c = true
	p.d = 12
}
   958  
// issue70300 returns the eight bytes of v in little-endian order, written one
// byte at a time from index 0 up to index 7 of the named result b.
// The amd64 check expects a single MOVQ and no MOVB.
// The name presumably refers to Go issue 70300 (not verifiable from here).
func issue70300(v uint64) (b [8]byte) {
	// amd64:"MOVQ",-"MOVB"
	b[0] = byte(v)
	b[1] = byte(v >> 8)
	b[2] = byte(v >> 16)
	b[3] = byte(v >> 24)
	b[4] = byte(v >> 32)
	b[5] = byte(v >> 40)
	b[6] = byte(v >> 48)
	b[7] = byte(v >> 56)
	return b
}
   971  
// issue70300Reverse produces the same little-endian byte layout as
// issue70300, but writes the bytes from index 7 down to index 0.
// The amd64 check expects a single MOVQ and no MOVB for this ordering too.
func issue70300Reverse(v uint64) (b [8]byte) {
	// amd64:"MOVQ",-"MOVB"
	b[7] = byte(v >> 56)
	b[6] = byte(v >> 48)
	b[5] = byte(v >> 40)
	b[4] = byte(v >> 32)
	b[3] = byte(v >> 24)
	b[2] = byte(v >> 16)
	b[1] = byte(v >> 8)
	b[0] = byte(v)
	return b
}
   984  
   985  // --------------------------------- //
   986  //    Arm64 double-register loads    //
   987  // --------------------------------- //
   988  
// dwloadI64 returns the sum of two adjacent int64 fields. The arm64 check
// expects both fields to be fetched with an LDP.
func dwloadI64(p *struct{ a, b int64 }) int64 {
	// arm64:"LDP\t"
	return p.a + p.b
}
// dwloadI32 returns the sum of two adjacent int32 fields. The arm64 check
// expects both fields to be fetched with an LDPSW.
func dwloadI32(p *struct{ a, b int32 }) int32 {
	// arm64:"LDPSW\t"
	return p.a + p.b
}
// dwloadU32 returns the sum of two adjacent uint32 fields. The arm64 check
// expects both fields to be fetched with an LDPW.
func dwloadU32(p *struct{ a, b uint32 }) uint32 {
	// arm64:"LDPW\t"
	return p.a + p.b
}
// dwloadF64 returns the sum of two adjacent float64 fields. The arm64 check
// expects both fields to be fetched with an FLDPD.
func dwloadF64(p *struct{ a, b float64 }) float64 {
	// arm64:"FLDPD\t"
	return p.a + p.b
}
// dwloadF32 returns the sum of two adjacent float32 fields. The arm64 check
// expects both fields to be fetched with an FLDPS.
func dwloadF32(p *struct{ a, b float32 }) float32 {
	// arm64:"FLDPS\t"
	return p.a + p.b
}
  1009  
// dwloadBig sums six adjacent int64 fields, reading them in a shuffled order.
// The arm64 check expects three LDP instructions regardless of that order:
// one with no displacement, one at offset 16 and one at offset 32.
func dwloadBig(p *struct{ a, b, c, d, e, f int64 }) int64 {
	// arm64:"LDP\t\\(", "LDP\t16", "LDP\t32"
	return p.c + p.f + p.a + p.e + p.d + p.b
}
  1014  
// dwloadArg returns the sum of both elements of an array passed by value.
// The arm64 check expects the two elements to be fetched with an LDP.
func dwloadArg(a [2]int64) int64 {
	// arm64:"LDP\t"
	return a[0] + a[1]
}
  1019  
// dwloadResult1 returns the string that p points to. The arm64 check expects
// a single LDP from the address in R0 that loads the two words of the string
// into R0 and R1, in that order.
func dwloadResult1(p *string) string {
	// arm64:"LDP\t\\(R0\\), \\(R0, R1\\)"
	return *p
}
  1024  
// dwloadResult2 returns the two array elements in swapped order (p[1] first).
// The arm64 check expects a single LDP from the address in R0 whose
// destination pair is (R1, R0), so p[0] lands in R1 and p[1] in R0.
func dwloadResult2(p *[2]int64) (int64, int64) {
	// arm64:"LDP\t\\(R0\\), \\(R1, R0\\)"
	return p[1], p[0]
}
  1029  
  1030  // ---------------------------------- //
  1031  //    Arm64 double-register stores    //
  1032  // ---------------------------------- //
  1033  
// dwstoreI64 stores x and y into two adjacent int64 fields. The arm64 check
// expects the two stores to be emitted as one STP.
func dwstoreI64(p *struct{ a, b int64 }, x, y int64) {
	// arm64:"STP\t"
	p.a = x
	p.b = y
}
// dwstoreI32 stores x and y into two adjacent int32 fields. The arm64 check
// expects the two stores to be emitted as one STPW.
func dwstoreI32(p *struct{ a, b int32 }, x, y int32) {
	// arm64:"STPW\t"
	p.a = x
	p.b = y
}
// dwstoreF64 stores x and y into two adjacent float64 fields. The arm64 check
// expects the two stores to be emitted as one FSTPD.
func dwstoreF64(p *struct{ a, b float64 }, x, y float64) {
	// arm64:"FSTPD\t"
	p.a = x
	p.b = y
}
// dwstoreF32 stores x and y into two adjacent float32 fields. The arm64 check
// expects the two stores to be emitted as one FSTPS.
func dwstoreF32(p *struct{ a, b float32 }, x, y float32) {
	// arm64:"FSTPS\t"
	p.a = x
	p.b = y
}
  1054  
// dwstoreBig stores six values into six adjacent int64 fields, in the
// shuffled source order c, f, a, e, d, b. The arm64 checks expect an STP with
// no displacement on the statement storing p.a and an STP at offset 24 on the
// statement storing p.e.
func dwstoreBig(p *struct{ a, b, c, d, e, f int64 }, a, b, c, d, e, f int64) {
	// This is not perfect. We merge b+a, then d+e, then c and f have no pair.
	p.c = c
	p.f = f
	// arm64:`STP\s\(R[0-9]+, R[0-9]+\), \(R[0-9]+\)`
	p.a = a
	// arm64:`STP\s\(R[0-9]+, R[0-9]+\), 24\(R[0-9]+\)`
	p.e = e
	p.d = d
	p.b = b
}
  1066  
// dwstoreRet returns a two-element int array built from constants. The arm64
// check expects the two element writes to be emitted as one STP.
func dwstoreRet() [2]int {
	// arm64:"STP\t"
	return [2]int{5, 6}
}
  1071  
// dwstoreLocal fills a local two-element int64 array with constants and
// returns the element selected by i. The arm64 check, attached to the second
// store, expects the two element writes to be emitted as one STP.
func dwstoreLocal(i int) int64 {
	var a [2]int64
	a[0] = 5
	// arm64:"STP\t"
	a[1] = 6
	// Indexing with the variable i is presumably what keeps the array in
	// memory rather than being optimized away (TODO confirm).
	return a[i]
}
  1079  
// dwstoreOrder stores to the adjacent int64 fields a and b with two bool
// stores (c and e) written in between. The arm64 check, attached to the store
// of p.a, expects an STP even though the int64 stores are not consecutive
// statements.
func dwstoreOrder(p *struct {
	a, b       int64
	c, d, e, f bool
}, a, b int64) {
	// arm64:"STP\t"
	p.a = a
	p.c = true
	p.e = true
	p.b = b
}
  1090  

View as plain text