Source file src/cmd/internal/obj/x86/asm6.go

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
        	// plan9privates is a package-level linker symbol pointer.
        	// NOTE(review): presumably the Plan 9 per-thread "privates" symbol;
        	// its users are outside this excerpt — confirm before relying on this.
    45  	plan9privates *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // we want to align each loop entry to a loopAlign-byte boundary,
    52  // and are willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is kept for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    64  	maxLoopPad = 0  // maximum NOP padding in bytes; 0 disables loop alignment
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
    68  const (
    69  	// branchBackwards marks targets that are located behind the jump.
    70  	// Used to express jumps to loop headers.
    71  	branchBackwards = (1 << iota)
    72  	// branchShort marks branches whose target is close,
    73  	// i.e. whose offset is in the -128..127 range.
    74  	branchShort
    75  	// branchLoopHead marks a loop entry.
    76  	// Used to insert padding for misaligned loops.
    77  	branchLoopHead
    78  )
    79  
    80  // opBytes holds the encoding bytes of one optab row.
    81  // Each ytab row reserves a fixed number of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // is enough to hold the longest optab op line.
    85  type opBytes [31]uint8
    86  
        // Optab is one row of the optab instruction table: it ties an
        // instruction to the operand patterns it accepts and to the bytes
        // used to encode it.
    87  type Optab struct {
    88  	as     obj.As  // instruction this row describes
    89  	ytab   []ytab  // accepted operand patterns, scanned in order
    90  	prefix uint8   // prefix selector: one of the P* constants (Px, Pe, Pm, ...)
    91  	op     opBytes // encoding bytes shared by the ytab rows of this instruction
    92  }
    93  
        // movtab is a table row keyed by instruction (as).
        // NOTE(review): ft, f3t and tt look like operand classes for the
        // from, third and to operands, with code selecting an encoding form
        // and op holding opcode bytes. None of its users are visible in this
        // excerpt — confirm before relying on this.
    94  type movtab struct {
    95  	as   obj.As
    96  	ft   uint8
    97  	f3t  uint8
    98  	tt   uint8
    99  	code uint8
   100  	op   [4]uint8
   101  }
   102  
        // Operand classes ("Ytypes"). Each ytab row lists the classes it
        // accepts in its argList, and the ycover table (sized Ymax*Ymax)
        // records which classes also count as a more general class.
   103  const (
   104  	Yxxx = iota
   105  	Ynone
   106  	Yi0 // $0
   107  	Yi1 // $1
   108  	Yu2 // $x, x fits in uint2
   109  	Yi8 // $x, x fits in int8
   110  	Yu8 // $x, x fits in uint8
   111  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32 // Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr           // X0..X15
   171  	YxrEvex       // X0..X31
   172  	Yxm
   173  	YxmEvex       // YxrEvex+Ym
   174  	Yxvm          // VSIB vector array; vm32x/vm64x
   175  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr           // Y0..Y15
   178  	YyrEvex       // Y0..Y31
   179  	Yym
   180  	YymEvex   // YyrEvex+Ym
   181  	Yyvm      // VSIB vector array; vm32y/vm64y
   182  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr       // Z0..Z31
   185  	Yzm       // Yzr+Ym
   186  	Yzvm      // VSIB vector array; vm32z/vm64z
   187  	Yk0       // K0
   188  	Yknot0    // K1..K7; write mask
   189  	Yk        // K0..K7; used for KOP
   190  	Ykm       // Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax
   195  )
   196  
        // Encoding forms ("Ztypes"). A Z constant is the first entry of every
        // ytab row; together with the opcode bytes of the optab row it
        // determines how the instruction bytes are laid down.
   197  const (
   198  	Zxxx = iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r // mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247    
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260    
   261  	Zevex_first
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last
   278    
   279  	Zmax
   280  )
   281  
        // Prefix selectors (P*) as stored in the prefix column of optab rows,
        // followed by the REX extension bits (Rx*).
   282  const (
   283  	Px   = 0
   284  	Px1  = 1    // symbolic; exact value doesn't matter
   285  	P32  = 0x32 // 32-bit only
   286  	Pe   = 0x66 // operand escape
   287  	Pm   = 0x0f // 2byte opcode escape
   288  	Pq   = 0xff // both escapes: 66 0f
   289  	Pb   = 0xfe // byte operands
   290  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   291  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   292  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   294  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   295  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   296  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   297  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   298  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   299  	Pw   = 0x48 // Rex.w
   300  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   301  	Py   = 0x80 // defaults to 64-bit mode
   302  	Py1  = 0x81 // symbolic; exact value doesn't matter
   303  	Py3  = 0x83 // symbolic; exact value doesn't matter
   304  	Pavx = 0x84 // symbolic; exact value doesn't matter
   305    
   306  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   307  	Rxw     = 1 << 3 // =1, 64-bit operand size
   308  	Rxr     = 1 << 2 // extend modrm reg
   309  	Rxx     = 1 << 1 // extend sib index
   310  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317    
   322  	// Using spare bit to make leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape = 1 << 6
   325    
   326  	// P field - 2 bits
   327  	vex66 = 1 << 0
   328  	vexF3 = 2 << 0
   329  	vexF2 = 3 << 0
   330  	// L field - 1 bit
   331  	vexLZ  = 0 << 2
   332  	vexLIG = 0 << 2
   333  	vex128 = 0 << 2
   334  	vex256 = 1 << 2
   335  	// W field - 1 bit
   336  	vexWIG = 0 << 7
   337  	vexW0  = 0 << 7
   338  	vexW1  = 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F   = 1 << 3
   341  	vex0F38 = 2 << 3
   342  	vex0F3A = 3 << 3
   343  )
   344  
        // ycover is a flattened Ymax*Ymax matrix: ycover[a*Ymax+b] = 1 means an
        // operand of class a also counts as class b.
   345  var ycover [Ymax * Ymax]uint8
   346  
        // NOTE(review): reg and regrex are presumably per-register encoding
        // data indexed by register number (sized by MAXREG); they are filled
        // in outside this excerpt — confirm.
   347  var reg [MAXREG]int
   348  
   349  var regrex [MAXREG + 1]int
   350  
        // ynone matches an empty operand list with a single Zlit row. It is the
        // ytab of operand-less optab rows such as AAAA, ACBW and ACLC.
   351  var ynone = []ytab{
   352  	{Zlit, 1, argList{}},
   353  }
   354  
        // ytext holds two Zpseudo rows: (Ymb, Ytextsize) and
        // (Ymb, Yi32, Ytextsize).
   355  var ytext = []ytab{
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
        // ynop lists the operand shapes accepted as Zpseudo: no operand, or a
        // single Yiauto, Yml, Yrf or Yxr operand.
        // NOTE(review): the last four rows repeat the argLists of rows 2-5
        // (only the final zoffset differs). If the first matching row wins they
        // are never selected — confirm against the matcher before removing them.
   360  var ynop = []ytab{
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}},
   367  	{Zpseudo, 0, argList{Yml}},
   368  	{Zpseudo, 0, argList{Yrf}},
   369  	{Zpseudo, 1, argList{Yxr}},
   370  }
   371  
        // yfuncdata: one Zpseudo row taking (Yi32, Ym).
   372  var yfuncdata = []ytab{
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
        // ypcdata: one Zpseudo row taking (Yi32, Yi32).
   376  var ypcdata = []ytab{
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
        // yxorb: byte-sized two-operand forms. It is the ytab of the optab rows
        // AADCB, AADDB and AANDB.
   380  var yxorb = []ytab{
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
        // yaddl: it is the ytab of the L, Q and W variants of ADC, ADD and AND
        // in optab. The Yi8 row comes first, so a small immediate matches it
        // before the wider Yi32 rows.
   387  var yaddl = []ytab{
   388  	{Zibo_m, 2, argList{Yi8, Yml}},
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
   395  var yincl = []ytab{
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
   400  var yincq = []ytab{
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
        // ycmpb: byte compare forms; it is the ytab of the ACMPB row in optab.
   404  var ycmpb = []ytab{
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
        // ycmpl: it is the ytab of the ACMPL and ACMPQ rows in optab. Unlike
        // yaddl, the immediate is the second operand.
   411  var ycmpl = []ytab{
   412  	{Zm_ibo, 2, argList{Yml, Yi8}},
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
        // yshb: the count operand is Yi1, Yu8 or Ycx; the destination is Ymb.
   419  var yshb = []ytab{
   420  	{Zo_m, 2, argList{Yi1, Ymb}},
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
        // yshl: like yshb but for Yml destinations; it additionally accepts Ycl
        // as the count operand.
   425  var yshl = []ytab{
   426  	{Zo_m, 2, argList{Yi1, Yml}},
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
   432  var ytestl = []ytab{
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
   439  var ymovb = []ytab{
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
        // ybtl: it is the ytab of the BT/BTC/BTR/BTS rows in optab.
   446  var ybtl = []ytab{
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
   451  var ymovw = []ytab{
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
        // ymovl: the general-register rows of ymovw plus the MMX and XMM MOVD
        // forms.
   459  var ymovl = []ytab{
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
        // yret: accepts either no operand or a single Yi32 operand.
   471  var yret = []ytab{
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
        // ymovq: the trailing comment on each row names the prefix and opcode
        // byte that the row selects.
   476  var ymovq = []ytab{
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   495  }
   496  
        // Tables for register/memory two-operand forms. The names spell the
        // operand classes in order: e.g. ym_rl is (Ym, Yrl) and yrl_m is
        // (Yrl, Ym).
   497  var ymovbe = []ytab{
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
   502  var ym_rl = []ytab{
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
        // yrl_m: it is the ytab of the ABOUNDL and ABOUNDW rows in optab.
   506  var yrl_m = []ytab{
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
   510  var ymb_rl = []ytab{
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
        // yml_rl: it is the ytab of the ADCX/ADOX, BSF/BSR and CMOVxx rows in
        // optab.
   514  var yml_rl = []ytab{
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
        // yrl_ml: it is the ytab of the AARPL row in optab.
   518  var yrl_ml = []ytab{
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
   522  var yml_mb = []ytab{
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
   527  var yrb_mb = []ytab{
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
        // yxchg: the two Yax rows come before the generic register/memory rows.
   531  var yxchg = []ytab{
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
   538  var ydivl = []ytab{
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
   542  var ydivb = []ytab{
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
        // yimul: one-operand form, two immediate forms (Yi8 before Yi32), and
        // the (Yml, Yrl) form.
   546  var yimul = []ytab{
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
        // yimul3: three-operand form (immediate, Yml, Yrl).
   553  var yimul3 = []ytab{
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
        // ybyte: it is the ytab of the ABYTE row in optab; takes one Yi64
        // operand.
   558  var ybyte = []ytab{
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
   562  var yin = []ytab{
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
   567  var yint = []ytab{
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
        // ypushl: the Yi8 row precedes the Yi32 row.
   571  var ypushl = []ytab{
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
   578  var ypopl = []ytab{
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
   583  var ywrfsbase = []ytab{
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
   587  var yrdrand = []ytab{
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
        // yclflush: it is the ytab of the ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT and
        // ACLWB rows in optab; takes one Ym operand.
   591  var yclflush = []ytab{
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
        // ybswap: it is the ytab of the ABSWAPL and ABSWAPQ rows in optab.
   595  var ybswap = []ytab{
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
   599  var yscond = []ytab{
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
        // yjcond: a branch target, optionally preceded by a Yi0 or Yi1 operand.
   603  var yjcond = []ytab{
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
   609  var yloop = []ytab{
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
        // ycall: it is the ytab of the obj.ACALL row in optab.
   613  var ycall = []ytab{
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
   621  var yduff = []ytab{
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
   625  var yjmp = []ytab{
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
        // Tables whose operands are Yf0 and Yrf, or memory paired with Yf0.
        // NOTE(review): presumably the x87 floating-point stack forms; the
        // optab rows that use them are outside this excerpt — confirm.
   631  var yfmvd = []ytab{
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
   638  var yfmvdp = []ytab{
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
   643  var yfmvf = []ytab{
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
   648  var yfmvx = []ytab{
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
   652  var yfmvp = []ytab{
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
   656  var yfcmv = []ytab{
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
   660  var yfadd = []ytab{
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
   666  var yfxch = []ytab{
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
   671  var ycompp = []ytab{
   672  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   673  }
   674  
        // ystsw: a Ym destination, or Yax via a Zlit row.
   675  var ystsw = []ytab{
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
        // ysvrs_mo: single Ym operand encoded as Zm_o.
   680  var ysvrs_mo = []ytab{
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo": same operand, encoded as Zo_m.
   685  var ysvrs_om = []ytab{
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
        // Tables over the MMX (Ymr/Ymm) and XMM (Yxr/Yxm) operand classes.
        // ymm accepts either an MMX pair or an XMM pair.
   689  var ymm = []ytab{
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
        // yxm: it is the ytab of the SSE arithmetic rows in optab, such as
        // AADDPD, AADDSS and AANDPS.
   694  var yxm = []ytab{
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
        // yxm_q4: same operands as yxm but encoded as Zm_r rather than Zm_r_xm.
   698  var yxm_q4 = []ytab{
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
   702  var yxcvm1 = []ytab{
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
   707  var yxcvm2 = []ytab{
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
   712  var yxr = []ytab{
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
   716  var yxr_ml = []ytab{
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
   720  var ymr = []ytab{
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
   724  var ymr_ml = []ytab{
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
        // yxcmpi: it is the ytab of the ACMPPD, ACMPPS, ACMPSD and ACMPSS rows
        // in optab; the Yi8 immediate is the last operand.
   728  var yxcmpi = []ytab{
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
   732  var yxmov = []ytab{
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
   737  var yxcvfl = []ytab{
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
   741  var yxcvlf = []ytab{
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
   745  var yxcvfq = []ytab{
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
   749  var yxcvqf = []ytab{
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
        // yps: register/memory and Yi8-immediate forms, first for MMX and then
        // for XMM.
   753  var yps = []ytab{
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
   760  var yxrrl = []ytab{
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
   764  var ymrxr = []ytab{
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
   769  var ymshuf = []ytab{
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
   773  var ymshufb = []ytab{
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // yxshuf must never have more than 1 entry,
   778  // because some optab entries that use it have opcode sequences
   779  // longer than 2 bytes (zoffset=2 here):
   780  // ROUNDPD, ROUNDPS and the more recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
        // yextrw: (Yu8, Yxr) source with either a Yrl or a Yml destination.
   786  var yextrw = []ytab{
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
   791  var yextr = []ytab{
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
   795  var yinsrw = []ytab{
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
   799  var yinsr = []ytab{
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
   803  var ypsdq = []ytab{
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
        // ymskb: XMM or MMX register source, Yrl destination.
   807  var ymskb = []ytab{
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
   812  var ycrc32l = []ytab{
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
   816  var ycrc32b = []ytab{
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
   820  var yprefetch = []ytab{
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
   824  var yaes = []ytab{
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
   828  var yxbegin = []ytab{
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
   832  var yxabort = []ytab{
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
   836  var ylddqu = []ytab{
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
   840  var ypalignr = []ytab{
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
        // ysha256rnds2 and yblendvpd both require Yxr0 (X0 only) as their first
        // operand.
   844  var ysha256rnds2 = []ytab{
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   846  }
   847  
   848  var yblendvpd = []ytab{
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   850  }
   851  
        // ymmxmm0f38: an MMX row followed by an XMM row, both Zlitm_r.
   852  var ymmxmm0f38 = []ytab{
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
        // yextractps and ysha1rnds4 take a Yu2 immediate (fits in uint2) as
        // their first operand.
   857  var yextractps = []ytab{
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   859  }
   860  
   861  var ysha1rnds4 = []ytab{
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // argList matches the Ytypes of the instruction's operands.  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by that line's zoffset (its 2nd entry).  When a matching line is
   886  // found, that z pointer has the extra data to use in laying down the
   887  // instruction bytes.  The actual bytes laid down are a function of the 1st
   888  // entry in the line (that is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   898  //	        {Zil_, 1, argList{Yi32, Yax}},
   899  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   900  //	        {Zr_m, 1, argList{Yrl, Yml}},
   901  //	        {Zm_r, 1, argList{Yml, Yrl}},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AENDBR64, ynone, Pf3, opBytes{0x1e, 0xfa}},
  1100  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1101  	{AENTER, nil, 0, opBytes{}}, // botch
  1102  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1103  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1104  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1105  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1106  	{AHLT, ynone, Px, opBytes{0xf4}},
  1107  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1108  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1109  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1110  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1111  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1112  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1118  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1119  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1120  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1121  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1122  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1123  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1124  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1125  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1126  	{AINSL, ynone, Px, opBytes{0x6d}},
  1127  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1128  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1129  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1130  	{AINT, yint, Px, opBytes{0xcd}},
  1131  	{AINTO, ynone, P32, opBytes{0xce}},
  1132  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1133  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1134  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1135  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1136  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1137  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1139  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1140  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1141  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1142  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1143  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1144  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1145  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1146  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1147  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1148  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1149  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1150  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1151  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1152  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1153  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1154  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1155  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1156  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1157  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1158  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1159  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1160  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1161  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1162  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1163  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1164  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1165  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1166  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1167  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1168  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1169  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1170  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1171  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1172  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1173  	{ALODSL, ynone, Px, opBytes{0xad}},
  1174  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1175  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1176  	{ALONG, ybyte, Px, opBytes{4}},
  1177  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1178  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1179  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1180  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1181  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1182  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1183  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1184  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1185  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1186  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1187  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1188  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1189  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1190  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1191  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1192  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1193  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1194  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1195  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1196  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1197  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1198  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1199  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1200  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1201  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1202  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1203  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1204  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1205  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1206  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1207  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1208  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1209  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1210  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1211  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1212  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1213  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1214  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1215  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1216  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1217  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1218  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1219  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1220  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1221  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1222  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1223  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1224  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1225  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1226  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1227  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1228  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1229  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1230  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1231  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1232  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1233  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1234  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1235  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1236  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1237  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1238  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1239  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1240  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1241  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1242  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1243  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1244  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1245  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1246  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1247  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1248  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1249  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1250  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1251  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1252  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1253  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1254  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1255  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1256  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1257  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1258  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1259  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1260  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1261  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1262  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263  	{AORPD, yxm, Pq, opBytes{0x56}},
  1264  	{AORPS, yxm, Pm, opBytes{0x56}},
  1265  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1267  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1268  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1269  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1270  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1271  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1272  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1273  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1274  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1275  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1276  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1277  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1278  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1279  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1280  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1281  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1282  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1283  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1284  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1285  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1286  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1287  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1288  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1289  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1290  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1291  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1292  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1293  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1294  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1295  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1296  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1297  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1298  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1299  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1300  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1301  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1302  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1303  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1304  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1305  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1306  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1307  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1308  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1309  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1310  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1311  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1312  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1313  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1314  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1315  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1316  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1317  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1318  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1319  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1320  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1321  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1322  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1323  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1324  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1325  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1326  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1327  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1328  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1329  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1330  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1331  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1332  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1333  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1334  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1335  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1336  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1337  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1338  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1339  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1340  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1341  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1342  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1343  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1344  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1345  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1346  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1347  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1348  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1349  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1350  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1351  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1352  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1353  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1354  	{APOPAL, ynone, P32, opBytes{0x61}},
  1355  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1356  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1357  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1358  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1359  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1360  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1361  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1362  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1364  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1365  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1366  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1367  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1368  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1369  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1370  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1371  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1372  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1373  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1374  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1375  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1376  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1377  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1378  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1379  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1380  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1381  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1382  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1383  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1384  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1385  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1386  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1387  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1388  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1389  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1390  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1391  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1392  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1393  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1394  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1395  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1396  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1397  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1398  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1399  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1400  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1401  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1402  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1403  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1404  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1405  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1406  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1407  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1410  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1411  	{AQUAD, ybyte, Px, opBytes{8}},
  1412  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1413  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1416  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1417  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1418  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1419  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1422  	{AREP, ynone, Px, opBytes{0xf3}},
  1423  	{AREPN, ynone, Px, opBytes{0xf2}},
  1424  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1425  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1426  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1427  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1428  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1429  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1432  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1433  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1436  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1437  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1438  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1439  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1440  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1443  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1444  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1447  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1448  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1451  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1452  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1453  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1454  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1455  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1456  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1457  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1458  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1459  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1460  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1461  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1462  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1463  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1464  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1465  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1466  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1467  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1468  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1469  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1470  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1471  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1472  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1475  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1476  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1479  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1480  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1481  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1482  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1483  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1484  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1485  	{ASTC, ynone, Px, opBytes{0xf9}},
  1486  	{ASTD, ynone, Px, opBytes{0xfd}},
  1487  	{ASTI, ynone, Px, opBytes{0xfb}},
  1488  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1489  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1490  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1491  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1492  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1493  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1494  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1495  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1496  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1497  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1498  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1499  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1500  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1501  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1502  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1503  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1504  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1507  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1508  	{obj.ATEXT, ytext, Px, opBytes{}},
  1509  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1510  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1511  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1512  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1513  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1514  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1515  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1516  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1517  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1518  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1519  	{AWORD, ybyte, Px, opBytes{2}},
  1520  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1521  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1525  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1526  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1527  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1528  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1529  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1531  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1532  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1533  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1534  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1535  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1536  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1537  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1538  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1539  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1540  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1541  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1542  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1543  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1544  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1545  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1546  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1547  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1548  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1549  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1551  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1552  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1553  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1554  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1555  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1556  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1557  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1559  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1560  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1561  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1562  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1563  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1564  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1565  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1566  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1567  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1568  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1569  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1570  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1571  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1572  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1573  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1574  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1575  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1576  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1577  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1578  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1579  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1580  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1581  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1582  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1583  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1584  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1585  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1586  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1587  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1588  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1589  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1590  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1591  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1592  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1593  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1594  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1595  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1596  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1597  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1598  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1599  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1600  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1601  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1602  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1603  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1604  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1605  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1606  	{AFFREE, nil, 0, opBytes{}},
  1607  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1608  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1609  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1610  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1611  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1612  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1613  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1614  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1615  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1616  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1617  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1618  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1619  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1620  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1621  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1622  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1623  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1624  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1625  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1626  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1627  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1628  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1629  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1630  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1631  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1632  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1633  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1634  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1635  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1636  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1637  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1638  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1639  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1640  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1641  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1642  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1643  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1644  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1645  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1646  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1647  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1650  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1651  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1652  	{AINVD, ynone, Pm, opBytes{0x08}},
  1653  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1654  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1655  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1656  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1657  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1658  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1659  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1660  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1661  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1662  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1663  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1664  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1665  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1666  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1667  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1668  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1669  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1670  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1671  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1672  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1673  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1674  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1677  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1678  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1679  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1680  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1681  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1682  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1683  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1684  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1685  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1686  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1687  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1688  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1689  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1690  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1691  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1692  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1693  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1694  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1695  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1696  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1697  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1698  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1699  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1700  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1701  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1702  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1703  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1704  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1705  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1707  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1708  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1709  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1710  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1711  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1712  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1713  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1716  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1718  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1720  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1722  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1724  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1726  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1727  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1728  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1729  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1730  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1733  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1735  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1736  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1738  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1739  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1740  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1742  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1743  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1744  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1745  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1747  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1748  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1750  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1751  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1752  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1753  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1754  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1755  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1756  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1757  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1758  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1759  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1760  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1761  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1762  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1763  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1764  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1765  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1766  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1767  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1768  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1769  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1770  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1771  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1772  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1773  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1774  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1775  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1776  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1777  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1778  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1779  
  1780  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1781  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1782  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1783  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1784  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1785  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1786  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1787  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1788  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1789  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1790  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1791  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1792  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1793  
  1794  	{obj.AEND, nil, 0, opBytes{}},
  1795  	{0, nil, 0, opBytes{}},
  1796  }
  1797  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry
// in optab or avxOptab. It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1799  
  1800  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1801  // This happens on systems like Solaris that call .so functions instead of system calls.
  1802  // It does not seem to be necessary for any other systems. This is probably working
  1803  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1804  // what that bug is. And this does fix it.
  1805  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1806  	if ctxt.Headtype == objabi.Hsolaris {
  1807  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1808  		return strings.HasPrefix(s.Name, "libc_")
  1809  	}
  1810  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1811  }
  1812  
  1813  // single-instruction no-ops of various lengths.
  1814  // constructed by hand and disassembled with gdb to verify.
  1815  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1816  var nop = [][16]uint8{
  1817  	{0x90},
  1818  	{0x66, 0x90},
  1819  	{0x0F, 0x1F, 0x00},
  1820  	{0x0F, 0x1F, 0x40, 0x00},
  1821  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1826  }
  1827  
  1828  // Native Client rejects the repeated 0x66 prefix.
  1829  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1830  func fillnop(p []byte, n int) {
  1831  	var m int
  1832  
  1833  	for n > 0 {
  1834  		m = n
  1835  		if m > len(nop) {
  1836  			m = len(nop)
  1837  		}
  1838  		copy(p[:m], nop[m-1][:m])
  1839  		p = p[m:]
  1840  		n -= m
  1841  	}
  1842  }
  1843  
  1844  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1845  	s.Grow(int64(c) + int64(pad))
  1846  	fillnop(s.P[c:], int(pad))
  1847  	return c + pad
  1848  }
  1849  
  1850  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1851  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1852  		return l
  1853  	}
  1854  	return q
  1855  }
  1856  
  1857  // isJump returns whether p is a jump instruction.
  1858  // It is used to ensure that no standalone or macro-fused jump will straddle
  1859  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1860  func isJump(p *obj.Prog) bool {
  1861  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1862  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1863  }
  1864  
  1865  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1866  // jump. Otherwise, nil is returned.
  1867  func lookForJCC(p *obj.Prog) *obj.Prog {
  1868  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1869  	var q *obj.Prog
  1870  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1871  	}
  1872  
  1873  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1874  		return nil
  1875  	}
  1876  
  1877  	switch q.As {
  1878  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1879  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1880  	default:
  1881  		return nil
  1882  	}
  1883  
  1884  	return q
  1885  }
  1886  
  1887  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1888  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1889  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1890  func fusedJump(p *obj.Prog) (bool, uint8) {
  1891  	var fusedSize uint8
  1892  
  1893  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1894  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1895  	// need to be careful to insert any padding before the locks rather than directly after them.
  1896  
  1897  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1898  		fusedSize += p.Isize
  1899  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1900  		}
  1901  		if p == nil {
  1902  			return false, 0
  1903  		}
  1904  	}
  1905  	if p.As == ALOCK {
  1906  		fusedSize += p.Isize
  1907  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1908  		}
  1909  		if p == nil {
  1910  			return false, 0
  1911  		}
  1912  	}
  1913  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1914  
  1915  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1916  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1917  
  1918  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1919  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1920  
  1921  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1922  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1923  
  1924  	if !cmpAddSub && !testAnd && !incDec {
  1925  		return false, 0
  1926  	}
  1927  
  1928  	if !incDec {
  1929  		var argOne obj.AddrType
  1930  		var argTwo obj.AddrType
  1931  		if cmp {
  1932  			argOne = p.From.Type
  1933  			argTwo = p.To.Type
  1934  		} else {
  1935  			argOne = p.To.Type
  1936  			argTwo = p.From.Type
  1937  		}
  1938  		if argOne == obj.TYPE_REG {
  1939  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1940  				return false, 0
  1941  			}
  1942  		} else if argOne == obj.TYPE_MEM {
  1943  			if argTwo != obj.TYPE_REG {
  1944  				return false, 0
  1945  			}
  1946  		} else {
  1947  			return false, 0
  1948  		}
  1949  	}
  1950  
  1951  	fusedSize += p.Isize
  1952  	jmp := lookForJCC(p)
  1953  	if jmp == nil {
  1954  		return false, 0
  1955  	}
  1956  
  1957  	fusedSize += jmp.Isize
  1958  
  1959  	if testAnd {
  1960  		return true, fusedSize
  1961  	}
  1962  
  1963  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1964  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1965  		return false, 0
  1966  	}
  1967  
  1968  	if cmpAddSub {
  1969  		return true, fusedSize
  1970  	}
  1971  
  1972  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1973  		return false, 0
  1974  	}
  1975  
  1976  	return true, fusedSize
  1977  }
  1978  
  1979  type padJumpsCtx int32
  1980  
  1981  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1982  	// Disable jump padding on 32 bit builds by setting
  1983  	// padJumps to 0.
  1984  	if ctxt.Arch.Family == sys.I386 {
  1985  		return padJumpsCtx(0)
  1986  	}
  1987  
  1988  	// Disable jump padding for hand written assembly code.
  1989  	if ctxt.IsAsm {
  1990  		return padJumpsCtx(0)
  1991  	}
  1992  
  1993  	return padJumpsCtx(32)
  1994  }
  1995  
  1996  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1997  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1998  // not cross or end on a 32 byte boundary.
  1999  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  2000  	if pjc == 0 {
  2001  		return c
  2002  	}
  2003  
  2004  	var toPad int32
  2005  	fj, fjSize := fusedJump(p)
  2006  	mask := int32(pjc - 1)
  2007  	if fj {
  2008  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2009  			toPad = int32(pjc) - (c & mask)
  2010  		}
  2011  	} else if isJump(p) {
  2012  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2013  			toPad = int32(pjc) - (c & mask)
  2014  		}
  2015  	}
  2016  	if toPad <= 0 {
  2017  		return c
  2018  	}
  2019  
  2020  	return noppad(ctxt, s, c, toPad)
  2021  }
  2022  
  2023  // reAssemble is called if an instruction's size changes during assembly. If
  2024  // it does and the instruction is a standalone or a macro-fused jump we need to
  2025  // reassemble.
  2026  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2027  	if pjc == 0 {
  2028  		return false
  2029  	}
  2030  
  2031  	fj, _ := fusedJump(p)
  2032  	return fj || isJump(p)
  2033  }
  2034  
// nopPad records a run of padding NOPs emitted during one assembly pass of
// span6, so that a matching NOP Prog can be spliced into the instruction
// list after p once assembly has converged.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2039  
  2040  // requireAlignment ensures that the function alignment is at
  2041  // least as high as a, which should be a power of two
  2042  // and between 8 and 2048, inclusive.
  2043  //
  2044  // the boolean result indicates whether the alignment meets those constraints
  2045  func requireAlignment(a int64, ctxt *obj.Link, cursym *obj.LSym) bool {
  2046  	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
  2047  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
  2048  		return false
  2049  	}
  2050  	// By default function alignment is 32 bytes for amd64
  2051  	if cursym.Func().Align < int32(a) {
  2052  		cursym.Func().Align = int32(a)
  2053  	}
  2054  	return true
  2055  }
  2056  
// span6 assembles the function symbol s: it encodes every instruction in
// s.Func().Text into s.P, assigns each instruction its Pc, and sets s.Size.
// It returns immediately if s.P is already non-nil.
//
// The work is done in these steps:
//   - a rewrite pass that gives target-less branches themselves as target,
//     turns ADJSP into an ADD/SUB on SP (or a NOP), and, when ctxt.Retpoline
//     is set, redirects register-indirect CALL/JMP to runtime.retpoline<reg>;
//   - a pass that marks every instruction as a short branch and flags
//     backward branches and loop heads;
//   - a loop that assembles the whole function repeatedly until no
//     instruction asks for re-assembly (giving up after 1000 tries);
//   - splicing NOP Progs for the recorded padding, marking unsafe points for
//     the two-instruction TLS sequence, and writing jump table entries.
//
// newprog is only passed on to obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			// Only register-indirect calls and jumps can be rewritten;
			// memory-indirect ones are reported and left alone.
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Replace the indirect transfer with a direct branch to the
			// runtime.retpoline<reg> symbol for the register used.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort has presumably been
		// visited earlier in this pass (or is p itself), so the branch goes
		// backwards and its target is treated as a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes made so far
	var c int32 // current byte offset within the function
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the relocations produced by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 is the offset before any padding; the difference c - c0 is
			// recorded below as the size of the pad in front of p.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
				v := obj.AlignmentPadding(c, p, ctxt, s)
				if v > 0 {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
				}
				p.Pc = int64(c)
				c += int32(v)
				pPrev = p
				continue

			}

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Too far for a short branch: clear the flag so the
						// next pass uses the long form.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement is patched at offset 2 for
					// JCXZL and XBEGIN and at offset 1 otherwise
					// (presumably because those encodings have an extra
					// leading byte — confirm against asmins).
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the last
					// four bytes of q.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop retrying once this function has produced new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}
  2276  
// instinit initializes the package-level tables used by the assembler:
//   - opindex, mapping each opcode to its entry in avxOptab or optab;
//   - ycover, a Ymax×Ymax matrix of operand classes stored row-major;
//   - reg and regrex, indexed by register number.
//
// On Plan 9 it also looks up the "_privates" symbol. The function returns
// immediately when ycover[0] is already non-zero, so calling it more than
// once is harmless.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by opcode. An opcode that already has an
	// entry is reported as a phase error (and then overwritten).
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the entry whose opcode is zero.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// Every operand class covers itself. For i == 0 this also sets
	// ycover[0], which is the "already initialized" marker tested above.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// In the entries below, ycover[a*Ymax+b] = 1 presumably means that an
	// operand of class a is accepted where class b is asked for — confirm
	// against the code that reads ycover.

	// Immediate classes.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// General-purpose register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector register and vector register-or-memory classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// Mask (K) register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill reg and regrex. reg[i] defaults to -1 and is otherwise the low
	// three bits of the register's position within its bank; regrex[i]
	// holds the extension bits recorded for registers beyond the first
	// eight of a bank.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			// NOTE(review): 0x40 is presumably the bare REX prefix needed
			// to address SPB..DIB as byte registers — confirm.
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered from 4 upwards.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		// X16..X31 always carry RxrEvex; the upper eight of them also get
		// the same bits as X8..X15. Y16..Y31 and Z16..Z31 follow suit below.
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Only regrex is set here for CR8..CR15; reg is not touched.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2510  
// isAndroid reports whether the configured target OS (buildcfg.GOOS) is "android".
var isAndroid = buildcfg.GOOS == "android"
  2512  
  2513  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2514  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2515  		return 0
  2516  	}
  2517  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2518  		switch a.Reg {
  2519  		case REG_CS:
  2520  			return 0x2e
  2521  
  2522  		case REG_DS:
  2523  			return 0x3e
  2524  
  2525  		case REG_ES:
  2526  			return 0x26
  2527  
  2528  		case REG_FS:
  2529  			return 0x64
  2530  
  2531  		case REG_GS:
  2532  			return 0x65
  2533  
  2534  		case REG_TLS:
  2535  			// NOTE: Systems listed here should be only systems that
  2536  			// support direct TLS references like 8(TLS) implemented as
  2537  			// direct references from FS or GS. Systems that require
  2538  			// the initial-exec model, where you load the TLS base into
  2539  			// a register and then index from that register, do not reach
  2540  			// this code and should not be listed.
  2541  			if ctxt.Arch.Family == sys.I386 {
  2542  				switch ctxt.Headtype {
  2543  				default:
  2544  					if isAndroid {
  2545  						return 0x65 // GS
  2546  					}
  2547  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2548  
  2549  				case objabi.Hdarwin,
  2550  					objabi.Hdragonfly,
  2551  					objabi.Hfreebsd,
  2552  					objabi.Hnetbsd,
  2553  					objabi.Hopenbsd:
  2554  					return 0x65 // GS
  2555  				}
  2556  			}
  2557  
  2558  			switch ctxt.Headtype {
  2559  			default:
  2560  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2561  
  2562  			case objabi.Hlinux:
  2563  				if isAndroid {
  2564  					return 0x64 // FS
  2565  				}
  2566  
  2567  				if ctxt.Flag_shared {
  2568  					log.Fatalf("unknown TLS base register for linux with -shared")
  2569  				} else {
  2570  					return 0x64 // FS
  2571  				}
  2572  
  2573  			case objabi.Hdragonfly,
  2574  				objabi.Hfreebsd,
  2575  				objabi.Hnetbsd,
  2576  				objabi.Hopenbsd,
  2577  				objabi.Hsolaris:
  2578  				return 0x64 // FS
  2579  
  2580  			case objabi.Hdarwin:
  2581  				return 0x65 // GS
  2582  			}
  2583  		}
  2584  	}
  2585  
  2586  	switch a.Index {
  2587  	case REG_CS:
  2588  		return 0x2e
  2589  
  2590  	case REG_DS:
  2591  		return 0x3e
  2592  
  2593  	case REG_ES:
  2594  		return 0x26
  2595  
  2596  	case REG_TLS:
  2597  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2598  			// When building for inclusion into a shared library, an instruction of the form
  2599  			//     MOV off(CX)(TLS*1), AX
  2600  			// becomes
  2601  			//     mov %gs:off(%ecx), %eax // on i386
  2602  			//     mov %fs:off(%rcx), %rax // on amd64
  2603  			// which assumes that the correct TLS offset has been loaded into CX (today
  2604  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2605  			// a shared library the instruction it becomes
  2606  			//     mov 0x0(%ecx), %eax // on i386
  2607  			//     mov 0x0(%rcx), %rax // on amd64
  2608  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2609  			if ctxt.Arch.Family == sys.I386 {
  2610  				return 0x65 // GS
  2611  			}
  2612  			return 0x64 // FS
  2613  		}
  2614  
  2615  	case REG_FS:
  2616  		return 0x64
  2617  
  2618  	case REG_GS:
  2619  		return 0x65
  2620  	}
  2621  
  2622  	return 0
  2623  }
  2624  
  2625  // oclassRegList returns multisource operand class for addr.
  2626  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2627  	// TODO(quasilyte): when oclass register case is refactored into
  2628  	// lookup table, use it here to get register kind more easily.
  2629  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2630  
  2631  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2632  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2633  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2634  
  2635  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2636  	low := regIndex(int16(reg0))
  2637  	high := regIndex(int16(reg1))
  2638  
  2639  	if ctxt.Arch.Family == sys.I386 {
  2640  		if low >= 8 || high >= 8 {
  2641  			return Yxxx
  2642  		}
  2643  	}
  2644  
  2645  	switch high - low {
  2646  	case 3:
  2647  		switch {
  2648  		case regIsXmm(reg0) && regIsXmm(reg1):
  2649  			return YxrEvexMulti4
  2650  		case regIsYmm(reg0) && regIsYmm(reg1):
  2651  			return YyrEvexMulti4
  2652  		case regIsZmm(reg0) && regIsZmm(reg1):
  2653  			return YzrMulti4
  2654  		default:
  2655  			return Yxxx
  2656  		}
  2657  	default:
  2658  		return Yxxx
  2659  	}
  2660  }
  2661  
  2662  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2663  // For addr that is not V-mem returns (Yxxx, false).
  2664  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2665  	switch addr.Index {
  2666  	case REG_X0 + 0,
  2667  		REG_X0 + 1,
  2668  		REG_X0 + 2,
  2669  		REG_X0 + 3,
  2670  		REG_X0 + 4,
  2671  		REG_X0 + 5,
  2672  		REG_X0 + 6,
  2673  		REG_X0 + 7:
  2674  		return Yxvm, true
  2675  	case REG_X8 + 0,
  2676  		REG_X8 + 1,
  2677  		REG_X8 + 2,
  2678  		REG_X8 + 3,
  2679  		REG_X8 + 4,
  2680  		REG_X8 + 5,
  2681  		REG_X8 + 6,
  2682  		REG_X8 + 7:
  2683  		if ctxt.Arch.Family == sys.I386 {
  2684  			return Yxxx, true
  2685  		}
  2686  		return Yxvm, true
  2687  	case REG_X16 + 0,
  2688  		REG_X16 + 1,
  2689  		REG_X16 + 2,
  2690  		REG_X16 + 3,
  2691  		REG_X16 + 4,
  2692  		REG_X16 + 5,
  2693  		REG_X16 + 6,
  2694  		REG_X16 + 7,
  2695  		REG_X16 + 8,
  2696  		REG_X16 + 9,
  2697  		REG_X16 + 10,
  2698  		REG_X16 + 11,
  2699  		REG_X16 + 12,
  2700  		REG_X16 + 13,
  2701  		REG_X16 + 14,
  2702  		REG_X16 + 15:
  2703  		if ctxt.Arch.Family == sys.I386 {
  2704  			return Yxxx, true
  2705  		}
  2706  		return YxvmEvex, true
  2707  
  2708  	case REG_Y0 + 0,
  2709  		REG_Y0 + 1,
  2710  		REG_Y0 + 2,
  2711  		REG_Y0 + 3,
  2712  		REG_Y0 + 4,
  2713  		REG_Y0 + 5,
  2714  		REG_Y0 + 6,
  2715  		REG_Y0 + 7:
  2716  		return Yyvm, true
  2717  	case REG_Y8 + 0,
  2718  		REG_Y8 + 1,
  2719  		REG_Y8 + 2,
  2720  		REG_Y8 + 3,
  2721  		REG_Y8 + 4,
  2722  		REG_Y8 + 5,
  2723  		REG_Y8 + 6,
  2724  		REG_Y8 + 7:
  2725  		if ctxt.Arch.Family == sys.I386 {
  2726  			return Yxxx, true
  2727  		}
  2728  		return Yyvm, true
  2729  	case REG_Y16 + 0,
  2730  		REG_Y16 + 1,
  2731  		REG_Y16 + 2,
  2732  		REG_Y16 + 3,
  2733  		REG_Y16 + 4,
  2734  		REG_Y16 + 5,
  2735  		REG_Y16 + 6,
  2736  		REG_Y16 + 7,
  2737  		REG_Y16 + 8,
  2738  		REG_Y16 + 9,
  2739  		REG_Y16 + 10,
  2740  		REG_Y16 + 11,
  2741  		REG_Y16 + 12,
  2742  		REG_Y16 + 13,
  2743  		REG_Y16 + 14,
  2744  		REG_Y16 + 15:
  2745  		if ctxt.Arch.Family == sys.I386 {
  2746  			return Yxxx, true
  2747  		}
  2748  		return YyvmEvex, true
  2749  
  2750  	case REG_Z0 + 0,
  2751  		REG_Z0 + 1,
  2752  		REG_Z0 + 2,
  2753  		REG_Z0 + 3,
  2754  		REG_Z0 + 4,
  2755  		REG_Z0 + 5,
  2756  		REG_Z0 + 6,
  2757  		REG_Z0 + 7:
  2758  		return Yzvm, true
  2759  	case REG_Z8 + 0,
  2760  		REG_Z8 + 1,
  2761  		REG_Z8 + 2,
  2762  		REG_Z8 + 3,
  2763  		REG_Z8 + 4,
  2764  		REG_Z8 + 5,
  2765  		REG_Z8 + 6,
  2766  		REG_Z8 + 7,
  2767  		REG_Z8 + 8,
  2768  		REG_Z8 + 9,
  2769  		REG_Z8 + 10,
  2770  		REG_Z8 + 11,
  2771  		REG_Z8 + 12,
  2772  		REG_Z8 + 13,
  2773  		REG_Z8 + 14,
  2774  		REG_Z8 + 15,
  2775  		REG_Z8 + 16,
  2776  		REG_Z8 + 17,
  2777  		REG_Z8 + 18,
  2778  		REG_Z8 + 19,
  2779  		REG_Z8 + 20,
  2780  		REG_Z8 + 21,
  2781  		REG_Z8 + 22,
  2782  		REG_Z8 + 23:
  2783  		if ctxt.Arch.Family == sys.I386 {
  2784  			return Yxxx, true
  2785  		}
  2786  		return Yzvm, true
  2787  	}
  2788  
  2789  	return Yxxx, false
  2790  }
  2791  
  2792  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2793  	switch a.Type {
  2794  	case obj.TYPE_REGLIST:
  2795  		return oclassRegList(ctxt, a)
  2796  
  2797  	case obj.TYPE_NONE:
  2798  		return Ynone
  2799  
  2800  	case obj.TYPE_BRANCH:
  2801  		return Ybr
  2802  
  2803  	case obj.TYPE_INDIR:
  2804  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2805  			return Yindir
  2806  		}
  2807  		return Yxxx
  2808  
  2809  	case obj.TYPE_MEM:
  2810  		// Pseudo registers have negative index, but SP is
  2811  		// not pseudo on x86, hence REG_SP check is not redundant.
  2812  		if a.Index == REG_SP || a.Index < 0 {
  2813  			// Can't use FP/SB/PC/SP as the index register.
  2814  			return Yxxx
  2815  		}
  2816  
  2817  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2818  			return vmem
  2819  		}
  2820  
  2821  		if ctxt.Arch.Family == sys.AMD64 {
  2822  			switch a.Name {
  2823  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2824  				// Global variables can't use index registers and their
  2825  				// base register is %rip (%rip is encoded as REG_NONE).
  2826  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2827  					return Yxxx
  2828  				}
  2829  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2830  				// These names must have a base of SP.  The old compiler
  2831  				// uses 0 for the base register. SSA uses REG_SP.
  2832  				if a.Reg != REG_SP && a.Reg != 0 {
  2833  					return Yxxx
  2834  				}
  2835  			case obj.NAME_NONE:
  2836  				// everything is ok
  2837  			default:
  2838  				// unknown name
  2839  				return Yxxx
  2840  			}
  2841  		}
  2842  		return Ym
  2843  
  2844  	case obj.TYPE_ADDR:
  2845  		switch a.Name {
  2846  		case obj.NAME_GOTREF:
  2847  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2848  			return Yxxx
  2849  
  2850  		case obj.NAME_EXTERN,
  2851  			obj.NAME_STATIC:
  2852  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2853  				return Yi32
  2854  			}
  2855  			return Yiauto // use pc-relative addressing
  2856  
  2857  		case obj.NAME_AUTO,
  2858  			obj.NAME_PARAM:
  2859  			return Yiauto
  2860  		}
  2861  
  2862  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2863  		// and got Yi32 in an earlier version of this code.
  2864  		// Keep doing that until we fix yduff etc.
  2865  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2866  			return Yi32
  2867  		}
  2868  
  2869  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2870  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2871  		}
  2872  		fallthrough
  2873  
  2874  	case obj.TYPE_CONST:
  2875  		if a.Sym != nil {
  2876  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2877  		}
  2878  
  2879  		v := a.Offset
  2880  		if ctxt.Arch.Family == sys.I386 {
  2881  			v = int64(int32(v))
  2882  		}
  2883  		switch {
  2884  		case v == 0:
  2885  			return Yi0
  2886  		case v == 1:
  2887  			return Yi1
  2888  		case v >= 0 && v <= 3:
  2889  			return Yu2
  2890  		case v >= 0 && v <= 127:
  2891  			return Yu7
  2892  		case v >= 0 && v <= 255:
  2893  			return Yu8
  2894  		case v >= -128 && v <= 127:
  2895  			return Yi8
  2896  		}
  2897  		if ctxt.Arch.Family == sys.I386 {
  2898  			return Yi32
  2899  		}
  2900  		l := int32(v)
  2901  		if int64(l) == v {
  2902  			return Ys32 // can sign extend
  2903  		}
  2904  		if v>>32 == 0 {
  2905  			return Yi32 // unsigned
  2906  		}
  2907  		return Yi64
  2908  
  2909  	case obj.TYPE_TEXTSIZE:
  2910  		return Ytextsize
  2911  	}
  2912  
  2913  	if a.Type != obj.TYPE_REG {
  2914  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2915  		return Yxxx
  2916  	}
  2917  
  2918  	switch a.Reg {
  2919  	case REG_AL:
  2920  		return Yal
  2921  
  2922  	case REG_AX:
  2923  		return Yax
  2924  
  2925  		/*
  2926  			case REG_SPB:
  2927  		*/
  2928  	case REG_BPB,
  2929  		REG_SIB,
  2930  		REG_DIB,
  2931  		REG_R8B,
  2932  		REG_R9B,
  2933  		REG_R10B,
  2934  		REG_R11B,
  2935  		REG_R12B,
  2936  		REG_R13B,
  2937  		REG_R14B,
  2938  		REG_R15B:
  2939  		if ctxt.Arch.Family == sys.I386 {
  2940  			return Yxxx
  2941  		}
  2942  		fallthrough
  2943  
  2944  	case REG_DL,
  2945  		REG_BL,
  2946  		REG_AH,
  2947  		REG_CH,
  2948  		REG_DH,
  2949  		REG_BH:
  2950  		return Yrb
  2951  
  2952  	case REG_CL:
  2953  		return Ycl
  2954  
  2955  	case REG_CX:
  2956  		return Ycx
  2957  
  2958  	case REG_DX, REG_BX:
  2959  		return Yrx
  2960  
  2961  	case REG_R8, // not really Yrl
  2962  		REG_R9,
  2963  		REG_R10,
  2964  		REG_R11,
  2965  		REG_R12,
  2966  		REG_R13,
  2967  		REG_R14,
  2968  		REG_R15:
  2969  		if ctxt.Arch.Family == sys.I386 {
  2970  			return Yxxx
  2971  		}
  2972  		fallthrough
  2973  
  2974  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2975  		if ctxt.Arch.Family == sys.I386 {
  2976  			return Yrl32
  2977  		}
  2978  		return Yrl
  2979  
  2980  	case REG_F0 + 0:
  2981  		return Yf0
  2982  
  2983  	case REG_F0 + 1,
  2984  		REG_F0 + 2,
  2985  		REG_F0 + 3,
  2986  		REG_F0 + 4,
  2987  		REG_F0 + 5,
  2988  		REG_F0 + 6,
  2989  		REG_F0 + 7:
  2990  		return Yrf
  2991  
  2992  	case REG_M0 + 0,
  2993  		REG_M0 + 1,
  2994  		REG_M0 + 2,
  2995  		REG_M0 + 3,
  2996  		REG_M0 + 4,
  2997  		REG_M0 + 5,
  2998  		REG_M0 + 6,
  2999  		REG_M0 + 7:
  3000  		return Ymr
  3001  
  3002  	case REG_X0:
  3003  		return Yxr0
  3004  
  3005  	case REG_X0 + 1,
  3006  		REG_X0 + 2,
  3007  		REG_X0 + 3,
  3008  		REG_X0 + 4,
  3009  		REG_X0 + 5,
  3010  		REG_X0 + 6,
  3011  		REG_X0 + 7,
  3012  		REG_X0 + 8,
  3013  		REG_X0 + 9,
  3014  		REG_X0 + 10,
  3015  		REG_X0 + 11,
  3016  		REG_X0 + 12,
  3017  		REG_X0 + 13,
  3018  		REG_X0 + 14,
  3019  		REG_X0 + 15:
  3020  		return Yxr
  3021  
  3022  	case REG_X0 + 16,
  3023  		REG_X0 + 17,
  3024  		REG_X0 + 18,
  3025  		REG_X0 + 19,
  3026  		REG_X0 + 20,
  3027  		REG_X0 + 21,
  3028  		REG_X0 + 22,
  3029  		REG_X0 + 23,
  3030  		REG_X0 + 24,
  3031  		REG_X0 + 25,
  3032  		REG_X0 + 26,
  3033  		REG_X0 + 27,
  3034  		REG_X0 + 28,
  3035  		REG_X0 + 29,
  3036  		REG_X0 + 30,
  3037  		REG_X0 + 31:
  3038  		return YxrEvex
  3039  
  3040  	case REG_Y0 + 0,
  3041  		REG_Y0 + 1,
  3042  		REG_Y0 + 2,
  3043  		REG_Y0 + 3,
  3044  		REG_Y0 + 4,
  3045  		REG_Y0 + 5,
  3046  		REG_Y0 + 6,
  3047  		REG_Y0 + 7,
  3048  		REG_Y0 + 8,
  3049  		REG_Y0 + 9,
  3050  		REG_Y0 + 10,
  3051  		REG_Y0 + 11,
  3052  		REG_Y0 + 12,
  3053  		REG_Y0 + 13,
  3054  		REG_Y0 + 14,
  3055  		REG_Y0 + 15:
  3056  		return Yyr
  3057  
  3058  	case REG_Y0 + 16,
  3059  		REG_Y0 + 17,
  3060  		REG_Y0 + 18,
  3061  		REG_Y0 + 19,
  3062  		REG_Y0 + 20,
  3063  		REG_Y0 + 21,
  3064  		REG_Y0 + 22,
  3065  		REG_Y0 + 23,
  3066  		REG_Y0 + 24,
  3067  		REG_Y0 + 25,
  3068  		REG_Y0 + 26,
  3069  		REG_Y0 + 27,
  3070  		REG_Y0 + 28,
  3071  		REG_Y0 + 29,
  3072  		REG_Y0 + 30,
  3073  		REG_Y0 + 31:
  3074  		return YyrEvex
  3075  
  3076  	case REG_Z0 + 0,
  3077  		REG_Z0 + 1,
  3078  		REG_Z0 + 2,
  3079  		REG_Z0 + 3,
  3080  		REG_Z0 + 4,
  3081  		REG_Z0 + 5,
  3082  		REG_Z0 + 6,
  3083  		REG_Z0 + 7:
  3084  		return Yzr
  3085  
  3086  	case REG_Z0 + 8,
  3087  		REG_Z0 + 9,
  3088  		REG_Z0 + 10,
  3089  		REG_Z0 + 11,
  3090  		REG_Z0 + 12,
  3091  		REG_Z0 + 13,
  3092  		REG_Z0 + 14,
  3093  		REG_Z0 + 15,
  3094  		REG_Z0 + 16,
  3095  		REG_Z0 + 17,
  3096  		REG_Z0 + 18,
  3097  		REG_Z0 + 19,
  3098  		REG_Z0 + 20,
  3099  		REG_Z0 + 21,
  3100  		REG_Z0 + 22,
  3101  		REG_Z0 + 23,
  3102  		REG_Z0 + 24,
  3103  		REG_Z0 + 25,
  3104  		REG_Z0 + 26,
  3105  		REG_Z0 + 27,
  3106  		REG_Z0 + 28,
  3107  		REG_Z0 + 29,
  3108  		REG_Z0 + 30,
  3109  		REG_Z0 + 31:
  3110  		if ctxt.Arch.Family == sys.I386 {
  3111  			return Yxxx
  3112  		}
  3113  		return Yzr
  3114  
  3115  	case REG_K0:
  3116  		return Yk0
  3117  
  3118  	case REG_K0 + 1,
  3119  		REG_K0 + 2,
  3120  		REG_K0 + 3,
  3121  		REG_K0 + 4,
  3122  		REG_K0 + 5,
  3123  		REG_K0 + 6,
  3124  		REG_K0 + 7:
  3125  		return Yknot0
  3126  
  3127  	case REG_CS:
  3128  		return Ycs
  3129  	case REG_SS:
  3130  		return Yss
  3131  	case REG_DS:
  3132  		return Yds
  3133  	case REG_ES:
  3134  		return Yes
  3135  	case REG_FS:
  3136  		return Yfs
  3137  	case REG_GS:
  3138  		return Ygs
  3139  	case REG_TLS:
  3140  		return Ytls
  3141  
  3142  	case REG_GDTR:
  3143  		return Ygdtr
  3144  	case REG_IDTR:
  3145  		return Yidtr
  3146  	case REG_LDTR:
  3147  		return Yldtr
  3148  	case REG_MSW:
  3149  		return Ymsw
  3150  	case REG_TASK:
  3151  		return Ytask
  3152  
  3153  	case REG_CR + 0:
  3154  		return Ycr0
  3155  	case REG_CR + 1:
  3156  		return Ycr1
  3157  	case REG_CR + 2:
  3158  		return Ycr2
  3159  	case REG_CR + 3:
  3160  		return Ycr3
  3161  	case REG_CR + 4:
  3162  		return Ycr4
  3163  	case REG_CR + 5:
  3164  		return Ycr5
  3165  	case REG_CR + 6:
  3166  		return Ycr6
  3167  	case REG_CR + 7:
  3168  		return Ycr7
  3169  	case REG_CR + 8:
  3170  		return Ycr8
  3171  
  3172  	case REG_DR + 0:
  3173  		return Ydr0
  3174  	case REG_DR + 1:
  3175  		return Ydr1
  3176  	case REG_DR + 2:
  3177  		return Ydr2
  3178  	case REG_DR + 3:
  3179  		return Ydr3
  3180  	case REG_DR + 4:
  3181  		return Ydr4
  3182  	case REG_DR + 5:
  3183  		return Ydr5
  3184  	case REG_DR + 6:
  3185  		return Ydr6
  3186  	case REG_DR + 7:
  3187  		return Ydr7
  3188  
  3189  	case REG_TR + 0:
  3190  		return Ytr0
  3191  	case REG_TR + 1:
  3192  		return Ytr1
  3193  	case REG_TR + 2:
  3194  		return Ytr2
  3195  	case REG_TR + 3:
  3196  		return Ytr3
  3197  	case REG_TR + 4:
  3198  		return Ytr4
  3199  	case REG_TR + 5:
  3200  		return Ytr5
  3201  	case REG_TR + 6:
  3202  		return Ytr6
  3203  	case REG_TR + 7:
  3204  		return Ytr7
  3205  	}
  3206  
  3207  	return Yxxx
  3208  }
  3209  
  3210  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  3211  // and hold assembly state.
  3212  type AsmBuf struct {
  3213  	buf      [100]byte
  3214  	off      int
  3215  	rexflag  int
  3216  	vexflag  bool // Per inst: true for VEX-encoded
  3217  	evexflag bool // Per inst: true for EVEX-encoded
  3218  	rep      bool
  3219  	repn     bool
  3220  	lock     bool
  3221  
  3222  	evex evexBits // Initialized when evexflag is true
  3223  }
  3224  
  3225  // Put1 appends one byte to the end of the buffer.
  3226  func (ab *AsmBuf) Put1(x byte) {
  3227  	ab.buf[ab.off] = x
  3228  	ab.off++
  3229  }
  3230  
  3231  // Put2 appends two bytes to the end of the buffer.
  3232  func (ab *AsmBuf) Put2(x, y byte) {
  3233  	ab.buf[ab.off+0] = x
  3234  	ab.buf[ab.off+1] = y
  3235  	ab.off += 2
  3236  }
  3237  
  3238  // Put3 appends three bytes to the end of the buffer.
  3239  func (ab *AsmBuf) Put3(x, y, z byte) {
  3240  	ab.buf[ab.off+0] = x
  3241  	ab.buf[ab.off+1] = y
  3242  	ab.buf[ab.off+2] = z
  3243  	ab.off += 3
  3244  }
  3245  
  3246  // Put4 appends four bytes to the end of the buffer.
  3247  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3248  	ab.buf[ab.off+0] = x
  3249  	ab.buf[ab.off+1] = y
  3250  	ab.buf[ab.off+2] = z
  3251  	ab.buf[ab.off+3] = w
  3252  	ab.off += 4
  3253  }
  3254  
  3255  // PutInt16 writes v into the buffer using little-endian encoding.
  3256  func (ab *AsmBuf) PutInt16(v int16) {
  3257  	ab.buf[ab.off+0] = byte(v)
  3258  	ab.buf[ab.off+1] = byte(v >> 8)
  3259  	ab.off += 2
  3260  }
  3261  
  3262  // PutInt32 writes v into the buffer using little-endian encoding.
  3263  func (ab *AsmBuf) PutInt32(v int32) {
  3264  	ab.buf[ab.off+0] = byte(v)
  3265  	ab.buf[ab.off+1] = byte(v >> 8)
  3266  	ab.buf[ab.off+2] = byte(v >> 16)
  3267  	ab.buf[ab.off+3] = byte(v >> 24)
  3268  	ab.off += 4
  3269  }
  3270  
  3271  // PutInt64 writes v into the buffer using little-endian encoding.
  3272  func (ab *AsmBuf) PutInt64(v int64) {
  3273  	ab.buf[ab.off+0] = byte(v)
  3274  	ab.buf[ab.off+1] = byte(v >> 8)
  3275  	ab.buf[ab.off+2] = byte(v >> 16)
  3276  	ab.buf[ab.off+3] = byte(v >> 24)
  3277  	ab.buf[ab.off+4] = byte(v >> 32)
  3278  	ab.buf[ab.off+5] = byte(v >> 40)
  3279  	ab.buf[ab.off+6] = byte(v >> 48)
  3280  	ab.buf[ab.off+7] = byte(v >> 56)
  3281  	ab.off += 8
  3282  }
  3283  
  3284  // Put copies b into the buffer.
  3285  func (ab *AsmBuf) Put(b []byte) {
  3286  	copy(ab.buf[ab.off:], b)
  3287  	ab.off += len(b)
  3288  }
  3289  
  3290  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3291  // starting at specified offset (e.g. z counter value).
  3292  // Trailing 0 is not written.
  3293  //
  3294  // Intended to be used for literal Z cases.
  3295  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3296  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3297  	for int(op[offset]) != 0 {
  3298  		ab.Put1(byte(op[offset]))
  3299  		offset++
  3300  	}
  3301  }
  3302  
  3303  // Insert inserts b at offset i.
  3304  func (ab *AsmBuf) Insert(i int, b byte) {
  3305  	ab.off++
  3306  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3307  	ab.buf[i] = b
  3308  }
  3309  
  3310  // Last returns the byte at the end of the buffer.
  3311  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3312  
  3313  // Len returns the length of the buffer.
  3314  func (ab *AsmBuf) Len() int { return ab.off }
  3315  
  3316  // Bytes returns the contents of the buffer.
  3317  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3318  
  3319  // Reset empties the buffer.
  3320  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3321  
  3322  // At returns the byte at offset i.
  3323  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3324  
  3325  // asmidx emits SIB byte.
  3326  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3327  	var i int
  3328  
  3329  	// X/Y index register is used in VSIB.
  3330  	switch index {
  3331  	default:
  3332  		goto bad
  3333  
  3334  	case REG_NONE:
  3335  		i = 4 << 3
  3336  		goto bas
  3337  
  3338  	case REG_R8,
  3339  		REG_R9,
  3340  		REG_R10,
  3341  		REG_R11,
  3342  		REG_R12,
  3343  		REG_R13,
  3344  		REG_R14,
  3345  		REG_R15,
  3346  		REG_X8,
  3347  		REG_X9,
  3348  		REG_X10,
  3349  		REG_X11,
  3350  		REG_X12,
  3351  		REG_X13,
  3352  		REG_X14,
  3353  		REG_X15,
  3354  		REG_X16,
  3355  		REG_X17,
  3356  		REG_X18,
  3357  		REG_X19,
  3358  		REG_X20,
  3359  		REG_X21,
  3360  		REG_X22,
  3361  		REG_X23,
  3362  		REG_X24,
  3363  		REG_X25,
  3364  		REG_X26,
  3365  		REG_X27,
  3366  		REG_X28,
  3367  		REG_X29,
  3368  		REG_X30,
  3369  		REG_X31,
  3370  		REG_Y8,
  3371  		REG_Y9,
  3372  		REG_Y10,
  3373  		REG_Y11,
  3374  		REG_Y12,
  3375  		REG_Y13,
  3376  		REG_Y14,
  3377  		REG_Y15,
  3378  		REG_Y16,
  3379  		REG_Y17,
  3380  		REG_Y18,
  3381  		REG_Y19,
  3382  		REG_Y20,
  3383  		REG_Y21,
  3384  		REG_Y22,
  3385  		REG_Y23,
  3386  		REG_Y24,
  3387  		REG_Y25,
  3388  		REG_Y26,
  3389  		REG_Y27,
  3390  		REG_Y28,
  3391  		REG_Y29,
  3392  		REG_Y30,
  3393  		REG_Y31,
  3394  		REG_Z8,
  3395  		REG_Z9,
  3396  		REG_Z10,
  3397  		REG_Z11,
  3398  		REG_Z12,
  3399  		REG_Z13,
  3400  		REG_Z14,
  3401  		REG_Z15,
  3402  		REG_Z16,
  3403  		REG_Z17,
  3404  		REG_Z18,
  3405  		REG_Z19,
  3406  		REG_Z20,
  3407  		REG_Z21,
  3408  		REG_Z22,
  3409  		REG_Z23,
  3410  		REG_Z24,
  3411  		REG_Z25,
  3412  		REG_Z26,
  3413  		REG_Z27,
  3414  		REG_Z28,
  3415  		REG_Z29,
  3416  		REG_Z30,
  3417  		REG_Z31:
  3418  		if ctxt.Arch.Family == sys.I386 {
  3419  			goto bad
  3420  		}
  3421  		fallthrough
  3422  
  3423  	case REG_AX,
  3424  		REG_CX,
  3425  		REG_DX,
  3426  		REG_BX,
  3427  		REG_BP,
  3428  		REG_SI,
  3429  		REG_DI,
  3430  		REG_X0,
  3431  		REG_X1,
  3432  		REG_X2,
  3433  		REG_X3,
  3434  		REG_X4,
  3435  		REG_X5,
  3436  		REG_X6,
  3437  		REG_X7,
  3438  		REG_Y0,
  3439  		REG_Y1,
  3440  		REG_Y2,
  3441  		REG_Y3,
  3442  		REG_Y4,
  3443  		REG_Y5,
  3444  		REG_Y6,
  3445  		REG_Y7,
  3446  		REG_Z0,
  3447  		REG_Z1,
  3448  		REG_Z2,
  3449  		REG_Z3,
  3450  		REG_Z4,
  3451  		REG_Z5,
  3452  		REG_Z6,
  3453  		REG_Z7:
  3454  		i = reg[index] << 3
  3455  	}
  3456  
  3457  	switch scale {
  3458  	default:
  3459  		goto bad
  3460  
  3461  	case 1:
  3462  		break
  3463  
  3464  	case 2:
  3465  		i |= 1 << 6
  3466  
  3467  	case 4:
  3468  		i |= 2 << 6
  3469  
  3470  	case 8:
  3471  		i |= 3 << 6
  3472  	}
  3473  
  3474  bas:
  3475  	switch base {
  3476  	default:
  3477  		goto bad
  3478  
  3479  	case REG_NONE: // must be mod=00
  3480  		i |= 5
  3481  
  3482  	case REG_R8,
  3483  		REG_R9,
  3484  		REG_R10,
  3485  		REG_R11,
  3486  		REG_R12,
  3487  		REG_R13,
  3488  		REG_R14,
  3489  		REG_R15:
  3490  		if ctxt.Arch.Family == sys.I386 {
  3491  			goto bad
  3492  		}
  3493  		fallthrough
  3494  
  3495  	case REG_AX,
  3496  		REG_CX,
  3497  		REG_DX,
  3498  		REG_BX,
  3499  		REG_SP,
  3500  		REG_BP,
  3501  		REG_SI,
  3502  		REG_DI:
  3503  		i |= reg[base]
  3504  	}
  3505  
  3506  	ab.Put1(byte(i))
  3507  	return
  3508  
  3509  bad:
  3510  	ctxt.Diag("asmidx: bad address %d/%s/%s", scale, rconv(index), rconv(base))
  3511  	ab.Put1(0)
  3512  }
  3513  
  3514  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3515  	var rel obj.Reloc
  3516  
  3517  	v := vaddr(ctxt, p, a, &rel)
  3518  	if rel.Siz != 0 {
  3519  		if rel.Siz != 4 {
  3520  			ctxt.Diag("bad reloc")
  3521  		}
  3522  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3523  		cursym.AddRel(ctxt, rel)
  3524  	}
  3525  
  3526  	ab.PutInt32(int32(v))
  3527  }
  3528  
  3529  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3530  	if r != nil {
  3531  		*r = obj.Reloc{}
  3532  	}
  3533  
  3534  	switch a.Name {
  3535  	case obj.NAME_STATIC,
  3536  		obj.NAME_GOTREF,
  3537  		obj.NAME_EXTERN:
  3538  		s := a.Sym
  3539  		if r == nil {
  3540  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3541  			log.Fatalf("reloc")
  3542  		}
  3543  
  3544  		if a.Name == obj.NAME_GOTREF {
  3545  			r.Siz = 4
  3546  			r.Type = objabi.R_GOTPCREL
  3547  		} else if useAbs(ctxt, s) {
  3548  			r.Siz = 4
  3549  			r.Type = objabi.R_ADDR
  3550  		} else {
  3551  			r.Siz = 4
  3552  			r.Type = objabi.R_PCREL
  3553  		}
  3554  
  3555  		r.Off = -1 // caller must fill in
  3556  		r.Sym = s
  3557  		r.Add = a.Offset
  3558  
  3559  		return 0
  3560  	}
  3561  
  3562  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3563  		if r == nil {
  3564  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3565  			log.Fatalf("reloc")
  3566  		}
  3567  
  3568  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3569  			r.Type = objabi.R_TLS_LE
  3570  			r.Siz = 4
  3571  			r.Off = -1 // caller must fill in
  3572  			r.Add = a.Offset
  3573  		}
  3574  		return 0
  3575  	}
  3576  
  3577  	return a.Offset
  3578  }
  3579  
  3580  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3581  	var base int
  3582  	var rel obj.Reloc
  3583  
  3584  	rex &= 0x40 | Rxr
  3585  	if a.Offset != int64(int32(a.Offset)) {
  3586  		// The rules are slightly different for 386 and AMD64,
  3587  		// mostly for historical reasons. We may unify them later,
  3588  		// but it must be discussed beforehand.
  3589  		//
  3590  		// For 64bit mode only LEAL is allowed to overflow.
  3591  		// It's how https://golang.org/cl/59630 made it.
  3592  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3593  		//
  3594  		// For 32bit mode rules are more permissive.
  3595  		// If offset fits uint32, it's permitted.
  3596  		// This is allowed for assembly that wants to use 32-bit hex
  3597  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3598  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3599  			(ctxt.Arch.Family != sys.AMD64 &&
  3600  				int64(uint32(a.Offset)) == a.Offset &&
  3601  				ab.rexflag&Rxw == 0)
  3602  		if !overflowOK {
  3603  			ctxt.Diag("offset too large in %s", p)
  3604  		}
  3605  	}
  3606  	v := int32(a.Offset)
  3607  	rel.Siz = 0
  3608  
  3609  	switch a.Type {
  3610  	case obj.TYPE_ADDR:
  3611  		if a.Name == obj.NAME_NONE {
  3612  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3613  		}
  3614  		if a.Index == REG_TLS {
  3615  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3616  		}
  3617  		goto bad
  3618  
  3619  	case obj.TYPE_REG:
  3620  		const regFirst = REG_AL
  3621  		const regLast = REG_Z31
  3622  		if a.Reg < regFirst || regLast < a.Reg {
  3623  			goto bad
  3624  		}
  3625  		if v != 0 {
  3626  			goto bad
  3627  		}
  3628  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3629  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3630  		return
  3631  	}
  3632  
  3633  	if a.Type != obj.TYPE_MEM {
  3634  		goto bad
  3635  	}
  3636  
  3637  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3638  		base := int(a.Reg)
  3639  		switch a.Name {
  3640  		case obj.NAME_EXTERN,
  3641  			obj.NAME_GOTREF,
  3642  			obj.NAME_STATIC:
  3643  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3644  				goto bad
  3645  			}
  3646  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3647  				// The base register has already been set. It holds the PC
  3648  				// of this instruction returned by a PC-reading thunk.
  3649  				// See obj6.go:rewriteToPcrel.
  3650  			} else {
  3651  				base = REG_NONE
  3652  			}
  3653  			v = int32(vaddr(ctxt, p, a, &rel))
  3654  
  3655  		case obj.NAME_AUTO,
  3656  			obj.NAME_PARAM:
  3657  			base = REG_SP
  3658  		}
  3659  
  3660  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3661  		if base == REG_NONE {
  3662  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3663  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3664  			goto putrelv
  3665  		}
  3666  
  3667  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3668  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3669  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3670  			return
  3671  		}
  3672  
  3673  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3674  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3675  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3676  			ab.Put1(disp8)
  3677  			return
  3678  		}
  3679  
  3680  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3681  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3682  		goto putrelv
  3683  	}
  3684  
  3685  	base = int(a.Reg)
  3686  	switch a.Name {
  3687  	case obj.NAME_STATIC,
  3688  		obj.NAME_GOTREF,
  3689  		obj.NAME_EXTERN:
  3690  		if a.Sym == nil {
  3691  			ctxt.Diag("bad addr: %v", p)
  3692  		}
  3693  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3694  			// The base register has already been set. It holds the PC
  3695  			// of this instruction returned by a PC-reading thunk.
  3696  			// See obj6.go:rewriteToPcrel.
  3697  		} else {
  3698  			base = REG_NONE
  3699  		}
  3700  		v = int32(vaddr(ctxt, p, a, &rel))
  3701  
  3702  	case obj.NAME_AUTO,
  3703  		obj.NAME_PARAM:
  3704  		base = REG_SP
  3705  	}
  3706  
  3707  	if base == REG_TLS {
  3708  		v = int32(vaddr(ctxt, p, a, &rel))
  3709  	}
  3710  
  3711  	ab.rexflag |= regrex[base]&Rxb | rex
  3712  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3713  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3714  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3715  				ctxt.Diag("%v has offset against gotref", p)
  3716  			}
  3717  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3718  			goto putrelv
  3719  		}
  3720  
  3721  		// temporary
  3722  		ab.Put2(
  3723  			byte(0<<6|4<<0|r<<3), // sib present
  3724  			0<<6|4<<3|5<<0,       // DS:d32
  3725  		)
  3726  		goto putrelv
  3727  	}
  3728  
  3729  	if base == REG_SP || base == REG_R12 {
  3730  		if v == 0 {
  3731  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3732  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3733  			return
  3734  		}
  3735  
  3736  		if disp8, ok := toDisp8(v, p, ab); ok {
  3737  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3738  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3739  			ab.Put1(disp8)
  3740  			return
  3741  		}
  3742  
  3743  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3744  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3745  		goto putrelv
  3746  	}
  3747  
  3748  	if REG_AX <= base && base <= REG_R15 {
  3749  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3750  			ctxt.Headtype != objabi.Hwindows {
  3751  			rel = obj.Reloc{}
  3752  			rel.Type = objabi.R_TLS_LE
  3753  			rel.Siz = 4
  3754  			rel.Sym = nil
  3755  			rel.Add = int64(v)
  3756  			v = 0
  3757  		}
  3758  
  3759  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3760  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3761  			return
  3762  		}
  3763  
  3764  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3765  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3766  			return
  3767  		}
  3768  
  3769  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3770  		goto putrelv
  3771  	}
  3772  
  3773  	goto bad
  3774  
  3775  putrelv:
  3776  	if rel.Siz != 0 {
  3777  		if rel.Siz != 4 {
  3778  			ctxt.Diag("bad rel")
  3779  			goto bad
  3780  		}
  3781  
  3782  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3783  		cursym.AddRel(ctxt, rel)
  3784  	}
  3785  
  3786  	ab.PutInt32(v)
  3787  	return
  3788  
  3789  bad:
  3790  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3791  }
  3792  
  3793  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3794  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3795  }
  3796  
  3797  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3798  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3799  }
  3800  
  3801  func bytereg(a *obj.Addr, t *uint8) {
  3802  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3803  		a.Reg += REG_AL - REG_AX
  3804  		*t = 0
  3805  	}
  3806  }
  3807  
  3808  func unbytereg(a *obj.Addr, t *uint8) {
  3809  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3810  		a.Reg += REG_AX - REG_AL
  3811  		*t = 0
  3812  	}
  3813  }
  3814  
// Encoding kinds for ymovtab entries. Each entry of ymovtab names one of
// these values in its fifth field to select how the entry's opcode bytes
// are emitted.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)
  3825  
// ymovtab lists instruction forms that need special-case encoding: pushes and
// pops of segment registers, moves to and from segment, control, debug and
// test registers, descriptor-table, LDT, MSW and task-register loads and
// stores, double shifts, and loads of the TLS base.
//
// Each entry gives the instruction, three operand classes, one of the mov*
// encoding kinds, and up to four opcode bytes.
// NOTE(review): the final all-zero entry presumably acts as an end-of-table
// sentinel — confirm against the code that scans this table.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3969  
  3970  func isax(a *obj.Addr) bool {
  3971  	switch a.Reg {
  3972  	case REG_AX, REG_AL, REG_AH:
  3973  		return true
  3974  	}
  3975  
  3976  	return a.Index == REG_AX
  3977  }
  3978  
  3979  func subreg(p *obj.Prog, from int, to int) {
  3980  	if false { /* debug['Q'] */
  3981  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3982  	}
  3983  
  3984  	if int(p.From.Reg) == from {
  3985  		p.From.Reg = int16(to)
  3986  		p.Ft = 0
  3987  	}
  3988  
  3989  	if int(p.To.Reg) == from {
  3990  		p.To.Reg = int16(to)
  3991  		p.Tt = 0
  3992  	}
  3993  
  3994  	if int(p.From.Index) == from {
  3995  		p.From.Index = int16(to)
  3996  		p.Ft = 0
  3997  	}
  3998  
  3999  	if int(p.To.Index) == from {
  4000  		p.To.Index = int16(to)
  4001  		p.Tt = 0
  4002  	}
  4003  
  4004  	if false { /* debug['Q'] */
  4005  		fmt.Printf("%v\n", p)
  4006  	}
  4007  }
  4008  
  4009  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  4010  	switch op {
  4011  	case Pm, Pe, Pf2, Pf3:
  4012  		if osize != 1 {
  4013  			if op != Pm {
  4014  				ab.Put1(byte(op))
  4015  			}
  4016  			ab.Put1(Pm)
  4017  			z++
  4018  			op = int(o.op[z])
  4019  			break
  4020  		}
  4021  		fallthrough
  4022  
  4023  	default:
  4024  		if ab.Len() == 0 || ab.Last() != Pm {
  4025  			ab.Put1(Pm)
  4026  		}
  4027  	}
  4028  
  4029  	ab.Put1(byte(op))
  4030  	return z
  4031  }
  4032  
// bpduff1 holds the raw AMD64 machine code for the two instructions named in
// the per-line comments: store BP at -16(SP), then point BP at that slot.
// NOTE(review): presumably emitted before a Duff's-device call to keep the
// frame-pointer chain intact — confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  4037  
// bpduff2 holds the raw AMD64 machine code for MOVQ 0(BP), BP, which reloads
// BP from the memory it currently points to.
// NOTE(review): looks like the counterpart that undoes bpduff1 — confirm at
// the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  4041  
// asmevex emits the EVEX prefix and the opcode byte.
// In addition to the r/m, vvvv and reg fields that asmvex takes, it also
// accepts an optional K-masking register.
//
// rm, v, r and k may each be nil; a nil operand contributes nothing to the
// corresponding prefix fields. The instruction suffix is looked up from
// p.Scond in evexSuffixMap and controls zeroing, rounding, broadcast and
// SAE. Unsupported or illegal combinations are reported through ctxt.Diag,
// but the prefix and opcode are still written.
//
// Expects ab.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The register-extension bits start out as 1 and are cleared to 0 when
	// the corresponding register needs the extension.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, the X bit is driven by the RxrEvex bit
		// of rm.Reg; otherwise it is driven by the Rxx bit of the index.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is written with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// At most one of rounding, broadcast and SAE is applied; each of them
	// sets the b bit of P2.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// The rounding mode replaces the vector length in the L'L field.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared when either the index register of rm or the v register
	// has its RxrEvex bit set; the index register is checked first.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		// The aaa field is the mask register's number from the reg table.
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  4158  
  4159  // Emit VEX prefix and opcode byte.
  4160  // The three addresses are the r/m, vvvv, and reg fields.
  4161  // The reg and rm arguments appear in the same order as the
  4162  // arguments to asmand, which typically follows the call to asmvex.
  4163  // The final two arguments are the VEX prefix (see encoding above)
  4164  // and the opcode byte.
  4165  // For details about vex prefix see:
  4166  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4167  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4168  	ab.vexflag = true
  4169  	rexR := 0
  4170  	if r != nil {
  4171  		rexR = regrex[r.Reg] & Rxr
  4172  	}
  4173  	rexB := 0
  4174  	rexX := 0
  4175  	if rm != nil {
  4176  		rexB = regrex[rm.Reg] & Rxb
  4177  		rexX = regrex[rm.Index] & Rxx
  4178  	}
  4179  	vexM := (vex >> 3) & 0x7
  4180  	vexWLP := vex & 0x87
  4181  	vexV := byte(0)
  4182  	if v != nil {
  4183  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4184  	}
  4185  	vexV ^= 0xF
  4186  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4187  		// Can use 2-byte encoding.
  4188  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4189  	} else {
  4190  		// Must use 3-byte encoding.
  4191  		ab.Put3(0xc4,
  4192  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4193  			vexV<<3|vexWLP,
  4194  		)
  4195  	}
  4196  	ab.Put1(opcode)
  4197  }
  4198  
  4199  // regIndex returns register index that fits in 5 bits.
  4200  //
  4201  //	R         : 3 bit | legacy instructions     | N/A
  4202  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4203  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4204  //
  4205  // Examples:
  4206  //
  4207  //	REG_Z30 => 30
  4208  //	REG_X15 => 15
  4209  //	REG_R9  => 9
  4210  //	REG_AX  => 0
  4211  func regIndex(r int16) int {
  4212  	lower3bits := reg[r]
  4213  	high4bit := regrex[r] & Rxr << 1
  4214  	high5bit := regrex[r] & RxrEvex << 0
  4215  	return lower3bits | high4bit | high5bit
  4216  }
  4217  
  4218  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4219  // Reports errors via ctxt.
  4220  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4221  	// If any pair of the index, mask, or destination registers
  4222  	// are the same, illegal instruction trap (#UD) is triggered.
  4223  	index := regIndex(p.GetFrom3().Index)
  4224  	mask := regIndex(p.From.Reg)
  4225  	dest := regIndex(p.To.Reg)
  4226  	if dest == mask || dest == index || mask == index {
  4227  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4228  		return false
  4229  	}
  4230  
  4231  	return true
  4232  }
  4233  
  4234  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4235  // Reports errors via ctxt.
  4236  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4237  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4238  	// register is the same as index vector in VSIB.
  4239  	index := regIndex(p.From.Index)
  4240  	dest := regIndex(p.To.Reg)
  4241  	if dest == index {
  4242  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4243  		return false
  4244  	}
  4245  
  4246  	return true
  4247  }
  4248  
  4249  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4250  	o := opindex[p.As&obj.AMask]
  4251  
  4252  	if o == nil {
  4253  		ctxt.Diag("asmins: missing op %v", p)
  4254  		return
  4255  	}
  4256  
  4257  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4258  		ab.Put1(byte(pre))
  4259  	}
  4260  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4261  		ab.Put1(byte(pre))
  4262  	}
  4263  
  4264  	// Checks to warn about instruction/arguments combinations that
  4265  	// will unconditionally trigger illegal instruction trap (#UD).
  4266  	switch p.As {
  4267  	case AVGATHERDPD,
  4268  		AVGATHERQPD,
  4269  		AVGATHERDPS,
  4270  		AVGATHERQPS,
  4271  		AVPGATHERDD,
  4272  		AVPGATHERQD,
  4273  		AVPGATHERDQ,
  4274  		AVPGATHERQQ:
  4275  		if p.GetFrom3() == nil {
  4276  			// gathers need a 3rd arg. See issue 58822.
  4277  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4278  			return
  4279  		}
  4280  		// AVX512 gather requires explicit K mask.
  4281  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4282  			if !avx512gatherValid(ctxt, p) {
  4283  				return
  4284  			}
  4285  		} else {
  4286  			if !avx2gatherValid(ctxt, p) {
  4287  				return
  4288  			}
  4289  		}
  4290  	}
  4291  
  4292  	if p.Ft == 0 {
  4293  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4294  	}
  4295  	if p.Tt == 0 {
  4296  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4297  	}
  4298  
  4299  	ft := int(p.Ft) * Ymax
  4300  	tt := int(p.Tt) * Ymax
  4301  
  4302  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4303  	z := 0
  4304  
  4305  	args := make([]int, 0, argListMax)
  4306  	if ft != Ynone*Ymax {
  4307  		args = append(args, ft)
  4308  	}
  4309  	for i := range p.RestArgs {
  4310  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4311  	}
  4312  	if tt != Ynone*Ymax {
  4313  		args = append(args, tt)
  4314  	}
  4315  
  4316  	var f3t int
  4317  	for _, yt := range o.ytab {
  4318  		// ytab matching is purely args-based,
  4319  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4320  		// add EVEX-only filter that will reject non-EVEX matches.
  4321  		//
  4322  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4323  		// Without this rule, operands will lead to VEX-encoded form
  4324  		// and produce "c5b15813" encoding.
  4325  		if !yt.match(args) {
  4326  			// "xo" is always zero for VEX/EVEX encoded insts.
  4327  			z += int(yt.zoffset) + xo
  4328  		} else {
  4329  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4330  				// Do not signal error and continue to search
  4331  				// for matching EVEX-encoded form.
  4332  				z += int(yt.zoffset)
  4333  				continue
  4334  			}
  4335  
  4336  			switch o.prefix {
  4337  			case Px1: // first option valid only in 32-bit mode
  4338  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4339  					z += int(yt.zoffset) + xo
  4340  					continue
  4341  				}
  4342  			case Pq: // 16 bit escape and opcode escape
  4343  				ab.Put2(Pe, Pm)
  4344  
  4345  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4346  				ab.rexflag |= Pw
  4347  				ab.Put2(Pe, Pm)
  4348  
  4349  			case Pq4: // 66 0F 38
  4350  				ab.Put3(0x66, 0x0F, 0x38)
  4351  
  4352  			case Pq4w: // 66 0F 38 + REX.W
  4353  				ab.rexflag |= Pw
  4354  				ab.Put3(0x66, 0x0F, 0x38)
  4355  
  4356  			case Pq5: // F3 0F 38
  4357  				ab.Put3(0xF3, 0x0F, 0x38)
  4358  
  4359  			case Pq5w: //  F3 0F 38 + REX.W
  4360  				ab.rexflag |= Pw
  4361  				ab.Put3(0xF3, 0x0F, 0x38)
  4362  
  4363  			case Pf2, // xmm opcode escape
  4364  				Pf3:
  4365  				ab.Put2(o.prefix, Pm)
  4366  
  4367  			case Pef3:
  4368  				ab.Put3(Pe, Pf3, Pm)
  4369  
  4370  			case Pfw: // xmm opcode escape + REX.W
  4371  				ab.rexflag |= Pw
  4372  				ab.Put2(Pf3, Pm)
  4373  
  4374  			case Pm: // opcode escape
  4375  				ab.Put1(Pm)
  4376  
  4377  			case Pe: // 16 bit escape
  4378  				ab.Put1(Pe)
  4379  
  4380  			case Pw: // 64-bit escape
  4381  				if ctxt.Arch.Family != sys.AMD64 {
  4382  					ctxt.Diag("asmins: illegal 64: %v", p)
  4383  				}
  4384  				ab.rexflag |= Pw
  4385  
  4386  			case Pw8: // 64-bit escape if z >= 8
  4387  				if z >= 8 {
  4388  					if ctxt.Arch.Family != sys.AMD64 {
  4389  						ctxt.Diag("asmins: illegal 64: %v", p)
  4390  					}
  4391  					ab.rexflag |= Pw
  4392  				}
  4393  
  4394  			case Pb: // botch
  4395  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4396  					goto bad
  4397  				}
  4398  				// NOTE(rsc): This is probably safe to do always,
  4399  				// but when enabled it chooses different encodings
  4400  				// than the old cmd/internal/obj/i386 code did,
  4401  				// which breaks our "same bits out" checks.
  4402  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4403  				// in the original obj/i386, and it would encode
  4404  				// (using a valid, shorter form) as 3c 00 if we enabled
  4405  				// the call to bytereg here.
  4406  				if ctxt.Arch.Family == sys.AMD64 {
  4407  					bytereg(&p.From, &p.Ft)
  4408  					bytereg(&p.To, &p.Tt)
  4409  				}
  4410  
  4411  			case P32: // 32 bit but illegal if 64-bit mode
  4412  				if ctxt.Arch.Family == sys.AMD64 {
  4413  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4414  				}
  4415  
  4416  			case Py: // 64-bit only, no prefix
  4417  				if ctxt.Arch.Family != sys.AMD64 {
  4418  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4419  				}
  4420  
  4421  			case Py1: // 64-bit only if z < 1, no prefix
  4422  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4423  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4424  				}
  4425  
  4426  			case Py3: // 64-bit only if z < 3, no prefix
  4427  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4428  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4429  				}
  4430  			}
  4431  
  4432  			if z >= len(o.op) {
  4433  				log.Fatalf("asmins bad table %v", p)
  4434  			}
  4435  			op := int(o.op[z])
  4436  			if op == 0x0f {
  4437  				ab.Put1(byte(op))
  4438  				z++
  4439  				op = int(o.op[z])
  4440  			}
  4441  
  4442  			switch yt.zcase {
  4443  			default:
  4444  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4445  				return
  4446  
  4447  			case Zpseudo:
  4448  				break
  4449  
  4450  			case Zlit:
  4451  				ab.PutOpBytesLit(z, &o.op)
  4452  
  4453  			case Zlitr_m:
  4454  				ab.PutOpBytesLit(z, &o.op)
  4455  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4456  
  4457  			case Zlitm_r:
  4458  				ab.PutOpBytesLit(z, &o.op)
  4459  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4460  
  4461  			case Zlit_m_r:
  4462  				ab.PutOpBytesLit(z, &o.op)
  4463  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4464  
  4465  			case Zmb_r:
  4466  				bytereg(&p.From, &p.Ft)
  4467  				fallthrough
  4468  
  4469  			case Zm_r:
  4470  				ab.Put1(byte(op))
  4471  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4472  
  4473  			case Z_m_r:
  4474  				ab.Put1(byte(op))
  4475  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4476  
  4477  			case Zm2_r:
  4478  				ab.Put2(byte(op), o.op[z+1])
  4479  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4480  
  4481  			case Zm_r_xm:
  4482  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4483  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4484  
  4485  			case Zm_r_xm_nr:
  4486  				ab.rexflag = 0
  4487  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4488  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4489  
  4490  			case Zm_r_i_xm:
  4491  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4492  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4493  				ab.Put1(byte(p.To.Offset))
  4494  
  4495  			case Zibm_r, Zibr_m:
  4496  				ab.PutOpBytesLit(z, &o.op)
  4497  				if yt.zcase == Zibr_m {
  4498  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4499  				} else {
  4500  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4501  				}
  4502  				switch {
  4503  				default:
  4504  					ab.Put1(byte(p.From.Offset))
  4505  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4506  					ab.PutInt16(int16(p.From.Offset))
  4507  				case yt.args[0] == Yi32:
  4508  					ab.PutInt32(int32(p.From.Offset))
  4509  				}
  4510  
  4511  			case Zaut_r:
  4512  				ab.Put1(0x8d) // leal
  4513  				if p.From.Type != obj.TYPE_ADDR {
  4514  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4515  				}
  4516  				p.From.Type = obj.TYPE_MEM
  4517  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4518  				p.From.Type = obj.TYPE_ADDR
  4519  
  4520  			case Zm_o:
  4521  				ab.Put1(byte(op))
  4522  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4523  
  4524  			case Zr_m:
  4525  				ab.Put1(byte(op))
  4526  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4527  
  4528  			case Zvex:
  4529  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4530  
  4531  			case Zvex_rm_v_r:
  4532  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4533  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4534  
  4535  			case Zvex_rm_v_ro:
  4536  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4537  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4538  
  4539  			case Zvex_i_rm_vo:
  4540  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4541  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4542  				ab.Put1(byte(p.From.Offset))
  4543  
  4544  			case Zvex_i_r_v:
  4545  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4546  				regnum := byte(0x7)
  4547  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4548  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4549  				} else {
  4550  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4551  				}
  4552  				ab.Put1(o.op[z+2] | regnum)
  4553  				ab.Put1(byte(p.From.Offset))
  4554  
  4555  			case Zvex_i_rm_v_r:
  4556  				imm, from, from3, to := unpackOps4(p)
  4557  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4558  				ab.asmand(ctxt, cursym, p, from, to)
  4559  				ab.Put1(byte(imm.Offset))
  4560  
  4561  			case Zvex_i_rm_r:
  4562  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4563  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4564  				ab.Put1(byte(p.From.Offset))
  4565  
  4566  			case Zvex_v_rm_r:
  4567  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4568  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4569  
  4570  			case Zvex_r_v_rm:
  4571  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4572  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4573  
  4574  			case Zvex_rm_r_vo:
  4575  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4576  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4577  
  4578  			case Zvex_i_r_rm:
  4579  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4580  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4581  				ab.Put1(byte(p.From.Offset))
  4582  
  4583  			case Zvex_hr_rm_v_r:
  4584  				hr, from, from3, to := unpackOps4(p)
  4585  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4586  				ab.asmand(ctxt, cursym, p, from, to)
  4587  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4588  
  4589  			case Zevex_k_rmo:
  4590  				ab.evex = newEVEXBits(z, &o.op)
  4591  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4592  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4593  
  4594  			case Zevex_i_rm_vo:
  4595  				ab.evex = newEVEXBits(z, &o.op)
  4596  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4597  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4598  				ab.Put1(byte(p.From.Offset))
  4599  
  4600  			case Zevex_i_rm_k_vo:
  4601  				imm, from, kmask, to := unpackOps4(p)
  4602  				ab.evex = newEVEXBits(z, &o.op)
  4603  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4604  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4605  				ab.Put1(byte(imm.Offset))
  4606  
  4607  			case Zevex_i_r_rm:
  4608  				ab.evex = newEVEXBits(z, &o.op)
  4609  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4610  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4611  				ab.Put1(byte(p.From.Offset))
  4612  
  4613  			case Zevex_i_r_k_rm:
  4614  				imm, from, kmask, to := unpackOps4(p)
  4615  				ab.evex = newEVEXBits(z, &o.op)
  4616  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4617  				ab.asmand(ctxt, cursym, p, to, from)
  4618  				ab.Put1(byte(imm.Offset))
  4619  
  4620  			case Zevex_i_rm_r:
  4621  				ab.evex = newEVEXBits(z, &o.op)
  4622  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4623  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4624  				ab.Put1(byte(p.From.Offset))
  4625  
  4626  			case Zevex_i_rm_k_r:
  4627  				imm, from, kmask, to := unpackOps4(p)
  4628  				ab.evex = newEVEXBits(z, &o.op)
  4629  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4630  				ab.asmand(ctxt, cursym, p, from, to)
  4631  				ab.Put1(byte(imm.Offset))
  4632  
  4633  			case Zevex_i_rm_v_r:
  4634  				imm, from, from3, to := unpackOps4(p)
  4635  				ab.evex = newEVEXBits(z, &o.op)
  4636  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4637  				ab.asmand(ctxt, cursym, p, from, to)
  4638  				ab.Put1(byte(imm.Offset))
  4639  
  4640  			case Zevex_i_rm_v_k_r:
  4641  				imm, from, from3, kmask, to := unpackOps5(p)
  4642  				ab.evex = newEVEXBits(z, &o.op)
  4643  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4644  				ab.asmand(ctxt, cursym, p, from, to)
  4645  				ab.Put1(byte(imm.Offset))
  4646  
  4647  			case Zevex_r_v_rm:
  4648  				ab.evex = newEVEXBits(z, &o.op)
  4649  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4650  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4651  
  4652  			case Zevex_rm_v_r:
  4653  				ab.evex = newEVEXBits(z, &o.op)
  4654  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4655  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4656  
  4657  			case Zevex_rm_k_r:
  4658  				ab.evex = newEVEXBits(z, &o.op)
  4659  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4660  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4661  
  4662  			case Zevex_r_k_rm:
  4663  				ab.evex = newEVEXBits(z, &o.op)
  4664  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4665  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4666  
  4667  			case Zevex_rm_v_k_r:
  4668  				from, from3, kmask, to := unpackOps4(p)
  4669  				ab.evex = newEVEXBits(z, &o.op)
  4670  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4671  				ab.asmand(ctxt, cursym, p, from, to)
  4672  
  4673  			case Zevex_r_v_k_rm:
  4674  				from, from3, kmask, to := unpackOps4(p)
  4675  				ab.evex = newEVEXBits(z, &o.op)
  4676  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4677  				ab.asmand(ctxt, cursym, p, to, from)
  4678  
  4679  			case Zr_m_xm:
  4680  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4681  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4682  
  4683  			case Zr_m_xm_nr:
  4684  				ab.rexflag = 0
  4685  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4686  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4687  
  4688  			case Zo_m:
  4689  				ab.Put1(byte(op))
  4690  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4691  
  4692  			case Zcallindreg:
  4693  				cursym.AddRel(ctxt, obj.Reloc{
  4694  					Type: objabi.R_CALLIND,
  4695  					Off:  int32(p.Pc),
  4696  				})
  4697  				fallthrough
  4698  
  4699  			case Zo_m64:
  4700  				ab.Put1(byte(op))
  4701  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4702  
  4703  			case Zm_ibo:
  4704  				ab.Put1(byte(op))
  4705  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4706  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4707  
  4708  			case Zibo_m:
  4709  				ab.Put1(byte(op))
  4710  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4711  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4712  
  4713  			case Zibo_m_xm:
  4714  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4715  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4716  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4717  
  4718  			case Z_ib, Zib_:
  4719  				var a *obj.Addr
  4720  				if yt.zcase == Zib_ {
  4721  					a = &p.From
  4722  				} else {
  4723  					a = &p.To
  4724  				}
  4725  				ab.Put1(byte(op))
  4726  				if p.As == AXABORT {
  4727  					ab.Put1(o.op[z+1])
  4728  				}
  4729  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4730  
  4731  			case Zib_rp:
  4732  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4733  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4734  
  4735  			case Zil_rp:
  4736  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4737  				ab.Put1(byte(op + reg[p.To.Reg]))
  4738  				if o.prefix == Pe {
  4739  					v := vaddr(ctxt, p, &p.From, nil)
  4740  					ab.PutInt16(int16(v))
  4741  				} else {
  4742  					ab.relput4(ctxt, cursym, p, &p.From)
  4743  				}
  4744  
  4745  			case Zo_iw:
  4746  				ab.Put1(byte(op))
  4747  				if p.From.Type != obj.TYPE_NONE {
  4748  					v := vaddr(ctxt, p, &p.From, nil)
  4749  					ab.PutInt16(int16(v))
  4750  				}
  4751  
  4752  			case Ziq_rp:
  4753  				var rel obj.Reloc
  4754  				v := vaddr(ctxt, p, &p.From, &rel)
  4755  				l := int(v >> 32)
  4756  				if l == 0 && rel.Siz != 8 {
  4757  					ab.rexflag &^= (0x40 | Rxw)
  4758  
  4759  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4760  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4761  					if rel.Type != 0 {
  4762  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4763  						cursym.AddRel(ctxt, rel)
  4764  					}
  4765  
  4766  					ab.PutInt32(int32(v))
  4767  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4768  					ab.Put1(0xc7)
  4769  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4770  
  4771  					ab.PutInt32(int32(v)) // need all 8
  4772  				} else {
  4773  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4774  					ab.Put1(byte(op + reg[p.To.Reg]))
  4775  					if rel.Type != 0 {
  4776  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4777  						cursym.AddRel(ctxt, rel)
  4778  					}
  4779  
  4780  					ab.PutInt64(v)
  4781  				}
  4782  
  4783  			case Zib_rr:
  4784  				ab.Put1(byte(op))
  4785  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4786  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4787  
  4788  			case Z_il, Zil_:
  4789  				var a *obj.Addr
  4790  				if yt.zcase == Zil_ {
  4791  					a = &p.From
  4792  				} else {
  4793  					a = &p.To
  4794  				}
  4795  				ab.Put1(byte(op))
  4796  				if o.prefix == Pe {
  4797  					v := vaddr(ctxt, p, a, nil)
  4798  					ab.PutInt16(int16(v))
  4799  				} else {
  4800  					ab.relput4(ctxt, cursym, p, a)
  4801  				}
  4802  
  4803  			case Zm_ilo, Zilo_m:
  4804  				var a *obj.Addr
  4805  				ab.Put1(byte(op))
  4806  				if yt.zcase == Zilo_m {
  4807  					a = &p.From
  4808  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4809  				} else {
  4810  					a = &p.To
  4811  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4812  				}
  4813  
  4814  				if o.prefix == Pe {
  4815  					v := vaddr(ctxt, p, a, nil)
  4816  					ab.PutInt16(int16(v))
  4817  				} else {
  4818  					ab.relput4(ctxt, cursym, p, a)
  4819  				}
  4820  
  4821  			case Zil_rr:
  4822  				ab.Put1(byte(op))
  4823  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4824  				if o.prefix == Pe {
  4825  					v := vaddr(ctxt, p, &p.From, nil)
  4826  					ab.PutInt16(int16(v))
  4827  				} else {
  4828  					ab.relput4(ctxt, cursym, p, &p.From)
  4829  				}
  4830  
  4831  			case Z_rp:
  4832  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4833  				ab.Put1(byte(op + reg[p.To.Reg]))
  4834  
  4835  			case Zrp_:
  4836  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4837  				ab.Put1(byte(op + reg[p.From.Reg]))
  4838  
  4839  			case Zcallcon, Zjmpcon:
  4840  				if yt.zcase == Zcallcon {
  4841  					ab.Put1(byte(op))
  4842  				} else {
  4843  					ab.Put1(o.op[z+1])
  4844  				}
  4845  				cursym.AddRel(ctxt, obj.Reloc{
  4846  					Type: objabi.R_PCREL,
  4847  					Off:  int32(p.Pc + int64(ab.Len())),
  4848  					Siz:  4,
  4849  					Add:  p.To.Offset,
  4850  				})
  4851  				ab.PutInt32(0)
  4852  
  4853  			case Zcallind:
  4854  				ab.Put2(byte(op), o.op[z+1])
  4855  				typ := objabi.R_ADDR
  4856  				if ctxt.Arch.Family == sys.AMD64 {
  4857  					typ = objabi.R_PCREL
  4858  				}
  4859  				cursym.AddRel(ctxt, obj.Reloc{
  4860  					Type: typ,
  4861  					Off:  int32(p.Pc + int64(ab.Len())),
  4862  					Siz:  4,
  4863  					Sym:  p.To.Sym,
  4864  					Add:  p.To.Offset,
  4865  				})
  4866  				ab.PutInt32(0)
  4867  
  4868  			case Zcall, Zcallduff:
  4869  				if p.To.Sym == nil {
  4870  					ctxt.Diag("call without target")
  4871  					ctxt.DiagFlush()
  4872  					log.Fatalf("bad code")
  4873  				}
  4874  
  4875  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4876  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4877  				}
  4878  
  4879  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4880  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4881  					// (the call jumps into the middle of the function).
  4882  					// This makes it possible to see call sites for duffcopy/duffzero in
  4883  					// BP-based profiling tools like Linux perf (which is the
  4884  					// whole point of maintaining frame pointers in Go).
  4885  					// MOVQ BP, -16(SP)
  4886  					// LEAQ -16(SP), BP
  4887  					ab.Put(bpduff1)
  4888  				}
  4889  				ab.Put1(byte(op))
  4890  				cursym.AddRel(ctxt, obj.Reloc{
  4891  					Type: objabi.R_CALL,
  4892  					Off:  int32(p.Pc + int64(ab.Len())),
  4893  					Siz:  4,
  4894  					Sym:  p.To.Sym,
  4895  					Add:  p.To.Offset,
  4896  				})
  4897  				ab.PutInt32(0)
  4898  
  4899  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4900  					// Pop BP pushed above.
  4901  					// MOVQ 0(BP), BP
  4902  					ab.Put(bpduff2)
  4903  				}
  4904  
  4905  			// TODO: jump across functions needs reloc
  4906  			case Zbr, Zjmp, Zloop:
  4907  				if p.As == AXBEGIN {
  4908  					ab.Put1(byte(op))
  4909  				}
  4910  				if p.To.Sym != nil {
  4911  					if yt.zcase != Zjmp {
  4912  						ctxt.Diag("branch to ATEXT")
  4913  						ctxt.DiagFlush()
  4914  						log.Fatalf("bad code")
  4915  					}
  4916  
  4917  					ab.Put1(o.op[z+1])
  4918  					cursym.AddRel(ctxt, obj.Reloc{
  4919  						// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4920  						// it can point to a trampoline instead of the destination itself.
  4921  						Type: objabi.R_CALL,
  4922  						Off:  int32(p.Pc + int64(ab.Len())),
  4923  						Siz:  4,
  4924  						Sym:  p.To.Sym,
  4925  					})
  4926  					ab.PutInt32(0)
  4927  					break
  4928  				}
  4929  
  4930  				// Assumes q is in this function.
  4931  				// TODO: Check in input, preserve in brchain.
  4932  
  4933  				// Fill in backward jump now.
  4934  				q := p.To.Target()
  4935  
  4936  				if q == nil {
  4937  					ctxt.Diag("jmp/branch/loop without target")
  4938  					ctxt.DiagFlush()
  4939  					log.Fatalf("bad code")
  4940  				}
  4941  
  4942  				if p.Back&branchBackwards != 0 {
  4943  					v := q.Pc - (p.Pc + 2)
  4944  					if v >= -128 && p.As != AXBEGIN {
  4945  						if p.As == AJCXZL {
  4946  							ab.Put1(0x67)
  4947  						}
  4948  						ab.Put2(byte(op), byte(v))
  4949  					} else if yt.zcase == Zloop {
  4950  						ctxt.Diag("loop too far: %v", p)
  4951  					} else {
  4952  						v -= 5 - 2
  4953  						if p.As == AXBEGIN {
  4954  							v--
  4955  						}
  4956  						if yt.zcase == Zbr {
  4957  							ab.Put1(0x0f)
  4958  							v--
  4959  						}
  4960  
  4961  						ab.Put1(o.op[z+1])
  4962  						ab.PutInt32(int32(v))
  4963  					}
  4964  
  4965  					break
  4966  				}
  4967  
  4968  				// Annotate target; will fill in later.
  4969  				p.Forwd = q.Rel
  4970  
  4971  				q.Rel = p
  4972  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4973  					if p.As == AJCXZL {
  4974  						ab.Put1(0x67)
  4975  					}
  4976  					ab.Put2(byte(op), 0)
  4977  				} else if yt.zcase == Zloop {
  4978  					ctxt.Diag("loop too far: %v", p)
  4979  				} else {
  4980  					if yt.zcase == Zbr {
  4981  						ab.Put1(0x0f)
  4982  					}
  4983  					ab.Put1(o.op[z+1])
  4984  					ab.PutInt32(0)
  4985  				}
  4986  
  4987  			case Zbyte:
  4988  				var rel obj.Reloc
  4989  				v := vaddr(ctxt, p, &p.From, &rel)
  4990  				if rel.Siz != 0 {
  4991  					rel.Siz = uint8(op)
  4992  					rel.Off = int32(p.Pc + int64(ab.Len()))
  4993  					cursym.AddRel(ctxt, rel)
  4994  				}
  4995  
  4996  				ab.Put1(byte(v))
  4997  				if op > 1 {
  4998  					ab.Put1(byte(v >> 8))
  4999  					if op > 2 {
  5000  						ab.PutInt16(int16(v >> 16))
  5001  						if op > 4 {
  5002  							ab.PutInt32(int32(v >> 32))
  5003  						}
  5004  					}
  5005  				}
  5006  			}
  5007  
  5008  			return
  5009  		}
  5010  	}
  5011  	f3t = Ynone * Ymax
  5012  	if p.GetFrom3() != nil {
  5013  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  5014  	}
  5015  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  5016  		var pp obj.Prog
  5017  		var t []byte
  5018  		if p.As == mo[0].as {
  5019  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  5020  				t = mo[0].op[:]
  5021  				switch mo[0].code {
  5022  				default:
  5023  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  5024  
  5025  				case movLit:
  5026  					for z = 0; t[z] != 0; z++ {
  5027  						ab.Put1(t[z])
  5028  					}
  5029  
  5030  				case movRegMem:
  5031  					ab.Put1(t[0])
  5032  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  5033  
  5034  				case movMemReg:
  5035  					ab.Put1(t[0])
  5036  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5037  
  5038  				case movRegMem2op: // r,m - 2op
  5039  					ab.Put2(t[0], t[1])
  5040  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5041  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5042  
  5043  				case movMemReg2op:
  5044  					ab.Put2(t[0], t[1])
  5045  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5046  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5047  
  5048  				case movFullPtr:
  5049  					if t[0] != 0 {
  5050  						ab.Put1(t[0])
  5051  					}
  5052  					switch p.To.Index {
  5053  					default:
  5054  						goto bad
  5055  
  5056  					case REG_DS:
  5057  						ab.Put1(0xc5)
  5058  
  5059  					case REG_SS:
  5060  						ab.Put2(0x0f, 0xb2)
  5061  
  5062  					case REG_ES:
  5063  						ab.Put1(0xc4)
  5064  
  5065  					case REG_FS:
  5066  						ab.Put2(0x0f, 0xb4)
  5067  
  5068  					case REG_GS:
  5069  						ab.Put2(0x0f, 0xb5)
  5070  					}
  5071  
  5072  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5073  
  5074  				case movDoubleShift:
  5075  					if t[0] == Pw {
  5076  						if ctxt.Arch.Family != sys.AMD64 {
  5077  							ctxt.Diag("asmins: illegal 64: %v", p)
  5078  						}
  5079  						ab.rexflag |= Pw
  5080  						t = t[1:]
  5081  					} else if t[0] == Pe {
  5082  						ab.Put1(Pe)
  5083  						t = t[1:]
  5084  					}
  5085  
  5086  					switch p.From.Type {
  5087  					default:
  5088  						goto bad
  5089  
  5090  					case obj.TYPE_CONST:
  5091  						ab.Put2(0x0f, t[0])
  5092  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5093  						ab.Put1(byte(p.From.Offset))
  5094  
  5095  					case obj.TYPE_REG:
  5096  						switch p.From.Reg {
  5097  						default:
  5098  							goto bad
  5099  
  5100  						case REG_CL, REG_CX:
  5101  							ab.Put2(0x0f, t[1])
  5102  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5103  						}
  5104  					}
  5105  
  5106  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5107  				// where you load the TLS base register into a register and then index off that
  5108  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5109  				// are handled in prefixof above and should not be listed here.
  5110  				case movTLSReg:
  5111  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5112  						ctxt.Diag("invalid load of TLS: %v", p)
  5113  					}
  5114  
  5115  					if ctxt.Arch.Family == sys.I386 {
  5116  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5117  						// where you load the TLS base register into a register and then index off that
  5118  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5119  						// are handled in prefixof above and should not be listed here.
  5120  						switch ctxt.Headtype {
  5121  						default:
  5122  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5123  
  5124  						case objabi.Hlinux, objabi.Hfreebsd:
  5125  							if ctxt.Flag_shared {
  5126  								// Note that this is not generating the same insns as the other cases.
  5127  								//     MOV TLS, dst
  5128  								// becomes
  5129  								//     call __x86.get_pc_thunk.dst
  5130  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5131  								// which is encoded as
  5132  								//     call __x86.get_pc_thunk.dst
  5133  								//     movq 0(dst), dst
  5134  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5135  								// is g, which we can't check here, but will when we assemble the second
  5136  								// instruction.
  5137  								dst := p.To.Reg
  5138  								ab.Put1(0xe8)
  5139  								cursym.AddRel(ctxt, obj.Reloc{
  5140  									Type: objabi.R_CALL,
  5141  									Off:  int32(p.Pc + int64(ab.Len())),
  5142  									Siz:  4,
  5143  									Sym:  ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))),
  5144  								})
  5145  								ab.PutInt32(0)
  5146  
  5147  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5148  								cursym.AddRel(ctxt, obj.Reloc{
  5149  									Type: objabi.R_TLS_IE,
  5150  									Off:  int32(p.Pc + int64(ab.Len())),
  5151  									Siz:  4,
  5152  									Add:  2,
  5153  								})
  5154  								ab.PutInt32(0)
  5155  							} else {
  5156  								// ELF TLS base is 0(GS).
  5157  								pp.From = p.From
  5158  
  5159  								pp.From.Type = obj.TYPE_MEM
  5160  								pp.From.Reg = REG_GS
  5161  								pp.From.Offset = 0
  5162  								pp.From.Index = REG_NONE
  5163  								pp.From.Scale = 0
  5164  								ab.Put2(0x65, // GS
  5165  									0x8B)
  5166  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5167  							}
  5168  						case objabi.Hplan9:
  5169  							pp.From = obj.Addr{}
  5170  							pp.From.Type = obj.TYPE_MEM
  5171  							pp.From.Name = obj.NAME_EXTERN
  5172  							pp.From.Sym = plan9privates
  5173  							pp.From.Offset = 0
  5174  							pp.From.Index = REG_NONE
  5175  							ab.Put1(0x8B)
  5176  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5177  						}
  5178  						break
  5179  					}
  5180  
  5181  					switch ctxt.Headtype {
  5182  					default:
  5183  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5184  
  5185  					case objabi.Hlinux, objabi.Hfreebsd:
  5186  						if !ctxt.Flag_shared {
  5187  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5188  						}
  5189  						// Note that this is not generating the same insn as the other cases.
  5190  						//     MOV TLS, R_to
  5191  						// becomes
  5192  						//     movq g@gottpoff(%rip), R_to
  5193  						// which is encoded as
  5194  						//     movq 0(%rip), R_to
  5195  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5196  						// is g, which we can't check here, but will when we assemble the second
  5197  						// instruction.
  5198  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5199  
  5200  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5201  						cursym.AddRel(ctxt, obj.Reloc{
  5202  							Type: objabi.R_TLS_IE,
  5203  							Off:  int32(p.Pc + int64(ab.Len())),
  5204  							Siz:  4,
  5205  							Add:  -4,
  5206  						})
  5207  						ab.PutInt32(0)
  5208  
  5209  					case objabi.Hplan9:
  5210  						pp.From = obj.Addr{}
  5211  						pp.From.Type = obj.TYPE_MEM
  5212  						pp.From.Name = obj.NAME_EXTERN
  5213  						pp.From.Sym = plan9privates
  5214  						pp.From.Offset = 0
  5215  						pp.From.Index = REG_NONE
  5216  						ab.rexflag |= Pw
  5217  						ab.Put1(0x8B)
  5218  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5219  
  5220  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5221  						// TLS base is 0(FS).
  5222  						pp.From = p.From
  5223  
  5224  						pp.From.Type = obj.TYPE_MEM
  5225  						pp.From.Name = obj.NAME_NONE
  5226  						pp.From.Reg = REG_NONE
  5227  						pp.From.Offset = 0
  5228  						pp.From.Index = REG_NONE
  5229  						pp.From.Scale = 0
  5230  						ab.rexflag |= Pw
  5231  						ab.Put2(0x64, // FS
  5232  							0x8B)
  5233  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5234  					}
  5235  				}
  5236  				return
  5237  			}
  5238  		}
  5239  	}
  5240  	goto bad
  5241  
  5242  bad:
  5243  	if ctxt.Arch.Family != sys.AMD64 {
  5244  		// here, the assembly has failed.
  5245  		// if it's a byte instruction that has
  5246  		// unaddressable registers, try to
  5247  		// exchange registers and reissue the
  5248  		// instruction with the operands renamed.
  5249  		pp := *p
  5250  
  5251  		unbytereg(&pp.From, &pp.Ft)
  5252  		unbytereg(&pp.To, &pp.Tt)
  5253  
  5254  		z := int(p.From.Reg)
  5255  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5256  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5257  			// For now, different to keep bit-for-bit compatibility.
  5258  			if ctxt.Arch.Family == sys.I386 {
  5259  				breg := byteswapreg(ctxt, &p.To)
  5260  				if breg != REG_AX {
  5261  					ab.Put1(0x87) // xchg lhs,bx
  5262  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5263  					subreg(&pp, z, breg)
  5264  					ab.doasm(ctxt, cursym, &pp)
  5265  					ab.Put1(0x87) // xchg lhs,bx
  5266  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5267  				} else {
  5268  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5269  					subreg(&pp, z, REG_AX)
  5270  					ab.doasm(ctxt, cursym, &pp)
  5271  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5272  				}
  5273  				return
  5274  			}
  5275  
  5276  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5277  				// We certainly don't want to exchange
  5278  				// with AX if the op is MUL or DIV.
  5279  				ab.Put1(0x87) // xchg lhs,bx
  5280  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5281  				subreg(&pp, z, REG_BX)
  5282  				ab.doasm(ctxt, cursym, &pp)
  5283  				ab.Put1(0x87) // xchg lhs,bx
  5284  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5285  			} else {
  5286  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5287  				subreg(&pp, z, REG_AX)
  5288  				ab.doasm(ctxt, cursym, &pp)
  5289  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5290  			}
  5291  			return
  5292  		}
  5293  
  5294  		z = int(p.To.Reg)
  5295  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5296  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5297  			// For now, different to keep bit-for-bit compatibility.
  5298  			if ctxt.Arch.Family == sys.I386 {
  5299  				breg := byteswapreg(ctxt, &p.From)
  5300  				if breg != REG_AX {
  5301  					ab.Put1(0x87) //xchg rhs,bx
  5302  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5303  					subreg(&pp, z, breg)
  5304  					ab.doasm(ctxt, cursym, &pp)
  5305  					ab.Put1(0x87) // xchg rhs,bx
  5306  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5307  				} else {
  5308  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5309  					subreg(&pp, z, REG_AX)
  5310  					ab.doasm(ctxt, cursym, &pp)
  5311  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5312  				}
  5313  				return
  5314  			}
  5315  
  5316  			if isax(&p.From) {
  5317  				ab.Put1(0x87) // xchg rhs,bx
  5318  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5319  				subreg(&pp, z, REG_BX)
  5320  				ab.doasm(ctxt, cursym, &pp)
  5321  				ab.Put1(0x87) // xchg rhs,bx
  5322  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5323  			} else {
  5324  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5325  				subreg(&pp, z, REG_AX)
  5326  				ab.doasm(ctxt, cursym, &pp)
  5327  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5328  			}
  5329  			return
  5330  		}
  5331  	}
  5332  
  5333  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5334  }
  5335  
  5336  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5337  // which is not referenced in a.
  5338  // If a is empty, it returns BX to account for MULB-like instructions
  5339  // that might use DX and AX.
  5340  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5341  	cana, canb, canc, cand := true, true, true, true
  5342  	if a.Type == obj.TYPE_NONE {
  5343  		cana, cand = false, false
  5344  	}
  5345  
  5346  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5347  		switch a.Reg {
  5348  		case REG_NONE:
  5349  			cana, cand = false, false
  5350  		case REG_AX, REG_AL, REG_AH:
  5351  			cana = false
  5352  		case REG_BX, REG_BL, REG_BH:
  5353  			canb = false
  5354  		case REG_CX, REG_CL, REG_CH:
  5355  			canc = false
  5356  		case REG_DX, REG_DL, REG_DH:
  5357  			cand = false
  5358  		}
  5359  	}
  5360  
  5361  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5362  		switch a.Index {
  5363  		case REG_AX:
  5364  			cana = false
  5365  		case REG_BX:
  5366  			canb = false
  5367  		case REG_CX:
  5368  			canc = false
  5369  		case REG_DX:
  5370  			cand = false
  5371  		}
  5372  	}
  5373  
  5374  	switch {
  5375  	case cana:
  5376  		return REG_AX
  5377  	case canb:
  5378  		return REG_BX
  5379  	case canc:
  5380  		return REG_CX
  5381  	case cand:
  5382  		return REG_DX
  5383  	default:
  5384  		ctxt.Diag("impossible byte register")
  5385  		ctxt.DiagFlush()
  5386  		log.Fatalf("bad code")
  5387  		return 0
  5388  	}
  5389  }
  5390  
  5391  func isbadbyte(a *obj.Addr) bool {
  5392  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5393  }
  5394  
  5395  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5396  	ab.Reset()
  5397  
  5398  	ab.rexflag = 0
  5399  	ab.vexflag = false
  5400  	ab.evexflag = false
  5401  	mark := ab.Len()
  5402  	ab.doasm(ctxt, cursym, p)
  5403  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5404  		// as befits the whole approach of the architecture,
  5405  		// the rex prefix must appear before the first opcode byte
  5406  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5407  		// before the 0f opcode escape!), or it might be ignored.
  5408  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5409  		if ctxt.Arch.Family != sys.AMD64 {
  5410  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5411  		}
  5412  		n := ab.Len()
  5413  		var np int
  5414  		for np = mark; np < n; np++ {
  5415  			c := ab.At(np)
  5416  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5417  				break
  5418  			}
  5419  		}
  5420  		ab.Insert(np, byte(0x40|ab.rexflag))
  5421  	}
  5422  
  5423  	n := ab.Len()
  5424  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5425  		r := &cursym.R[i]
  5426  		if int64(r.Off) < p.Pc {
  5427  			break
  5428  		}
  5429  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5430  			r.Off++
  5431  		}
  5432  		if r.Type == objabi.R_PCREL {
  5433  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5434  				// PC-relative addressing is relative to the end of the instruction,
  5435  				// but the relocations applied by the linker are relative to the end
  5436  				// of the relocation. Because immediate instruction
  5437  				// arguments can follow the PC-relative memory reference in the
  5438  				// instruction encoding, the two may not coincide. In this case,
  5439  				// adjust addend so that linker can keep relocating relative to the
  5440  				// end of the relocation.
  5441  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5442  			} else if ctxt.Arch.Family == sys.I386 {
  5443  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5444  				// assumes that the previous instruction loaded the PC of the end
  5445  				// of that instruction into CX, so the adjustment is relative to
  5446  				// that.
  5447  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5448  			}
  5449  		}
  5450  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5451  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5452  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5453  		}
  5454  
  5455  	}
  5456  }
  5457  
  5458  // unpackOps4 extracts 4 operands from p.
  5459  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5460  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5461  }
  5462  
  5463  // unpackOps5 extracts 5 operands from p.
  5464  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5465  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5466  }
  5467  

View as plain text