Text file src/runtime/asm_arm64.s

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  #include "cgo/abi_arm64.h"
    11  
    12  // _rt0_arm64 is common startup code for most arm64 systems when using
    13  // internal linking. This is the entry point for the program from the
    14  // kernel for an ordinary -buildmode=exe program. The stack holds the
    15  // number of arguments and the C-style argv.
    16  TEXT _rt0_arm64(SB),NOSPLIT,$0
    17  	MOVD	0(RSP), R0	// R0 = argc, read from the top of the stack
    18  	ADD	$8, RSP, R1	// R1 = argv, which starts 8 bytes above argc
    19  	JMP	runtime·rt0_go(SB)	// tail-jump; rt0_go expects R0 = argc, R1 = argv
    20  
    21  // main is common startup code for most arm64 systems when using
    22  // external linking. The C startup code will call the symbol "main"
    23  // passing argc and argv in the usual C ABI registers R0 and R1.
    24  TEXT main(SB),NOSPLIT,$0
    25  	JMP	runtime·rt0_go(SB)	// R0 and R1 are passed through untouched
    26  
    27  // _rt0_arm64_lib is common startup code for most arm64 systems when
    28  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    29  // arrange to invoke this function as a global constructor (for
    30  // c-archive) or when the shared library is loaded (for c-shared).
    31  // We expect argc and argv to be passed in the usual C ABI registers
    32  // R0 and R1.
        //
        // It saves argc/argv in file-local globals, calls runtime·libpreinit,
        // and then starts _rt0_arm64_lib_go on a new thread: through
        // _cgo_sys_thread_create when that pointer is non-nil, otherwise through
        // runtime·newosproc0. It returns to its caller after restoring the
        // registers it saved on entry.
    33  TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
    34  	// Preserve callee-save registers.
        	// NOTE(review): the SAVE_*/RESTORE_* macros are not defined in this
        	// file; presumably they come from cgo/abi_arm64.h and take the frame
        	// offset of the save area — confirm.
    35  	SAVE_R19_TO_R28(24)
    36  	SAVE_F8_TO_F15(104)
    37  
    38  	// Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go
    39  	MOVD	ZR, g
    40  
        	// Stash argc/argv so that _rt0_arm64_lib_go can reload them later.
    41  	MOVD	R0, _rt0_arm64_lib_argc<>(SB)
    42  	MOVD	R1, _rt0_arm64_lib_argv<>(SB)
    43  
    44  	// Synchronous initialization.
    45  	MOVD	$runtime·libpreinit(SB), R4
    46  	BL	(R4)	// called indirectly through R4
    47  
    48  	// Create a new thread to do the runtime initialization and return.
    49  	MOVD	_cgo_sys_thread_create(SB), R4
    50  	CBZ	R4, nocgo	// nil pointer: take the non-cgo path
    51  	MOVD	$_rt0_arm64_lib_go(SB), R0	// first argument: function for the new thread
    52  	MOVD	$0, R1	// second argument: 0
    53  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
    54  	BL	(R4)
    55  	ADD	$16, RSP
    56  	B	restore
    57  
    58  nocgo:
        	// Arguments for newosproc0 are passed on the stack at 8(RSP) and 16(RSP).
    59  	MOVD	$0x800000, R0                     // stacksize = 8192KB
    60  	MOVD	$_rt0_arm64_lib_go(SB), R1	// function for the new thread
    61  	MOVD	R0, 8(RSP)
    62  	MOVD	R1, 16(RSP)
    63  	MOVD	$runtime·newosproc0(SB),R4
    64  	BL	(R4)
    65  
    66  restore:
    67  	// Restore callee-save registers.
    68  	RESTORE_R19_TO_R28(24)
    69  	RESTORE_F8_TO_F15(104)
    70  	RET
    71  
        // _rt0_arm64_lib_go is the function that _rt0_arm64_lib hands to the new
        // thread. It reloads the argc/argv saved by _rt0_arm64_lib into R0/R1 and
        // branches to runtime·rt0_go.
    72  TEXT _rt0_arm64_lib_go(SB),NOSPLIT,$0
    73  	MOVD	_rt0_arm64_lib_argc<>(SB), R0	// R0 = saved argc
    74  	MOVD	_rt0_arm64_lib_argv<>(SB), R1	// R1 = saved argv
    75  	MOVD	$runtime·rt0_go(SB),R4
    76  	B	(R4)	// indirect branch, not a call: LR is left unchanged
    77  
        // File-local 8-byte slots, initialised to 0, holding the argc and argv that
        // _rt0_arm64_lib receives and _rt0_arm64_lib_go later reloads.
    78  DATA _rt0_arm64_lib_argc<>(SB)/8, $0
    79  GLOBL _rt0_arm64_lib_argc<>(SB),NOPTR, $8
    80  DATA _rt0_arm64_lib_argv<>(SB)/8, $0
    81  GLOBL _rt0_arm64_lib_argv<>(SB),NOPTR, $8
    82  
        // no_lse_msg is the 64-byte message that the no_lse path of runtime·rt0_go
        // passes to runtime·write. It is only defined when GOARM64_LSE is set.
    83  #ifdef GOARM64_LSE
    84  DATA no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
    85  GLOBL no_lse_msg<>(SB), RODATA, $64
    86  #endif
    87  
    88  // We know for sure that Linux and FreeBSD allow reading the instruction set
    89  // attribute registers (while some other OSes, like OpenBSD and Darwin,
    90  // do not). Let's be conservative and allow code to read such registers
    91  // only when we are sure this won't lead to SIGILL.
    92  #ifdef GOOS_linux
    93  #define ISA_REGS_READABLE
    94  #endif
    95  #ifdef GOOS_freebsd
    96  #define ISA_REGS_READABLE
    97  #endif
    98  
    99  #ifdef GOARM64_LSE
   100  #ifdef ISA_REGS_READABLE
   101  #define CHECK_GOARM64_LSE
   102  #endif
   103  #endif
   104  
   105  TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
   106  	// SP = stack; R0 = argc; R1 = argv
   107  
   108  	SUB	$32, RSP
   109  	MOVW	R0, 8(RSP) // argc
   110  	MOVD	R1, 16(RSP) // argv
   111  
   112  	// This is typically the entry point for Go programs.
   113  	// Call stack unwinding must not proceed past this frame.
   114  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
   115  	// (which don't use debug info for performance reasons)
   116  	// won't attempt to unwind past this function.
   117  	// See go.dev/issue/63630
   118  	MOVD	$0, R29
   119  
   120  #ifdef TLS_darwin
   121  	// Initialize TLS.
   122  	MOVD	ZR, g // clear g, make sure it's not junk.
   123  	SUB	$32, RSP
   124  	MRS_TPIDR_R0
   125  	AND	$~7, R0
   126  	MOVD	R0, 16(RSP)             // arg2: TLS base
   127  	MOVD	$runtime·tls_g(SB), R2
   128  	MOVD	R2, 8(RSP)              // arg1: &tlsg
   129  	BL	·tlsinit(SB)
   130  	ADD	$32, RSP
   131  #endif
   132  
   133  	// create istack out of the given (operating system) stack.
   134  	// _cgo_init may update stackguard.
   135  	MOVD	$runtime·g0(SB), g
   136  	MOVD	RSP, R7
   137  	MOVD	$(-64*1024)(R7), R0
   138  	MOVD	R0, g_stackguard0(g)
   139  	MOVD	R0, g_stackguard1(g)
   140  	MOVD	R0, (g_stack+stack_lo)(g)
   141  	MOVD	R7, (g_stack+stack_hi)(g)
   142  
   143  	// if there is a _cgo_init, call it using the gcc ABI.
   144  	MOVD	_cgo_init(SB), R12
   145  	CBZ	R12, nocgo
   146  
   147  #ifdef GOOS_android
   148  	MRS_TPIDR_R0			// load TLS base pointer
   149  	MOVD	R0, R3			// arg 3: TLS base pointer
   150  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
   151  #else
   152  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
   153  #endif
   154  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
   155  	MOVD	g, R0			// arg 0: G
   156  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
   157  	BL	(R12)
   158  	ADD	$16, RSP
   159  
   160  nocgo:
   161  	BL	runtime·save_g(SB)
   162  	// update stackguard after _cgo_init
   163  	MOVD	(g_stack+stack_lo)(g), R0
   164  	ADD	$const_stackGuard, R0
   165  	MOVD	R0, g_stackguard0(g)
   166  	MOVD	R0, g_stackguard1(g)
   167  
   168  	// set the per-goroutine and per-mach "registers"
   169  	MOVD	$runtime·m0(SB), R0
   170  
   171  	// save m->g0 = g0
   172  	MOVD	g, m_g0(R0)
   173  	// save m0 to g0->m
   174  	MOVD	R0, g_m(g)
   175  
   176  	BL	runtime·check(SB)
   177  
   178  #ifdef GOOS_windows
   179  	BL	runtime·wintls(SB)
   180  #endif
   181  
   182  	// Check that CPU we use for execution supports instructions targeted during compile-time.
   183  #ifdef CHECK_GOARM64_LSE
   184  	// Read the ID_AA64ISAR0_EL1 register
   185  	MRS	ID_AA64ISAR0_EL1, R0
   186  
   187  	// Extract the LSE field (bits [23:20])
   188  	LSR	$20, R0, R0
   189  	AND	$0xf, R0, R0
   190  
   191  	// LSE support is indicated by a non-zero value
   192  	CBZ	R0, no_lse
   193  #endif
   194  
   195  	MOVW	8(RSP), R0	// copy argc
   196  	MOVW	R0, -8(RSP)
   197  	MOVD	16(RSP), R0		// copy argv
   198  	MOVD	R0, 0(RSP)
   199  	BL	runtime·args(SB)
   200  	BL	runtime·osinit(SB)
   201  	BL	runtime·schedinit(SB)
   202  
   203  	// create a new goroutine to start program
   204  	MOVD	$runtime·mainPC(SB), R0		// entry
   205  	SUB	$16, RSP
   206  	MOVD	R0, 8(RSP) // arg
   207  	MOVD	$0, 0(RSP) // dummy LR
   208  	BL	runtime·newproc(SB)
   209  	ADD	$16, RSP
   210  
   211  	// start this M
   212  	BL	runtime·mstart(SB)
   213  	UNDEF
   214  
   215  #ifdef CHECK_GOARM64_LSE
   216  no_lse:
   217  	MOVD	$1, R0 // stderr
   218  	MOVD	R0, 8(RSP)
   219  	MOVD	$no_lse_msg<>(SB), R1 // message address
   220  	MOVD	R1, 16(RSP)
   221  	MOVD	$64, R2 // message length
   222  	MOVD	R2, 24(RSP)
   223  	CALL	runtime·write(SB)
   224  	CALL	runtime·exit(SB)
   225  	CALL	runtime·abort(SB)
   226  	RET
   227  #endif
   228  
   229  	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
   230  	// intended to be called by debuggers.
   231  	MOVD	$runtime·debugPinnerV1<ABIInternal>(SB), R0
   232  	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0
   233  
   234  	MOVD	$0, R0
   235  	MOVD	R0, (R0)	// boom
   236  	UNDEF
   237  
        // mainPC is an 8-byte read-only word holding the address of runtime·main.
        // rt0_go passes the address of this word to runtime·newproc.
   238  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   239  GLOBL	runtime·mainPC(SB),RODATA,$8
   240  
   241  // BREAK emits a BRK instruction. Windows ARM64 needs an immediate 0xf000
   242  // argument; other targets use a bare BRK. See go.dev/issues/53837.
   243  #define BREAK	\
   244  #ifdef GOOS_windows	\
   245  	BRK	$0xf000 	\
   246  #else 				\
   247  	BRK 			\
   248  #endif 				\
   249  
   250  
        // breakpoint executes the BREAK instruction and then returns.
   251  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
   252  	BREAK
   253  	RET
   254  
        // asminit does nothing on arm64; it returns immediately.
   255  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
   256  	RET
   257  
        // mstart clears the frame pointer and calls runtime·mstart0, which is not
        // expected to return.
   258  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
   259  	// This is the root frame of new Go-created OS threads.
   260  	// Call stack unwinding must not proceed past this frame.
   261  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
   262  	// (which don't use debug info for performance reasons)
   263  	// won't attempt to unwind past this function.
   264  	// See go.dev/issue/63630
   265  	MOVD	$0, R29
   266  	BL	runtime·mstart0(SB)
   267  	RET // not reached
   268  
   269  /*
   270   *  go-routine
   271   */
   272  
   273  // void gogo(Gobuf*)
   274  // restore state from Gobuf; longjmp
        // Loads buf into R5 and buf->g into R6, then tail-branches to gogo<>,
        // which consumes those two registers.
   275  TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
   276  	MOVD	buf+0(FP), R5	// R5 = Gobuf pointer
   277  	MOVD	gobuf_g(R5), R6	// R6 = g stored in the Gobuf
   278  	MOVD	0(R6), R4	// make sure g != nil
   279  	B	gogo<>(SB)
   280  
        // gogo<> expects R5 = Gobuf pointer and R6 = the g to switch to.
        // It installs R6 as g, restores SP, the frame pointer (R29), LR and the
        // context register (R26) from the Gobuf, zeroes those four Gobuf fields,
        // and branches to gobuf_pc.
   281  TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
   282  	MOVD	R6, g
   283  	BL	runtime·save_g(SB)
   284  
   285  	MOVD	gobuf_sp(R5), R0
   286  	MOVD	R0, RSP	// SP is loaded via R0
   287  	MOVD	gobuf_bp(R5), R29
   288  	MOVD	gobuf_lr(R5), LR
   289  	MOVD	gobuf_ctxt(R5), R26
   290  	MOVD	$0, gobuf_sp(R5)	// clear the fields that were just consumed
   291  	MOVD	$0, gobuf_bp(R5)
   292  	MOVD	$0, gobuf_lr(R5)
   293  	MOVD	$0, gobuf_ctxt(R5)
   294  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   295  	MOVD	gobuf_pc(R5), R6
   296  	B	(R6)	// resume at the saved PC
   297  
   298  // void mcall(fn func(*g))
   299  // Switch to m->g0's stack, call fn(g).
   300  // Fn must never return. It should gogo(&g->sched)
   301  // to keep running g.
        // fn arrives in R0 and is kept in R26 as the closure context; the calling
        // g is passed to fn in R0.
   302  TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
   303  #ifdef GOEXPERIMENT_runtimesecret
   304  	MOVW	g_secret(g), R26
   305  	CBZ 	R26, nosecret
   306  	// Use R26 as a secondary link register
   307  	// We purposefully don't erase it in secretEraseRegistersMcall
   308  	MOVD	LR, R26
   309  	BL 	runtime·secretEraseRegistersMcall(SB)
   310  	MOVD	R26, LR
   311  
   312  nosecret:
   313  #endif
   314  	MOVD	R0, R26				// context
   315  
   316  	// Save caller state in g->sched
   317  	MOVD	RSP, R0
   318  	MOVD	R0, (g_sched+gobuf_sp)(g)
   319  	MOVD	R29, (g_sched+gobuf_bp)(g)
   320  	MOVD	LR, (g_sched+gobuf_pc)(g)	// resume point is mcall's return address
   321  	MOVD	$0, (g_sched+gobuf_lr)(g)
   322  
   323  	// Switch to m->g0 & its stack, call fn.
   324  	MOVD	g, R3	// R3 = the calling g
   325  	MOVD	g_m(g), R8
   326  	MOVD	m_g0(R8), g
   327  	BL	runtime·save_g(SB)
   328  	CMP	g, R3
   329  	BNE	2(PC)	// caller was not g0: skip the badmcall branch
   330  	B	runtime·badmcall(SB)
   331  
   332  	MOVD	(g_sched+gobuf_sp)(g), R0
   333  	MOVD	R0, RSP	// sp = m->g0->sched.sp
   334  	MOVD	$0, R29				// clear frame pointer, as caller may execute on another M
   335  	MOVD	R3, R0				// arg = g
   336  	MOVD	$0, -16(RSP)			// dummy LR
   337  	SUB	$16, RSP
   338  	MOVD	0(R26), R4			// code pointer
   339  	BL	(R4)
   340  	B	runtime·badmcall2(SB)	// reached only if fn returned
   341  
   342  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   343  // of the G stack. We need to distinguish the routine that
   344  // lives at the bottom of the G stack from the one that lives
   345  // at the top of the system stack because the one at the top of
   346  // the system stack terminates the stack walk (see topofstack()).
   347  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   348  	UNDEF	// first instruction is UNDEF; the lines below exist only for their shape
   349  	BL	(LR)	// make sure this function is not leaf
   350  	RET
   351  
   352  // func systemstack(fn func())
        // Runs fn on the system stack. If g is already gsignal or g0, fn is
        // tail-called on the current stack. If g is m->curg, the routine saves
        // state in g->sched, switches to g0's stack, calls fn, and switches back to
        // curg. Any other g is reported through badsystemstack followed by abort.
   353  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   354  #ifdef GOEXPERIMENT_runtimesecret
   355  	MOVW	g_secret(g), R3
   356  	CBZ		R3, nosecret
   357  	BL 		·secretEraseRegisters(SB)
   358  
   359  nosecret:
   360  #endif
   361  	MOVD	fn+0(FP), R3	// R3 = fn
   362  	MOVD	R3, R26		// context
   363  	MOVD	g_m(g), R4	// R4 = m
   364  
   365  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   366  	CMP	g, R5
   367  	BEQ	noswitch
   368  
   369  	MOVD	m_g0(R4), R5	// R5 = g0
   370  	CMP	g, R5
   371  	BEQ	noswitch
   372  
   373  	MOVD	m_curg(R4), R6
   374  	CMP	g, R6
   375  	BEQ	switch
   376  
   377  	// Bad: g is not gsignal, not g0, not curg. What is it?
   378  	// Hide call from linker nosplit analysis.
   379  	MOVD	$runtime·badsystemstack(SB), R3
   380  	BL	(R3)
   381  	B	runtime·abort(SB)
   382  
   383  switch:
   384  	// Switch stacks.
   385  	// The original frame pointer is stored in R29,
   386  	// which is useful for stack unwinding.
   387  	// Save our state in g->sched. Pretend to
   388  	// be systemstack_switch if the G stack is scanned.
   389  	BL	gosave_systemstack_switch<>(SB)
   390  
   391  	// switch to g0
   392  	MOVD	R5, g	// R5 still holds g0 from the check above
   393  	BL	runtime·save_g(SB)
   394  	MOVD	(g_sched+gobuf_sp)(g), R3
   395  	MOVD	R3, RSP
   396  
   397  	// call target function
   398  	MOVD	0(R26), R3	// code pointer
   399  	BL	(R3)
   400  
   401  	// switch back to g
   402  	MOVD	g_m(g), R3
   403  	MOVD	m_curg(R3), g
   404  	BL	runtime·save_g(SB)
   405  	MOVD	(g_sched+gobuf_sp)(g), R0
   406  	MOVD	R0, RSP
   407  	MOVD	(g_sched+gobuf_bp)(g), R29
   408  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp/bp once restored
   409  	MOVD	$0, (g_sched+gobuf_bp)(g)
   410  	RET
   411  
   412  noswitch:
   413  	// already on m stack, just call directly
   414  	// Using a tail call here cleans up tracebacks since we won't stop
   415  	// at an intermediate systemstack.
   416  	MOVD	0(R26), R3	// code pointer
   417  	MOVD.P	16(RSP), R30	// restore LR
   418  	SUB	$8, RSP, R29	// restore FP
   419  	B	(R3)
   420  
   421  // func switchToCrashStack0(fn func())
        // Makes runtime·gcrash the current g and the current m's g0, moves SP to
        // 32 bytes below gcrash's stack_hi, and calls fn (passed in R0). fn is
        // not expected to return; if it does, runtime·abort is called.
   422  TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
   423  	MOVD	R0, R26    // context register
   424  	MOVD	g_m(g), R1 // curm
   425  
   426  	// set g to gcrash
   427  	MOVD	$runtime·gcrash(SB), g // g = &gcrash
   428  	BL	runtime·save_g(SB)         // clobbers R0
   429  	MOVD	R1, g_m(g)             // g.m = curm
   430  	MOVD	g, m_g0(R1)            // curm.g0 = g
   431  
   432  	// switch to crashstack
   433  	MOVD	(g_stack+stack_hi)(g), R1
   434  	SUB	$(4*8), R1	// leave 4 words (32 bytes) below stack_hi
   435  	MOVD	R1, RSP
   436  
   437  	// call target function
   438  	MOVD	0(R26), R0	// code pointer
   439  	CALL	(R0)
   440  
   441  	// should never return
   442  	CALL	runtime·abort(SB)
   443  	UNDEF
   444  
   445  /*
   446   * support for morestack
   447   */
   448  
   449  // Called during function prolog when more stack is needed.
   450  // Caller has already loaded:
   451  // R3 prolog's LR (R30)
   452  //
   453  // The traceback routines see morestack on a g0 as being
   454  // the top of a stack (for example, morestack calling newstack
   455  // calling the scheduler calling newm calling gc), so we must
   456  // record an argument size. For that purpose, it has no arguments.
        //
        // The routine records the caller's context in g->sched, aborts if g is
        // m->g0 or m->gsignal, records the caller's caller in m->morebuf, then
        // switches to g0's stack and calls runtime·newstack.
   457  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   458  	// Cannot grow scheduler stack (m->g0).
   459  	MOVD	g_m(g), R8
   460  	MOVD	m_g0(R8), R4
   461  
   462  	// Called from f.
   463  	// Set g->sched to context in f
   464  	MOVD	RSP, R0
   465  	MOVD	R0, (g_sched+gobuf_sp)(g)
   466  	MOVD	R29, (g_sched+gobuf_bp)(g)
   467  	MOVD	LR, (g_sched+gobuf_pc)(g)
   468  	MOVD	R3, (g_sched+gobuf_lr)(g)
   469  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   470  
   471  	CMP	g, R4
   472  	BNE	3(PC)	// g is not g0: skip the two-instruction failure path
   473  	BL	runtime·badmorestackg0(SB)
   474  	B	runtime·abort(SB)
   475  
   476  	// Cannot grow signal stack (m->gsignal).
   477  	MOVD	m_gsignal(R8), R4
   478  	CMP	g, R4
   479  	BNE	3(PC)	// g is not gsignal: skip the two-instruction failure path
   480  	BL	runtime·badmorestackgsignal(SB)
   481  	B	runtime·abort(SB)
   482  
   483  	// Called from f.
   484  	// Set m->morebuf to f's callers.
   485  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   486  	MOVD	RSP, R0
   487  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   488  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   489  
   490  	// If in secret mode, erase registers on transition
   491  	// from G stack to M stack,
   492  #ifdef GOEXPERIMENT_runtimesecret
   493  	MOVW	g_secret(g), R4
   494  	CBZ 	R4, nosecret
   495  	BL	·secretEraseRegisters(SB)
   496  	MOVD	g_m(g), R8	// reload m into R8 after the call
   497  nosecret:
   498  #endif
   499  
   500  	// Call newstack on m->g0's stack.
   501  	MOVD	m_g0(R8), g
   502  	BL	runtime·save_g(SB)
   503  	MOVD	(g_sched+gobuf_sp)(g), R0
   504  	MOVD	R0, RSP
   505  	MOVD	$0, R29		// clear frame pointer, as caller may execute on another M
   506  	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   507  	BL	runtime·newstack(SB)
   508  
   509  	// Not reached, but make sure the return PC from the call to newstack
   510  	// is still in this function, and not the beginning of the next.
   511  	UNDEF
   512  
        // morestack_noctxt zeroes the context register (R26) and branches to
        // runtime·morestack.
   513  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   514  	// Force SPWRITE. This function doesn't actually write SP,
   515  	// but it is called with a special calling convention where
   516  	// the caller doesn't save LR on stack but passes it as a
   517  	// register (R3), and the unwinder currently doesn't understand.
   518  	// Make it SPWRITE to stop unwinding. (See issue 54332)
   519  	MOVD	RSP, RSP
   520  
   521  	MOVW	$0, R26	// no context; morestack stores R26 into g->sched.ctxt
   522  	B runtime·morestack(SB)
   523  
   524  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
        // R0-R15 go to 8-byte slots 0-15 and F0-F15 to slots 16-31, stored as pairs.
   525  TEXT ·spillArgs(SB),NOSPLIT,$0-0
   526  	STP	(R0, R1), (0*8)(R20)
   527  	STP	(R2, R3), (2*8)(R20)
   528  	STP	(R4, R5), (4*8)(R20)
   529  	STP	(R6, R7), (6*8)(R20)
   530  	STP	(R8, R9), (8*8)(R20)
   531  	STP	(R10, R11), (10*8)(R20)
   532  	STP	(R12, R13), (12*8)(R20)
   533  	STP	(R14, R15), (14*8)(R20)
   534  	FSTPD	(F0, F1), (16*8)(R20)
   535  	FSTPD	(F2, F3), (18*8)(R20)
   536  	FSTPD	(F4, F5), (20*8)(R20)
   537  	FSTPD	(F6, F7), (22*8)(R20)
   538  	FSTPD	(F8, F9), (24*8)(R20)
   539  	FSTPD	(F10, F11), (26*8)(R20)
   540  	FSTPD	(F12, F13), (28*8)(R20)
   541  	FSTPD	(F14, F15), (30*8)(R20)
   542  	RET
   543  
   544  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
        // It is the inverse of spillArgs: slots 0-15 fill R0-R15 and slots 16-31
        // fill F0-F15, loaded as pairs.
   545  TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   546  	LDP	(0*8)(R20), (R0, R1)
   547  	LDP	(2*8)(R20), (R2, R3)
   548  	LDP	(4*8)(R20), (R4, R5)
   549  	LDP	(6*8)(R20), (R6, R7)
   550  	LDP	(8*8)(R20), (R8, R9)
   551  	LDP	(10*8)(R20), (R10, R11)
   552  	LDP	(12*8)(R20), (R12, R13)
   553  	LDP	(14*8)(R20), (R14, R15)
   554  	FLDPD	(16*8)(R20), (F0, F1)
   555  	FLDPD	(18*8)(R20), (F2, F3)
   556  	FLDPD	(20*8)(R20), (F4, F5)
   557  	FLDPD	(22*8)(R20), (F6, F7)
   558  	FLDPD	(24*8)(R20), (F8, F9)
   559  	FLDPD	(26*8)(R20), (F10, F11)
   560  	FLDPD	(28*8)(R20), (F12, F13)
   561  	FLDPD	(30*8)(R20), (F14, F15)
   562  	RET
   563  
   564  // reflectcall: call a function with the given argument list
   565  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   566  // we don't have variable-sized frames, so we use a small number
   567  // of constant-sized-frame functions to encode a few bits of size in the pc.
   568  // Caution: ugly multiline assembly macros in your future!
   569  
        // DISPATCH(NAME, MAXSIZE) compares the frame size in R16 with MAXSIZE and
        // branches to NAME unless the frame size is greater, in which case it
        // falls through to the next DISPATCH. R27 is used as scratch.
   570  #define DISPATCH(NAME,MAXSIZE)		\
   571  	MOVD	$MAXSIZE, R27;		\
   572  	CMP	R27, R16;		\
   573  	BGT	3(PC);			\
   574  	MOVD	$NAME(SB), R27;	\
   575  	B	(R27)
   576  // Note: can't just "B NAME(SB)" - bad inlining results.
   577  
        // reflectcall loads frameSize into R16 and tries each call* size in
        // increasing order; if none fits it branches to runtime·badreflectcall.
   578  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
   579  	MOVWU	frameSize+32(FP), R16
   580  	DISPATCH(runtime·call16, 16)
   581  	DISPATCH(runtime·call32, 32)
   582  	DISPATCH(runtime·call64, 64)
   583  	DISPATCH(runtime·call128, 128)
   584  	DISPATCH(runtime·call256, 256)
   585  	DISPATCH(runtime·call512, 512)
   586  	DISPATCH(runtime·call1024, 1024)
   587  	DISPATCH(runtime·call2048, 2048)
   588  	DISPATCH(runtime·call4096, 4096)
   589  	DISPATCH(runtime·call8192, 8192)
   590  	DISPATCH(runtime·call16384, 16384)
   591  	DISPATCH(runtime·call32768, 32768)
   592  	DISPATCH(runtime·call65536, 65536)
   593  	DISPATCH(runtime·call131072, 131072)
   594  	DISPATCH(runtime·call262144, 262144)
   595  	DISPATCH(runtime·call524288, 524288)
   596  	DISPATCH(runtime·call1048576, 1048576)
   597  	DISPATCH(runtime·call2097152, 2097152)
   598  	DISPATCH(runtime·call4194304, 4194304)
   599  	DISPATCH(runtime·call8388608, 8388608)
   600  	DISPATCH(runtime·call16777216, 16777216)
   601  	DISPATCH(runtime·call33554432, 33554432)
   602  	DISPATCH(runtime·call67108864, 67108864)
   603  	DISPATCH(runtime·call134217728, 134217728)
   604  	DISPATCH(runtime·call268435456, 268435456)
   605  	DISPATCH(runtime·call536870912, 536870912)
   606  	DISPATCH(runtime·call1073741824, 1073741824)
   607  	MOVD	$runtime·badreflectcall(SB), R0
   608  	B	(R0)
   609  
        // CALLFN(NAME, MAXSIZE) defines one call* function with a MAXSIZE-byte
        // frame. The generated function:
        //   1. copies stackArgsSize bytes from stackArgs into its frame starting at
        //      8(RSP), 16 bytes at a time and then byte by byte for the remainder;
        //   2. loads the argument registers from regArgs via unspillArgs;
        //   3. calls the code pointer stored at f, with f itself in R26;
        //   4. spills the result registers back into regArgs via spillArgs;
        //   5. advances the frame copy, the stackArgs pointer and the size by
        //      stackRetOffset and hands them to callRet in R5, R3 and R4, together
        //      with stackArgsType in R7 and regArgs in R20.
   610  #define CALLFN(NAME,MAXSIZE)			\
   611  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   612  	NO_LOCAL_POINTERS;			\
   613  	/* copy arguments to stack */		\
   614  	MOVD	stackArgs+16(FP), R3;			\
   615  	MOVWU	stackArgsSize+24(FP), R4;		\
   616  	ADD	$8, RSP, R5;			\
   617  	BIC	$0xf, R4, R6;			\
   618  	CBZ	R6, 6(PC);			\
   619  	/* if R6=(argsize&~15) != 0 */		\
   620  	ADD	R6, R5, R6;			\
   621  	/* copy 16 bytes a time */		\
   622  	LDP.P	16(R3), (R7, R8);		\
   623  	STP.P	(R7, R8), 16(R5);		\
   624  	CMP	R5, R6;				\
   625  	BNE	-3(PC);				\
   626  	AND	$0xf, R4, R6;			\
   627  	CBZ	R6, 6(PC);			\
   628  	/* if R6=(argsize&15) != 0 */		\
   629  	ADD	R6, R5, R6;			\
   630  	/* copy 1 byte a time for the rest */	\
   631  	MOVBU.P	1(R3), R7;			\
   632  	MOVBU.P	R7, 1(R5);			\
   633  	CMP	R5, R6;				\
   634  	BNE	-3(PC);				\
   635  	/* set up argument registers */		\
   636  	MOVD	regArgs+40(FP), R20;		\
   637  	CALL	·unspillArgs(SB);		\
   638  	/* call function */			\
   639  	MOVD	f+8(FP), R26;			\
   640  	MOVD	(R26), R20;			\
   641  	PCDATA	$PCDATA_StackMapIndex, $0;	\
   642  	BL	(R20);				\
   643  	/* copy return values back */		\
   644  	MOVD	regArgs+40(FP), R20;		\
   645  	CALL	·spillArgs(SB);		\
   646  	MOVD	stackArgsType+0(FP), R7;		\
   647  	MOVD	stackArgs+16(FP), R3;			\
   648  	MOVWU	stackArgsSize+24(FP), R4;			\
   649  	MOVWU	stackRetOffset+28(FP), R6;		\
   650  	ADD	$8, RSP, R5;			\
   651  	ADD	R6, R5; 			\
   652  	ADD	R6, R3;				\
   653  	SUB	R6, R4;				\
   654  	BL	callRet<>(SB);			\
   655  	RET
   656  
   657  // callRet copies return values back at the end of call*. This is a
   658  // separate function so it can allocate stack space for the arguments
   659  // to reflectcallmove. It does not follow the Go ABI; it expects its
   660  // arguments in registers.
        // As set up by CALLFN: R7 = stackArgsType, R3 = stackArgs + stackRetOffset,
        // R5 = frame copy + stackRetOffset, R4 = stackArgsSize - stackRetOffset,
        // R20 = regArgs. They are stored, in that order, into the five stack slots
        // from 8(RSP) to 40(RSP) before calling runtime·reflectcallmove.
        // NOTE(review): reflectcallmove's parameter names are not visible in this
        // file; presumably the slots map to (typ, dst, src, size, regs) — confirm.
   661  TEXT callRet<>(SB), NOSPLIT, $48-0
   662  	NO_LOCAL_POINTERS
   663  	STP	(R7, R3), 8(RSP)
   664  	STP	(R5, R4), 24(RSP)
   665  	MOVD	R20, 40(RSP)
   666  	BL	runtime·reflectcallmove(SB)
   667  	RET
   668  
        // Instantiate one fixed-frame call* function per size that ·reflectcall
        // dispatches to, from 16 bytes up to 1 GiB in powers of two.
   669  CALLFN(·call16, 16)
   670  CALLFN(·call32, 32)
   671  CALLFN(·call64, 64)
   672  CALLFN(·call128, 128)
   673  CALLFN(·call256, 256)
   674  CALLFN(·call512, 512)
   675  CALLFN(·call1024, 1024)
   676  CALLFN(·call2048, 2048)
   677  CALLFN(·call4096, 4096)
   678  CALLFN(·call8192, 8192)
   679  CALLFN(·call16384, 16384)
   680  CALLFN(·call32768, 32768)
   681  CALLFN(·call65536, 65536)
   682  CALLFN(·call131072, 131072)
   683  CALLFN(·call262144, 262144)
   684  CALLFN(·call524288, 524288)
   685  CALLFN(·call1048576, 1048576)
   686  CALLFN(·call2097152, 2097152)
   687  CALLFN(·call4194304, 4194304)
   688  CALLFN(·call8388608, 8388608)
   689  CALLFN(·call16777216, 16777216)
   690  CALLFN(·call33554432, 33554432)
   691  CALLFN(·call67108864, 67108864)
   692  CALLFN(·call134217728, 134217728)
   693  CALLFN(·call268435456, 268435456)
   694  CALLFN(·call536870912, 536870912)
   695  CALLFN(·call1073741824, 1073741824)
   696  
   697  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
        // On entry R0 = p and R1 = h; the result is returned in R0.
        // If runtime·useAeshash is zero the call is forwarded to
        // memhash32Fallback. Otherwise the 4 bytes at p and the low 32 bits of h
        // are placed in a zeroed V0 and mixed with the first 16 bytes of
        // runtime·aeskeysched using three AESE steps.
   698  TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   699  	MOVB	runtime·useAeshash(SB), R10
   700  	CBZ	R10, noaes
   701  	MOVD	$runtime·aeskeysched+0(SB), R3
   702  
   703  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   704  	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
   705  	VLD1	(R0), V0.S[1]	// 32-bit lane 1 = the 4 bytes at p
   706  	VMOV	R1, V0.S[0]	// 32-bit lane 0 = low 32 bits of h
   707  
   708  	AESE	V2.B16, V0.B16
   709  	AESMC	V0.B16, V0.B16
   710  	AESE	V2.B16, V0.B16
   711  	AESMC	V0.B16, V0.B16
   712  	AESE	V2.B16, V0.B16
   713  
   714  	VMOV	V0.D[0], R0	// result = low 64 bits of V0
   715  	RET
   716  noaes:
   717  	B	runtime·memhash32Fallback<ABIInternal>(SB)
   718  
   719  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
        // On entry R0 = p and R1 = h; the result is returned in R0.
        // If runtime·useAeshash is zero the call is forwarded to
        // memhash64Fallback. Otherwise the 8 bytes at p and h are placed in the
        // two 64-bit lanes of V0 and mixed with the first 16 bytes of
        // runtime·aeskeysched using three AESE steps.
   720  TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   721  	MOVB	runtime·useAeshash(SB), R10
   722  	CBZ	R10, noaes
   723  	MOVD	$runtime·aeskeysched+0(SB), R3
   724  
   725  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   726  	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
   727  	VLD1	(R0), V0.D[1]	// 64-bit lane 1 = the 8 bytes at p
   728  	VMOV	R1, V0.D[0]	// 64-bit lane 0 = h
   729  
   730  	AESE	V2.B16, V0.B16
   731  	AESMC	V0.B16, V0.B16
   732  	AESE	V2.B16, V0.B16
   733  	AESMC	V0.B16, V0.B16
   734  	AESE	V2.B16, V0.B16
   735  
   736  	VMOV	V0.D[0], R0	// result = low 64 bits of V0
   737  	RET
   738  noaes:
   739  	B	runtime·memhash64Fallback<ABIInternal>(SB)
   740  
   741  // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
        // Tail-branches to aeshashbody<> when runtime·useAeshash is non-zero and
        // to memhashFallback otherwise; R0, R1 and R2 are passed through unchanged.
   742  TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
   743  	MOVB	runtime·useAeshash(SB), R10
   744  	CBZ	R10, noaes
   745  	B	aeshashbody<>(SB)
   746  noaes:
   747  	B	runtime·memhashFallback<ABIInternal>(SB)
   748  
   749  // func strhash(p unsafe.Pointer, h uintptr) uintptr
        // p points at a string header. On the AES path the header is unpacked into
        // R0 (data pointer) and R2 (length), and R1 still holds h, before the
        // tail-branch to aeshashbody<>. Otherwise the call goes to strhashFallback
        // with the registers untouched.
   750  TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   751  	MOVB	runtime·useAeshash(SB), R10
   752  	CBZ	R10, noaes
   753  	LDP	(R0), (R0, R2)	// string data / length
   754  	B	aeshashbody<>(SB)
   755  noaes:
   756  	B	runtime·strhashFallback<ABIInternal>(SB)
   757  
   758  // R0: data
   759  // R1: seed data
   760  // R2: length
   761  // At return, R0 = return value
   762  TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
   763  	VEOR	V30.B16, V30.B16, V30.B16
   764  	VMOV	R1, V30.D[0]
   765  	VMOV	R2, V30.D[1] // load length into seed
   766  
   767  	MOVD	$runtime·aeskeysched+0(SB), R4
   768  	VLD1.P	16(R4), [V0.B16]
   769  	AESE	V30.B16, V0.B16
   770  	AESMC	V0.B16, V0.B16
   771  	CMP	$16, R2
   772  	BLO	aes0to15
   773  	BEQ	aes16
   774  	CMP	$32, R2
   775  	BLS	aes17to32
   776  	CMP	$64, R2
   777  	BLS	aes33to64
   778  	CMP	$128, R2
   779  	BLS	aes65to128
   780  	B	aes129plus
   781  
   782  aes0to15:
   783  	CBZ	R2, aes0
   784  	VEOR	V2.B16, V2.B16, V2.B16
   785  	TBZ	$3, R2, less_than_8
   786  	VLD1.P	8(R0), V2.D[0]
   787  
   788  less_than_8:
   789  	TBZ	$2, R2, less_than_4
   790  	VLD1.P	4(R0), V2.S[2]
   791  
   792  less_than_4:
   793  	TBZ	$1, R2, less_than_2
   794  	VLD1.P	2(R0), V2.H[6]
   795  
   796  less_than_2:
   797  	TBZ	$0, R2, done
   798  	VLD1	(R0), V2.B[14]
   799  done:
   800  	AESE	V0.B16, V2.B16
   801  	AESMC	V2.B16, V2.B16
   802  	AESE	V0.B16, V2.B16
   803  	AESMC	V2.B16, V2.B16
   804  	AESE	V0.B16, V2.B16
   805  	AESMC	V2.B16, V2.B16
   806  
   807  	VMOV	V2.D[0], R0
   808  	RET
   809  
   810  aes0:
   811  	VMOV	V0.D[0], R0
   812  	RET
   813  
   814  aes16:
   815  	VLD1	(R0), [V2.B16]
   816  	B	done
   817  
   818  aes17to32:
   819  	// make second seed
   820  	VLD1	(R4), [V1.B16]
   821  	AESE	V30.B16, V1.B16
   822  	AESMC	V1.B16, V1.B16
   823  	SUB	$16, R2, R10
   824  	VLD1.P	(R0)(R10), [V2.B16]
   825  	VLD1	(R0), [V3.B16]
   826  
   827  	AESE	V0.B16, V2.B16
   828  	AESMC	V2.B16, V2.B16
   829  	AESE	V1.B16, V3.B16
   830  	AESMC	V3.B16, V3.B16
   831  
   832  	AESE	V0.B16, V2.B16
   833  	AESMC	V2.B16, V2.B16
   834  	AESE	V1.B16, V3.B16
   835  	AESMC	V3.B16, V3.B16
   836  
   837  	AESE	V0.B16, V2.B16
   838  	AESE	V1.B16, V3.B16
   839  
   840  	VEOR	V3.B16, V2.B16, V2.B16
   841  
   842  	VMOV	V2.D[0], R0
   843  	RET
   844  
   845  aes33to64:
   846  	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
   847  	AESE	V30.B16, V1.B16
   848  	AESMC	V1.B16, V1.B16
   849  	AESE	V30.B16, V2.B16
   850  	AESMC	V2.B16, V2.B16
   851  	AESE	V30.B16, V3.B16
   852  	AESMC	V3.B16, V3.B16
   853  	SUB	$32, R2, R10
   854  
   855  	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
   856  	VLD1	(R0), [V6.B16, V7.B16]
   857  
   858  	AESE	V0.B16, V4.B16
   859  	AESMC	V4.B16, V4.B16
   860  	AESE	V1.B16, V5.B16
   861  	AESMC	V5.B16, V5.B16
   862  	AESE	V2.B16, V6.B16
   863  	AESMC	V6.B16, V6.B16
   864  	AESE	V3.B16, V7.B16
   865  	AESMC	V7.B16, V7.B16
   866  
   867  	AESE	V0.B16, V4.B16
   868  	AESMC	V4.B16, V4.B16
   869  	AESE	V1.B16, V5.B16
   870  	AESMC	V5.B16, V5.B16
   871  	AESE	V2.B16, V6.B16
   872  	AESMC	V6.B16, V6.B16
   873  	AESE	V3.B16, V7.B16
   874  	AESMC	V7.B16, V7.B16
   875  
   876  	AESE	V0.B16, V4.B16
   877  	AESE	V1.B16, V5.B16
   878  	AESE	V2.B16, V6.B16
   879  	AESE	V3.B16, V7.B16
   880  
   881  	VEOR	V6.B16, V4.B16, V4.B16
   882  	VEOR	V7.B16, V5.B16, V5.B16
   883  	VEOR	V5.B16, V4.B16, V4.B16
   884  
   885  	VMOV	V4.D[0], R0
   886  	RET
   887  
   888  aes65to128:
   889  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   890  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   891  	AESE	V30.B16, V1.B16
   892  	AESMC	V1.B16, V1.B16
   893  	AESE	V30.B16, V2.B16
   894  	AESMC	V2.B16, V2.B16
   895  	AESE	V30.B16, V3.B16
   896  	AESMC	V3.B16, V3.B16
   897  	AESE	V30.B16, V4.B16
   898  	AESMC	V4.B16, V4.B16
   899  	AESE	V30.B16, V5.B16
   900  	AESMC	V5.B16, V5.B16
   901  	AESE	V30.B16, V6.B16
   902  	AESMC	V6.B16, V6.B16
   903  	AESE	V30.B16, V7.B16
   904  	AESMC	V7.B16, V7.B16
   905  
   906  	SUB	$64, R2, R10
   907  	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   908  	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   909  	AESE	V0.B16,	 V8.B16
   910  	AESMC	V8.B16,  V8.B16
   911  	AESE	V1.B16,	 V9.B16
   912  	AESMC	V9.B16,  V9.B16
   913  	AESE	V2.B16, V10.B16
   914  	AESMC	V10.B16,  V10.B16
   915  	AESE	V3.B16, V11.B16
   916  	AESMC	V11.B16,  V11.B16
   917  	AESE	V4.B16, V12.B16
   918  	AESMC	V12.B16,  V12.B16
   919  	AESE	V5.B16, V13.B16
   920  	AESMC	V13.B16,  V13.B16
   921  	AESE	V6.B16, V14.B16
   922  	AESMC	V14.B16,  V14.B16
   923  	AESE	V7.B16, V15.B16
   924  	AESMC	V15.B16,  V15.B16
   925  
   926  	AESE	V0.B16,	 V8.B16
   927  	AESMC	V8.B16,  V8.B16
   928  	AESE	V1.B16,	 V9.B16
   929  	AESMC	V9.B16,  V9.B16
   930  	AESE	V2.B16, V10.B16
   931  	AESMC	V10.B16,  V10.B16
   932  	AESE	V3.B16, V11.B16
   933  	AESMC	V11.B16,  V11.B16
   934  	AESE	V4.B16, V12.B16
   935  	AESMC	V12.B16,  V12.B16
   936  	AESE	V5.B16, V13.B16
   937  	AESMC	V13.B16,  V13.B16
   938  	AESE	V6.B16, V14.B16
   939  	AESMC	V14.B16,  V14.B16
   940  	AESE	V7.B16, V15.B16
   941  	AESMC	V15.B16,  V15.B16
   942  
   943  	AESE	V0.B16,	 V8.B16
   944  	AESE	V1.B16,	 V9.B16
   945  	AESE	V2.B16, V10.B16
   946  	AESE	V3.B16, V11.B16
   947  	AESE	V4.B16, V12.B16
   948  	AESE	V5.B16, V13.B16
   949  	AESE	V6.B16, V14.B16
   950  	AESE	V7.B16, V15.B16
   951  
   952  	VEOR	V12.B16, V8.B16, V8.B16
   953  	VEOR	V13.B16, V9.B16, V9.B16
   954  	VEOR	V14.B16, V10.B16, V10.B16
   955  	VEOR	V15.B16, V11.B16, V11.B16
   956  	VEOR	V10.B16, V8.B16, V8.B16
   957  	VEOR	V11.B16, V9.B16, V9.B16
   958  	VEOR	V9.B16, V8.B16, V8.B16
   959  
   960  	VMOV	V8.D[0], R0
   961  	RET
   962  
   963  aes129plus:
   964  	PRFM (R0), PLDL1KEEP
   965  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   966  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   967  	AESE	V30.B16, V1.B16
   968  	AESMC	V1.B16, V1.B16
   969  	AESE	V30.B16, V2.B16
   970  	AESMC	V2.B16, V2.B16
   971  	AESE	V30.B16, V3.B16
   972  	AESMC	V3.B16, V3.B16
   973  	AESE	V30.B16, V4.B16
   974  	AESMC	V4.B16, V4.B16
   975  	AESE	V30.B16, V5.B16
   976  	AESMC	V5.B16, V5.B16
   977  	AESE	V30.B16, V6.B16
   978  	AESMC	V6.B16, V6.B16
   979  	AESE	V30.B16, V7.B16
   980  	AESMC	V7.B16, V7.B16
   981  	ADD	R0, R2, R10
   982  	SUB	$128, R10, R10
   983  	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   984  	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
   985  	SUB	$1, R2, R2
   986  	LSR	$7, R2, R2
   987  
   988  aesloop:
   989  	AESE	V8.B16,	 V0.B16
   990  	AESMC	V0.B16,  V0.B16
   991  	AESE	V9.B16,	 V1.B16
   992  	AESMC	V1.B16,  V1.B16
   993  	AESE	V10.B16, V2.B16
   994  	AESMC	V2.B16,  V2.B16
   995  	AESE	V11.B16, V3.B16
   996  	AESMC	V3.B16,  V3.B16
   997  	AESE	V12.B16, V4.B16
   998  	AESMC	V4.B16,  V4.B16
   999  	AESE	V13.B16, V5.B16
  1000  	AESMC	V5.B16,  V5.B16
  1001  	AESE	V14.B16, V6.B16
  1002  	AESMC	V6.B16,  V6.B16
  1003  	AESE	V15.B16, V7.B16
  1004  	AESMC	V7.B16,  V7.B16
  1005  
  1006  	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
  1007  	AESE	V8.B16,	 V0.B16
  1008  	AESMC	V0.B16,  V0.B16
  1009  	AESE	V9.B16,	 V1.B16
  1010  	AESMC	V1.B16,  V1.B16
  1011  	AESE	V10.B16, V2.B16
  1012  	AESMC	V2.B16,  V2.B16
  1013  	AESE	V11.B16, V3.B16
  1014  	AESMC	V3.B16,  V3.B16
  1015  
  1016  	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
  1017  	AESE	V12.B16, V4.B16
  1018  	AESMC	V4.B16,  V4.B16
  1019  	AESE	V13.B16, V5.B16
  1020  	AESMC	V5.B16,  V5.B16
  1021  	AESE	V14.B16, V6.B16
  1022  	AESMC	V6.B16,  V6.B16
  1023  	AESE	V15.B16, V7.B16
  1024  	AESMC	V7.B16,  V7.B16
  1025  	SUB	$1, R2, R2
  1026  	CBNZ	R2, aesloop
  1027  
  1028  	AESE	V8.B16,	 V0.B16
  1029  	AESMC	V0.B16,  V0.B16
  1030  	AESE	V9.B16,	 V1.B16
  1031  	AESMC	V1.B16,  V1.B16
  1032  	AESE	V10.B16, V2.B16
  1033  	AESMC	V2.B16,  V2.B16
  1034  	AESE	V11.B16, V3.B16
  1035  	AESMC	V3.B16,  V3.B16
  1036  	AESE	V12.B16, V4.B16
  1037  	AESMC	V4.B16,  V4.B16
  1038  	AESE	V13.B16, V5.B16
  1039  	AESMC	V5.B16,  V5.B16
  1040  	AESE	V14.B16, V6.B16
  1041  	AESMC	V6.B16,  V6.B16
  1042  	AESE	V15.B16, V7.B16
  1043  	AESMC	V7.B16,  V7.B16
  1044  
  1045  	AESE	V8.B16,	 V0.B16
  1046  	AESMC	V0.B16,  V0.B16
  1047  	AESE	V9.B16,	 V1.B16
  1048  	AESMC	V1.B16,  V1.B16
  1049  	AESE	V10.B16, V2.B16
  1050  	AESMC	V2.B16,  V2.B16
  1051  	AESE	V11.B16, V3.B16
  1052  	AESMC	V3.B16,  V3.B16
  1053  	AESE	V12.B16, V4.B16
  1054  	AESMC	V4.B16,  V4.B16
  1055  	AESE	V13.B16, V5.B16
  1056  	AESMC	V5.B16,  V5.B16
  1057  	AESE	V14.B16, V6.B16
  1058  	AESMC	V6.B16,  V6.B16
  1059  	AESE	V15.B16, V7.B16
  1060  	AESMC	V7.B16,  V7.B16
  1061  
  1062  	AESE	V8.B16,	 V0.B16
  1063  	AESE	V9.B16,	 V1.B16
  1064  	AESE	V10.B16, V2.B16
  1065  	AESE	V11.B16, V3.B16
  1066  	AESE	V12.B16, V4.B16
  1067  	AESE	V13.B16, V5.B16
  1068  	AESE	V14.B16, V6.B16
  1069  	AESE	V15.B16, V7.B16
  1070  
  1071  	VEOR	V0.B16, V1.B16, V0.B16
  1072  	VEOR	V2.B16, V3.B16, V2.B16
  1073  	VEOR	V4.B16, V5.B16, V4.B16
  1074  	VEOR	V6.B16, V7.B16, V6.B16
  1075  	VEOR	V0.B16, V2.B16, V0.B16
  1076  	VEOR	V4.B16, V6.B16, V4.B16
  1077  	VEOR	V4.B16, V0.B16, V0.B16
  1078  
  1079  	VMOV	V0.D[0], R0
  1080  	RET
  1081  
  1082  // The Arm architecture provides a user space accessible counter-timer which
  1083  // is incremented at a fixed but machine-specific rate. Software can (spin)
  1084  // wait until the counter-timer reaches some desired value.
  1085  //
  1086  // Armv8.7-A introduced the WFET (FEAT_WFxT) instruction, which allows the
  1087  // processor to enter a low power state for a set time, or until an event is
  1088  // received.
  1089  //
  1090  // However, WFET is not used here because it is only available on newer hardware,
  1091  // and we aim to maintain compatibility with older Armv8-A platforms that do not
  1092  // support this feature.
  1093  //
  1094  // As a fallback, we can instead use the ISB instruction to decrease processor
  1095  // activity and thus power consumption between checks of the counter-timer.
  1096  // Note that we do not depend on the latency of the ISB instruction which is
  1097  // implementation specific. Actual delay comes from comparing against a fresh
  1098  // read of the counter-timer value.
  1099  //
  1100  // Read more in this Arm blog post:
  1101  // https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
  1102  
  1103  TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
  1104  	MOVWU	cycles+0(FP), R0	// R0 = requested delay; the code below treats it as nanoseconds
  1105  	CBZ	 R0, done	// zero delay: return without even the first ISB
  1106  	// Prevent speculation of subsequent counter/timer reads and memory accesses.
  1107  	ISB     $15
  1108  	// If the delay is very short, just return.
  1109  	// Hardcode 18ns as the first ISB delay.
  1110  	CMP     $18, R0
  1111  	BLS     done	// unsigned: delay <= 18, the first ISB already covered it
  1112  	// Adjust for overhead of initial ISB.
  1113  	SUB     $18, R0, R0
  1114  	// Convert the delay from nanoseconds to counter/timer ticks.
  1115  	// Read the counter/timer frequency.
  1116  	// delay_ticks = (delay * CNTFRQ_EL0) / 1e9
  1117  	// With the below simplifications and adjustments,
  1118  	// we are usually within 2% of the correct value:
  1119  	// delay_ticks = (delay + delay / 16) * CNTFRQ_EL0 >> 30
  1120  	MRS     CNTFRQ_EL0, R1
  1121  	ADD     R0>>4, R0, R0	// R0 = delay + delay/16
  1122  	MUL     R1, R0, R0	// R0 = (delay + delay/16) * CNTFRQ_EL0
  1123  	LSR     $30, R0, R0	// R0 = delay_ticks
  1124  	CBZ     R0, done	// the conversion rounded down to zero ticks: nothing left to wait for
  1125  	// start = current counter/timer value
  1126  	MRS     CNTVCT_EL0, R2
  1127  delay:
  1128  	// Delay using ISB for all ticks.
  1129  	ISB     $15
  1130  	// Subtract and compare to handle counter roll-over.
  1131  	// counter_read() - start < delay_ticks
  1132  	MRS     CNTVCT_EL0, R1
  1133  	SUB     R2, R1, R1	// R1 = ticks elapsed since start
  1134  	CMP     R0, R1
  1135  	BCC     delay	// unsigned: keep spinning while elapsed < delay_ticks
  1136  done:
  1137  	RET
  1138  
  1139  // Save state of caller into g->sched,
  1140  // but using fake PC from systemstack_switch.
  1141  // Must only be called from functions with no locals ($0)
  1142  // or else unwinding from systemstack_switch is incorrect.
  1143  // Smashes R0. Calls runtime·abort if g->sched.ctxt is not zero.
  1144  TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
  1145  	MOVD	$runtime·systemstack_switch(SB), R0
  1146  	ADD	$8, R0	// get past prologue
  1147  	MOVD	R0, (g_sched+gobuf_pc)(g)	// g.sched.pc = systemstack_switch+8
  1148  	MOVD	RSP, R0
  1149  	MOVD	R0, (g_sched+gobuf_sp)(g)	// g.sched.sp = current stack pointer
  1150  	MOVD	R29, (g_sched+gobuf_bp)(g)	// g.sched.bp = current R29
  1151  	MOVD	$0, (g_sched+gobuf_lr)(g)	// g.sched.lr = 0
  1152  	// Assert ctxt is zero. See func save.
  1153  	MOVD	(g_sched+gobuf_ctxt)(g), R0
  1154  	CBZ	R0, 2(PC)	// ctxt == 0: skip the abort and return
  1155  	CALL	runtime·abort(SB)
  1156  	RET
  1157  
  1158  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
  1159  // Call fn(arg) aligned appropriately for the gcc ABI.
  1160  // Called on a system stack, and there may be no g yet (during needm).
  1161  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
  1162  	MOVD	fn+0(FP), R1	// R1 = fn, the function to call
  1163  	MOVD	arg+8(FP), R0	// R0 = arg, passed to fn in R0
  1164  	SUB	$16, RSP	// skip over saved frame pointer below RSP
  1165  	BL	(R1)
  1166  	ADD	$16, RSP	// undo the 16-byte adjustment made before the call
  1167  	RET
  1168  
  1169  // func asmcgocall(fn, arg unsafe.Pointer) int32
  1170  // Call fn(arg) on the scheduler stack,
  1171  // aligned appropriately for the gcc ABI. The low 32 bits of fn's R0 result are returned as ret.
  1172  // See cgocall.go for more details.
  1173  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
  1174  	CBZ	g, nosave	// no g at all: call fn on the current stack
  1175  
  1176  	// Figure out if we need to switch to m->g0 stack.
  1177  	// We get called to create new OS threads too, and those
  1178  	// come in on the m->g0 stack already. Or we might already
  1179  	// be on the m->gsignal stack.
  1180  	MOVD	g_m(g), R8
  1181  	MOVD	m_gsignal(R8), R3
  1182  	CMP	R3, g
  1183  	BEQ	nosave	// already on m->gsignal
  1184  	MOVD	m_g0(R8), R3
  1185  	CMP	R3, g
  1186  	BEQ	nosave	// already on m->g0
  1187  
  1188  	// running on a user stack. Figure out if we're running
  1189  	// secret code and clear our registers if so.
  1190  #ifdef GOEXPERIMENT_runtimesecret
  1191  	MOVW 	g_secret(g), R5
  1192  	CBZ		R5, nosecret
  1193  	BL 	·secretEraseRegisters(SB)
  1194  	// restore g0 back into R3
  1195  	MOVD	g_m(g), R3
  1196  	MOVD	m_g0(R3), R3
  1197  
  1198  nosecret:
  1199  #endif
  1200  	MOVD	fn+0(FP), R1	// R1 = fn
  1201  	MOVD	arg+8(FP), R0	// R0 = arg
  1202  	MOVD	RSP, R2	// R2 = stack pointer on the user stack
  1203  	MOVD	g, R4	// R4 = the user g we are leaving
  1204  
  1205  	// Switch to system stack.
  1206  	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
  1207  	BL	gosave_systemstack_switch<>(SB)
  1208  	MOVD	R3, g	// g = m->g0 (R3 was loaded above)
  1209  	BL	runtime·save_g(SB)
  1210  	MOVD	(g_sched+gobuf_sp)(g), R0
  1211  	MOVD	R0, RSP	// RSP = g0.sched.sp
  1212  	MOVD	(g_sched+gobuf_bp)(g), R29
  1213  	MOVD	R9, R0	// R0 = arg again
  1214  
  1215  	// Now on a scheduling stack (a pthread-created stack).
  1216  	// Save room for two of our pointers /*, plus 32 bytes of callee
  1217  	// save area that lives on the caller stack. */
  1218  	MOVD	RSP, R13
  1219  	SUB	$16, R13
  1220  	MOVD	R13, RSP
  1221  	MOVD	R4, 0(RSP)	// save old g on stack
  1222  	MOVD	(g_stack+stack_hi)(R4), R4
  1223  	SUB	R2, R4	// R4 = old g's stack.hi - old SP
  1224  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
  1225  	BL	(R1)
  1226  	MOVD	R0, R9	// keep fn's result in R9 across save_g
  1227  
  1228  	// Restore g, stack pointer. R0 is errno, so don't touch it
  1229  	MOVD	0(RSP), g
  1230  	BL	runtime·save_g(SB)
  1231  	MOVD	(g_stack+stack_hi)(g), R5
  1232  	MOVD	8(RSP), R6
  1233  	SUB	R6, R5	// R5 = stack.hi - saved depth = SP on the (possibly moved) user stack
  1234  	MOVD	R9, R0
  1235  	MOVD	R5, RSP
  1236  
  1237  	MOVW	R0, ret+16(FP)
  1238  	RET
  1239  
  1240  nosave:
  1241  	// Running on a system stack, perhaps even without a g.
  1242  	// Having no g can happen during thread creation or thread teardown
  1243  	// (see needm/dropm on Solaris, for example).
  1244  	// This code is like the above sequence but without saving/restoring g
  1245  	// and without worrying about the stack moving out from under us
  1246  	// (because we're on a system stack, not a goroutine stack).
  1247  	// The above code could be used directly if already on a system stack,
  1248  	// but then the only path through this code would be a rare case on Solaris.
  1249  	// Using this code for all "already on system stack" calls exercises it more,
  1250  	// which should help keep it correct.
  1251  	MOVD	fn+0(FP), R1
  1252  	MOVD	arg+8(FP), R0
  1253  	MOVD	RSP, R2
  1254  	MOVD 	R2, R13
  1255  	SUB	$16, R13
  1256  	MOVD	R13, RSP
  1257  	MOVD	$0, R4
  1258  	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
  1259  	MOVD	R2, 8(RSP)	// Save original stack pointer.
  1260  	BL	(R1)
  1261  	// Restore stack pointer.
  1262  	MOVD	8(RSP), R2
  1263  	MOVD	R2, RSP
  1264  	MOVW	R0, ret+16(FP)	// ret is int32: store 4 bytes, exactly as the stack-switching path above does
  1265  	RET
  1266  
  1267  // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
  1268  // See cgocall.go for more details.
  1269  TEXT ·cgocallback(SB),NOSPLIT,$24-24
  1270  	NO_LOCAL_POINTERS
  1271  
  1272  	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
  1273  	// It is used to dropm while thread is exiting.
  1274  	MOVD	fn+0(FP), R1
  1275  	CBNZ	R1, loadg	// fn != nil: normal callback path
  1276  	// Restore the g from frame.
  1277  	MOVD	frame+8(FP), g
  1278  	B	dropm
  1279  
  1280  loadg:
  1281  	// Load g from thread-local storage.
  1282  	BL	runtime·load_g(SB)
  1283  
  1284  	// If g is nil, either Go did not create the current thread,
  1285  	// or (on pthread platforms) this thread has never called into Go.
  1286  	// Call needm to obtain one for temporary use.
  1287  	// In this case, we're running on the thread stack, so there's
  1288  	// lots of space, but the linker doesn't know. Hide the call from
  1289  	// the linker analysis by using an indirect call.
  1290  	CBZ	g, needm
  1291  
  1292  	MOVD	g_m(g), R8
  1293  	MOVD	R8, savedm-8(SP)	// remember the m we entered with
  1294  	B	havem
  1295  
  1296  needm:
  1297  	MOVD	g, savedm-8(SP) // g is zero, so is m.
  1298  	MOVD	$runtime·needAndBindM(SB), R0
  1299  	BL	(R0)
  1300  
  1301  	// Set m->g0->sched.sp = SP, so that if a panic happens
  1302  	// during the function we are about to execute, it will
  1303  	// have a valid SP to run on the g0 stack.
  1304  	// The next few lines (after the havem label)
  1305  	// will save this SP onto the stack and then write
  1306  	// the same SP back to m->g0->sched.sp. That seems redundant,
  1307  	// but if an unrecovered panic happens, unwindm will
  1308  	// restore the g->sched.sp from the stack location
  1309  	// and then systemstack will try to use it. If we don't set it here,
  1310  	// that restored SP will be uninitialized (typically 0) and
  1311  	// will not be usable.
  1312  	MOVD	g_m(g), R8
  1313  	MOVD	m_g0(R8), R3
  1314  	MOVD	RSP, R0
  1315  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1316  	MOVD	R29, (g_sched+gobuf_bp)(R3)
  1317  
  1318  havem:
  1319  	// Now there's a valid m, and we're running on its m->g0.
  1320  	// Save current m->g0->sched.sp on stack and then set it to SP.
  1321  	// Save current sp in m->g0->sched.sp in preparation for
  1322  	// switch back to m->curg stack.
  1323  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
  1324  	// Beware that the frame size is actually 32+16.
  1325  	MOVD	m_g0(R8), R3
  1326  	MOVD	(g_sched+gobuf_sp)(R3), R4
  1327  	MOVD	R4, savedsp-16(SP)
  1328  	MOVD	RSP, R0
  1329  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1330  
  1331  	// Switch to m->curg stack and call runtime.cgocallbackg.
  1332  	// Because we are taking over the execution of m->curg
  1333  	// but *not* resuming what had been running, we need to
  1334  	// save that information (m->curg->sched) so we can restore it.
  1335  	// We can restore m->curg->sched.sp easily, because calling
  1336  	// runtime.cgocallbackg leaves SP unchanged upon return.
  1337  	// To save m->curg->sched.pc, we push it onto the curg stack and
  1338  	// open a frame the same size as cgocallback's g0 frame.
  1339  	// Once we switch to the curg stack, the pushed PC will appear
  1340  	// to be the return PC of cgocallback, so that the traceback
  1341  	// will seamlessly trace back into the earlier calls.
  1342  	MOVD	m_curg(R8), g
  1343  	BL	runtime·save_g(SB)
  1344  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
  1345  	MOVD	(g_sched+gobuf_pc)(g), R5
  1346  	MOVD	R5, -48(R4)	// saved curg sched.pc; this is 0(RSP) once RSP = R4-48 below
  1347  	MOVD	(g_sched+gobuf_bp)(g), R5
  1348  	MOVD	R5, -56(R4)	// saved curg sched.bp, one word below the new RSP
  1349  	// Gather our arguments into registers.
  1350  	MOVD	fn+0(FP), R1
  1351  	MOVD	frame+8(FP), R2
  1352  	MOVD	ctxt+16(FP), R3
  1353  	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
  1354  	MOVD	R0, RSP	// switch stack
  1355  	MOVD	R1, 8(RSP)	// fn, stored on the new stack for cgocallbackg
  1356  	MOVD	R2, 16(RSP)	// frame
  1357  	MOVD	R3, 24(RSP)	// ctxt
  1358  	MOVD	$runtime·cgocallbackg(SB), R0
  1359  	CALL	(R0) // indirect call to bypass nosplit check. We're on a different stack now.
  1360  
  1361  	// Restore g->sched (== m->curg->sched) from saved values.
  1362  	MOVD	0(RSP), R5
  1363  	MOVD	R5, (g_sched+gobuf_pc)(g)
  1364  	MOVD	RSP, R4
  1365  	ADD	$48, R4, R4	// undo the 48-byte frame opened before the call
  1366  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1367  
  1368  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1369  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1370  	// so we do not have to restore it.)
  1371  	MOVD	g_m(g), R8
  1372  	MOVD	m_g0(R8), g
  1373  	BL	runtime·save_g(SB)
  1374  	MOVD	(g_sched+gobuf_sp)(g), R0
  1375  	MOVD	R0, RSP
  1376  	MOVD	savedsp-16(SP), R4
  1377  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1378  
  1379  	// If the m on entry was nil, we called needm above to borrow an m,
  1380  	// 1. for the duration of the call on non-pthread platforms,
  1381  	// 2. or the duration of the C thread alive on pthread platforms.
  1382  	// If the m on entry wasn't nil,
  1383  	// 1. the thread might be a Go thread,
  1384  	// 2. or it wasn't the first call from a C thread on pthread platforms,
  1385  	//    since then we skip dropm to reuse the m in the first call.
  1386  	MOVD	savedm-8(SP), R6
  1387  	CBNZ	R6, droppedm	// entered with an m: nothing to drop
  1388  
  1389  	// Skip dropm to reuse it in the next call, when a pthread key has been created.
  1390  	MOVD	_cgo_pthread_key_created(SB), R6
  1391  	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
  1392  	CBZ	R6, dropm
  1393  	MOVD	(R6), R6
  1394  	CBNZ	R6, droppedm	// *_cgo_pthread_key_created != 0: keep the m for the next call
  1395  
  1396  dropm:
  1397  	MOVD	$runtime·dropm(SB), R0
  1398  	BL	(R0)
  1399  droppedm:
  1400  
  1401  	// Done!
  1402  	RET
  1403  
  1404  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1405  // Must obey the gcc calling convention. The result is returned in R0.
  1406  TEXT _cgo_topofstack(SB),NOSPLIT,$24
  1407  	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
  1408  	// are callee-save in the gcc calling convention, so save them.
  1409  	MOVD	R27, savedR27-8(SP)
  1410  	MOVD	g, saveG-16(SP)
  1411  
  1412  	BL	runtime·load_g(SB)
  1413  	MOVD	g_m(g), R0	// R0 = g.m
  1414  	MOVD	m_curg(R0), R0	// R0 = g.m.curg
  1415  	MOVD	(g_stack+stack_hi)(R0), R0	// R0 = g.m.curg.stack.hi
  1416  
  1417  	MOVD	saveG-16(SP), g
  1418  	MOVD	savedR27-8(SP), R27	// same slot R27 was saved to above; the name now matches the save
  1419  	RET
  1420  
  1421  // void setg(G*); set g. for use by needm.
  1422  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1423  	MOVD	gg+0(FP), g	// g register = the gg argument
  1424  	// This only happens if iscgo, so jump straight to save_g
  1425  	BL	runtime·save_g(SB)
  1426  	RET
  1427  
  1428  // void setg_gcc(G*); set g called from gcc
  1429  TEXT setg_gcc<>(SB),NOSPLIT,$8
  1430  	MOVD	R0, g	// the G* argument arrives in R0
  1431  	MOVD	R27, savedR27-8(SP)	// keep R27 across save_g; presumably save_g may clobber it — confirm
  1432  	BL	runtime·save_g(SB)
  1433  	MOVD	savedR27-8(SP), R27
  1434  	RET
  1435  
  1436  TEXT runtime·emptyfunc(SB),0,$0-0	// no arguments, no frame; flags are 0, so NOSPLIT is not set here
  1437  	RET	// returns immediately without doing anything
  1438  
  1439  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0	// crash on purpose; there is no RET
  1440  	MOVD	ZR, R0	// R0 = 0
  1441  	MOVD	(R0), R0	// load from address 0, which is expected to fault
  1442  	UNDEF	// undefined instruction, in case execution continues past the load
  1443  
  1444  // The top-most function running on a goroutine
  1445  // returns to goexit+PCQuantum.
  1446  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
  1447  	MOVD	R0, R0	// NOP; presumably padding so that goexit+PCQuantum is the BL below — confirm
  1448  	BL	runtime·goexit1(SB)	// does not return
  1449  
  1450  // This is called from .init_array and follows the platform, not Go, ABI.
  1451  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1452  	SUB	$0x10, RSP	// open 16 bytes of scratch space by hand
  1453  	MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
  1454  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = lastmoduledatap
  1455  	MOVD	R0, moduledata_next(R1)	// lastmoduledatap.next = R0 (presumably the new moduledata pointer — confirm against caller)
  1456  	MOVD	R0, runtime·lastmoduledatap(SB)	// lastmoduledatap = R0
  1457  	MOVD	8(RSP), R27
  1458  	ADD	$0x10, RSP	// release the scratch space
  1459  	RET
  1460  
  1461  TEXT ·checkASM(SB),NOSPLIT,$0-1	// always reports true through the one-byte result
  1462  	MOVD	$1, R3	// R3 = 1
  1463  	MOVB	R3, ret+0(FP)	// ret = low byte of R3
  1464  	RET
  1465  
  1466  // gcWriteBarrier informs the GC about heap pointer writes.
  1467  //
  1468  // gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1469  // number of bytes of buffer needed in R25, and returns a pointer
  1470  // to the buffer space in R25.
  1471  // It clobbers condition codes.
  1472  // It does not clobber any general-purpose registers except R27,
  1473  // but may clobber others (e.g., floating point registers).
  1474  // The act of CALLing gcWriteBarrier will clobber R30 (LR).
  1475  TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
  1476  	// Save the registers clobbered by the fast path.
  1477  	STP	(R0, R1), 184(RSP)
  1478  retry:
  1479  	MOVD	g_m(g), R0
  1480  	MOVD	m_p(R0), R0	// R0 = g.m.p
  1481  	MOVD	(p_wbBuf+wbBuf_next)(R0), R1	// R1 = p.wbBuf.next
  1482  	MOVD	(p_wbBuf+wbBuf_end)(R0), R27	// R27 = p.wbBuf.end
  1483  	// Increment wbBuf.next position.
  1484  	ADD	R25, R1
  1485  	// Is the buffer full?
  1486  	CMP	R27, R1
  1487  	BHI	flush	// unsigned: new next > end, so the request does not fit
  1488  	// Commit to the larger buffer.
  1489  	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
  1490  	// Make return value (the original next position)
  1491  	SUB	R25, R1, R25
  1492  	// Restore registers.
  1493  	LDP	184(RSP), (R0, R1)
  1494  	RET
  1495  
  1496  flush:
  1497  	// Save all general purpose registers since these could be
  1498  	// clobbered by wbBufFlush and were not saved by the caller.
  1499  	// R0 and R1 already saved
  1500  	STP	(R2, R3), 1*8(RSP)
  1501  	STP	(R4, R5), 3*8(RSP)
  1502  	STP	(R6, R7), 5*8(RSP)
  1503  	STP	(R8, R9), 7*8(RSP)
  1504  	STP	(R10, R11), 9*8(RSP)
  1505  	STP	(R12, R13), 11*8(RSP)
  1506  	STP	(R14, R15), 13*8(RSP)
  1507  	// R16, R17 may be clobbered by linker trampoline
  1508  	// R18 is unused.
  1509  	STP	(R19, R20), 15*8(RSP)
  1510  	STP	(R21, R22), 17*8(RSP)
  1511  	STP	(R23, R24), 19*8(RSP)
  1512  	STP	(R25, R26), 21*8(RSP)
  1513  	// R27 is temp register.
  1514  	// R28 is g.
  1515  	// R29 is frame pointer (unused).
  1516  	// R30 is LR, which was saved by the prologue.
  1517  	// R31 is SP.
  1518  
  1519  	CALL	runtime·wbBufFlush(SB)
  1520  	LDP	1*8(RSP), (R2, R3)
  1521  	LDP	3*8(RSP), (R4, R5)
  1522  	LDP	5*8(RSP), (R6, R7)
  1523  	LDP	7*8(RSP), (R8, R9)
  1524  	LDP	9*8(RSP), (R10, R11)
  1525  	LDP	11*8(RSP), (R12, R13)
  1526  	LDP	13*8(RSP), (R14, R15)
  1527  	LDP	15*8(RSP), (R19, R20)
  1528  	LDP	17*8(RSP), (R21, R22)
  1529  	LDP	19*8(RSP), (R23, R24)
  1530  	LDP	21*8(RSP), (R25, R26)	// R25 is the requested byte count again
  1531  	JMP	retry	// redo the reservation now that wbBufFlush has run
  1532  
  1533  TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0	// gcWriteBarrierN: set R25 = 8*N bytes, then tail-jump to gcWriteBarrier<>
  1534  	MOVD	$8, R25	// 1 slot of 8 bytes
  1535  	JMP	gcWriteBarrier<>(SB)
  1536  TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
  1537  	MOVD	$16, R25	// 2 slots
  1538  	JMP	gcWriteBarrier<>(SB)
  1539  TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
  1540  	MOVD	$24, R25	// 3 slots
  1541  	JMP	gcWriteBarrier<>(SB)
  1542  TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
  1543  	MOVD	$32, R25	// 4 slots
  1544  	JMP	gcWriteBarrier<>(SB)
  1545  TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
  1546  	MOVD	$40, R25	// 5 slots
  1547  	JMP	gcWriteBarrier<>(SB)
  1548  TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
  1549  	MOVD	$48, R25	// 6 slots
  1550  	JMP	gcWriteBarrier<>(SB)
  1551  TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
  1552  	MOVD	$56, R25	// 7 slots
  1553  	JMP	gcWriteBarrier<>(SB)
  1554  TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
  1555  	MOVD	$64, R25	// 8 slots
  1556  	JMP	gcWriteBarrier<>(SB)
  1557  
  1558  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"	// 20-byte message reported by debugCallV2 when no debugCallN frame is big enough
  1559  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1560  
  1561  // debugCallV2 is the entry point for debugger-injected function
  1562  // calls on running goroutines. It informs the runtime that a
  1563  // debug call has been injected and creates a call frame for the
  1564  // debugger to fill in.
  1565  //
  1566  // To inject a function call, a debugger should:
  1567  // 1. Check that the goroutine is in state _Grunning and that
  1568  //    there are at least 288 bytes free on the stack.
  1569  // 2. Set SP as SP-16.
  1570  // 3. Store the current LR in (SP) (using the SP after step 2).
  1571  // 4. Store the current PC in the LR register.
  1572  // 5. Write the desired argument frame size at SP-16 (using the SP after step 2).
  1573  // 6. Save all machine registers (including flags and fpsimd registers)
  1574  //    so they can be restored later by the debugger.
  1575  // 7. Set the PC to debugCallV2 and resume execution.
  1576  //
  1577  // If the goroutine is in state _Grunnable, then it's not generally
  1578  // safe to inject a call because it may return out via other runtime
  1579  // operations. Instead, the debugger should unwind the stack to find
  1580  // the return to non-runtime code, add a temporary breakpoint there,
  1581  // and inject the call once that breakpoint is hit.
  1582  //
  1583  // If the goroutine is in any other state, it's not safe to inject a call.
  1584  //
  1585  // This function communicates back to the debugger by setting R20 and
  1586  // invoking BRK to raise a breakpoint signal. Note that the signal PC of
  1587  // the signal triggered by the BRK instruction is the PC where the signal
  1588  // is trapped, not the next PC, so to resume execution, the debugger needs
  1589  // to set the signal PC to PC+4. See the comments in the implementation for
  1590  // the protocol the debugger is expected to follow. InjectDebugCall in the
  1591  // runtime tests demonstrates this protocol.
  1592  //
  1593  // The debugger must ensure that any pointers passed to the function
  1594  // obey escape analysis requirements. Specifically, it must not pass
  1595  // a stack pointer to an escaping argument. debugCallV2 cannot check
  1596  // this invariant.
  1597  //
  1598  // This is ABIInternal because Go code injects its PC directly into new
  1599  // goroutine stacks.
  1600  TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
  1601  	STP	(R29, R30), -280(RSP)	// save R29 and R30 just below the 272-byte frame opened next
  1602  	SUB	$272, RSP, RSP
  1603  	SUB	$8, RSP, R29	// R29 = address of the slot R29 was just saved to
  1604  	// Save all registers that may contain pointers so they can be
  1605  	// conservatively scanned.
  1606  	//
  1607  	// We can't do anything that might clobber any of these
  1608  	// registers before this.
  1609  	STP	(R27, g), (30*8)(RSP)
  1610  	STP	(R25, R26), (28*8)(RSP)
  1611  	STP	(R23, R24), (26*8)(RSP)
  1612  	STP	(R21, R22), (24*8)(RSP)
  1613  	STP	(R19, R20), (22*8)(RSP)
  1614  	STP	(R16, R17), (20*8)(RSP)
  1615  	STP	(R14, R15), (18*8)(RSP)
  1616  	STP	(R12, R13), (16*8)(RSP)
  1617  	STP	(R10, R11), (14*8)(RSP)
  1618  	STP	(R8, R9), (12*8)(RSP)
  1619  	STP	(R6, R7), (10*8)(RSP)
  1620  	STP	(R4, R5), (8*8)(RSP)
  1621  	STP	(R2, R3), (6*8)(RSP)
  1622  	STP	(R0, R1), (4*8)(RSP)
  1623  
  1624  	// Perform a safe-point check.
  1625  	MOVD	R30, 8(RSP) // Caller's PC
  1626  	CALL	runtime·debugCallCheck(SB)
  1627  	MOVD	16(RSP), R0	// first result word: the reason string pointer; zero means the check passed
  1628  	CBZ	R0, good
  1629  
  1630  	// The safety check failed. Put the reason string at the top
  1631  	// of the stack.
  1632  	MOVD	R0, 8(RSP)
  1633  	MOVD	24(RSP), R0
  1634  	MOVD	R0, 16(RSP)
  1635  
  1636  	// Set R20 to 8 and invoke BRK. The debugger should get the
  1637  	// reason a call can't be injected from SP+8 and resume execution.
  1638  	MOVD	$8, R20
  1639  	BREAK
  1640  	JMP	restore
  1641  
  1642  good:
  1643  	// Registers are saved and it's safe to make a call.
  1644  	// Open up a call frame, moving the stack if necessary.
  1645  	//
  1646  	// Once the frame is allocated, this will set R20 to 0 and
  1647  	// invoke BRK. The debugger should write the argument
  1648  	// frame for the call at SP+8, set up argument registers,
  1649  	// set the LR as the signal PC + 4, set the PC to the function
  1650  	// to call, set R26 to point to the closure (if a closure call),
  1651  	// and resume execution.
  1652  	//
  1653  	// If the function returns, this will set R20 to 1 and invoke
  1654  	// BRK. The debugger can then inspect any return value saved
  1655  	// on the stack at SP+8 and in registers. To resume execution,
  1656  	// the debugger should restore the LR from (SP).
  1657  	//
  1658  	// If the function panics, this will set R20 to 2 and invoke BRK.
  1659  	// The interface{} value of the panic will be at SP+8. The debugger
  1660  	// can inspect the panic value and resume execution again.
  1661  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1662  	CMP	$MAXSIZE, R0;			\
  1663  	BGT	5(PC);				\
  1664  	MOVD	$NAME(SB), R0;			\
  1665  	MOVD	R0, 8(RSP);			\
  1666  	CALL	runtime·debugCallWrap(SB);	\
  1667  	JMP	restore
  1668  
  1669  	MOVD	256(RSP), R0 // the argument frame size
  1670  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)	// each line: if R0 <= MAXSIZE, run debugCallWrap on that frame function, then restore
  1671  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1672  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1673  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1674  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1675  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1676  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1677  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1678  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1679  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1680  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1681  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1682  	// The frame size is too large. Report the error.
  1683  	MOVD	$debugCallFrameTooLarge<>(SB), R0
  1684  	MOVD	R0, 8(RSP)
  1685  	MOVD	$20, R0
  1686  	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
  1687  	MOVD	$8, R20
  1688  	BREAK
  1689  	JMP	restore
  1690  
  1691  restore:
  1692  	// Calls and failures resume here.
  1693  	//
  1694  	// Set R20 to 16 and invoke BRK. The debugger should restore
  1695  	// all registers except for PC and RSP and resume execution.
  1696  	MOVD	$16, R20
  1697  	BREAK
  1698  	// We must not modify flags after this point.
  1699  
  1700  	// Restore pointer-containing registers, which may have been
  1701  	// modified from the debugger's copy by stack copying.
  1702  	LDP	(30*8)(RSP), (R27, g)
  1703  	LDP	(28*8)(RSP), (R25, R26)
  1704  	LDP	(26*8)(RSP), (R23, R24)
  1705  	LDP	(24*8)(RSP), (R21, R22)
  1706  	LDP	(22*8)(RSP), (R19, R20)
  1707  	LDP	(20*8)(RSP), (R16, R17)
  1708  	LDP	(18*8)(RSP), (R14, R15)
  1709  	LDP	(16*8)(RSP), (R12, R13)
  1710  	LDP	(14*8)(RSP), (R10, R11)
  1711  	LDP	(12*8)(RSP), (R8, R9)
  1712  	LDP	(10*8)(RSP), (R6, R7)
  1713  	LDP	(8*8)(RSP), (R4, R5)
  1714  	LDP	(6*8)(RSP), (R2, R3)
  1715  	LDP	(4*8)(RSP), (R0, R1)
  1716  
  1717  	LDP	-8(RSP), (R29, R27)	// the pair stored at entry: R29, and the entry R30 value now in R27
  1718  	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
  1719  	MOVD	-16(RSP), R30 // restore old lr
  1720  	JMP	(R27)	// resume at the address that was in R30 on entry
  1721  
  1722  // DEBUG_CALL_FN defines a WRAPPER function NAME with a MAXSIZE-byte frame that raises BRK with R20=0, then with R20=1.
  1723  // runtime.debugCallCheck assumes that functions defined with this macro are safe points to inject calls.
  1724  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1725  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1726  	NO_LOCAL_POINTERS;		\
  1727  	MOVD	$0, R20;		\
  1728  	BREAK;		\
  1729  	MOVD	$1, R20;		\
  1730  	BREAK;		\
  1731  	RET
  1732  DEBUG_CALL_FN(debugCall32<>, 32)
  1733  DEBUG_CALL_FN(debugCall64<>, 64)
  1734  DEBUG_CALL_FN(debugCall128<>, 128)
  1735  DEBUG_CALL_FN(debugCall256<>, 256)
  1736  DEBUG_CALL_FN(debugCall512<>, 512)
  1737  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1738  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1739  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1740  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1741  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1742  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1743  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1744  
  1745  // func debugCallPanicked(val interface{})
  1746  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1747  	// Copy the panic value to the top of stack at SP+8.
  1748  	MOVD	val_type+0(FP), R0
  1749  	MOVD	R0, 8(RSP)	// type word of val
  1750  	MOVD	val_data+8(FP), R0
  1751  	MOVD	R0, 16(RSP)	// data word of val
  1752  	MOVD	$2, R20	// R20 = 2 is the status code placed in R20 before the BRK below
  1753  	BREAK
  1754  	RET
  1755  
  1756  TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0	// spill R0-R15, then call panicBounds64(PC after the call, pointer to the spill area)
  1757  	NO_LOCAL_POINTERS
  1758  	// Save all 16 int registers that could have an index in them.
  1759  	// They may be pointers, but if they are they are dead.
  1760  	STP	(R0, R1), 24(RSP)
  1761  	STP	(R2, R3), 40(RSP)
  1762  	STP	(R4, R5), 56(RSP)
  1763  	STP	(R6, R7), 72(RSP)
  1764  	STP	(R8, R9), 88(RSP)
  1765  	STP	(R10, R11), 104(RSP)
  1766  	STP	(R12, R13), 120(RSP)
  1767  	STP	(R14, R15), 136(RSP)
  1768  	MOVD	LR, R0		// PC immediately after call to panicBounds
  1769  	ADD	$24, RSP, R1	// pointer to save area
  1770  	CALL	runtime·panicBounds64<ABIInternal>(SB)
  1771  	RET
  1772  
  1773  TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0	// NOFRAME: no prologue runs, so R29 is still the value at entry
  1774  	MOVD R29, R0	// result in R0 = R29
  1775  	RET
  1776  

View as plain text