Text file
src/runtime/asm_arm64.s
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "tls_arm64.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10 #include "cgo/abi_arm64.h"
11
// _rt0_arm64 is the kernel entry point for an ordinary -buildmode=exe
// program on most arm64 systems when internal linking is used. On entry
// the stack holds the argument count followed by the C-style argv.
TEXT _rt0_arm64(SB),NOSPLIT,$0
	MOVD	0(RSP), R0	// R0 = argc
	ADD	$8, RSP, R1	// R1 = argv, one word above argc
	B	runtime·rt0_go(SB)
20
21 // main is common startup code for most arm64 systems when using
22 // external linking. The C startup code will call the symbol "main"
23 // passing argc and argv in the usual C ABI registers R0 and R1.
TEXT main(SB),NOSPLIT,$0
	// argc and argv are already in R0 and R1; go straight to the bootstrap.
	B	runtime·rt0_go(SB)
26
// _rt0_arm64_lib is the startup code for -buildmode=c-archive and
// -buildmode=c-shared on most arm64 systems. The linker arranges for it
// to run as a global constructor (c-archive) or when the shared library
// is loaded (c-shared). argc and argv are expected in the usual C ABI
// registers R0 and R1.
//
// Frame use ($184): offsets 8 and 16 carry the newosproc0 arguments,
// the R19-R28 save area is placed at offset 24 and the F8-F15 save area
// at offset 104.
TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
	// Save the callee-saved registers for the C caller.
	SAVE_R19_TO_R28(24)
	SAVE_F8_TO_F15(104)

	// Start with a nil g in case g is consulted later, e.g. by
	// sigaction in cgo_sigaction.go.
	MOVD	ZR, g

	// Stash argc/argv where _rt0_arm64_lib_go can find them.
	MOVD	R0, _rt0_arm64_lib_argc<>(SB)
	MOVD	R1, _rt0_arm64_lib_argv<>(SB)

	// Synchronous part of initialization.
	MOVD	$runtime·libpreinit(SB), R4
	CALL	(R4)

	// Run the rest of runtime initialization on a new thread and return.
	MOVD	_cgo_sys_thread_create(SB), R4
	CBZ	R4, no_thread_create
	MOVD	$_rt0_arm64_lib_go(SB), R0
	MOVD	$0, R1
	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
	CALL	(R4)
	ADD	$16, RSP
	JMP	restore_regs

no_thread_create:
	// No _cgo_sys_thread_create: start the thread with newosproc0.
	MOVD	$0x800000, R0		// stacksize = 8192KB
	MOVD	$_rt0_arm64_lib_go(SB), R1
	MOVD	R0, 8(RSP)
	MOVD	R1, 16(RSP)
	MOVD	$runtime·newosproc0(SB), R4
	CALL	(R4)

restore_regs:
	// Put the callee-saved registers back before returning to C.
	RESTORE_R19_TO_R28(24)
	RESTORE_F8_TO_F15(104)
	RET
71
// _rt0_arm64_lib_go is the thread entry point handed out by
// _rt0_arm64_lib. It reloads the saved argc/argv and jumps to rt0_go.
TEXT _rt0_arm64_lib_go(SB),NOSPLIT,$0
	MOVD	_rt0_arm64_lib_argc<>(SB), R0
	MOVD	_rt0_arm64_lib_argv<>(SB), R1
	MOVD	$runtime·rt0_go(SB), R4
	JMP	(R4)

// argc and argv as received by _rt0_arm64_lib.
DATA	_rt0_arm64_lib_argc<>(SB)/8, $0
GLOBL	_rt0_arm64_lib_argc<>(SB),NOPTR, $8
DATA	_rt0_arm64_lib_argv<>(SB)/8, $0
GLOBL	_rt0_arm64_lib_argv<>(SB),NOPTR, $8
82
// Message printed by rt0_go when the binary was built for LSE but the
// CPU does not report it.
#ifdef GOARM64_LSE
DATA	no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
GLOBL	no_lse_msg<>(SB), RODATA, $64
#endif

// Linux and FreeBSD are known to allow reading the instruction set
// attribute registers, while some other OSes (such as OpenBSD and
// Darwin) do not. Be conservative and only enable code that reads such
// registers where we are sure it cannot lead to SIGILL.
#ifdef GOOS_freebsd
#define ISA_REGS_READABLE
#endif
#ifdef GOOS_linux
#define ISA_REGS_READABLE
#endif

// CHECK_GOARM64_LSE is defined only when the build targets LSE and the
// ISA registers can be read on this OS.
#ifdef ISA_REGS_READABLE
#ifdef GOARM64_LSE
#define CHECK_GOARM64_LSE
#endif
#endif
104
// rt0_go is the common bootstrap reached from every entry point above.
// On entry RSP is the stack provided by the operating system, R0 = argc
// and R1 = argv. It builds g0 on the OS stack, runs _cgo_init if present,
// links g0 and m0, optionally checks for LSE support, initializes the
// runtime, queues the function referenced by runtime·mainPC as a new
// goroutine and starts this M. It does not return.
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
	// SP = stack; R0 = argc; R1 = argv

	SUB	$32, RSP
	MOVW	R0, 8(RSP) // argc
	MOVD	R1, 16(RSP) // argv

#ifdef TLS_darwin
	// Initialize TLS.
	MOVD	ZR, g // clear g, make sure it's not junk.
	SUB	$32, RSP
	MRS_TPIDR_R0
	AND	$~7, R0
	MOVD	R0, 16(RSP)             // arg2: TLS base
	MOVD	$runtime·tls_g(SB), R2
	MOVD	R2, 8(RSP)              // arg1: &tlsg
	BL	·tlsinit(SB)
	ADD	$32, RSP
#endif

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	RSP, R7
	MOVD	$(-64*1024)(R7), R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)
	MOVD	R0, (g_stack+stack_lo)(g)
	MOVD	R7, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CBZ	R12, nocgo

#ifdef GOOS_android
	MRS_TPIDR_R0			// load TLS base pointer
	MOVD	R0, R3			// arg 3: TLS base pointer
	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
#else
	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
#endif
	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
	MOVD	g, R0			// arg 0: G
	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
	BL	(R12)
	ADD	$16, RSP

nocgo:
	BL	runtime·save_g(SB)
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R0
	ADD	$const_stackGuard, R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R0

	// save m->g0 = g0
	MOVD	g, m_g0(R0)
	// save m0 to g0->m
	MOVD	R0, g_m(g)

	BL	runtime·check(SB)

#ifdef GOOS_windows
	BL	runtime·wintls(SB)
#endif

	// Check that CPU we use for execution supports instructions targeted during compile-time.
#ifdef CHECK_GOARM64_LSE
	// Read the ID_AA64ISAR0_EL1 register
	MRS	ID_AA64ISAR0_EL1, R0

	// Extract the LSE field (bits [23:20])
	LSR	$20, R0, R0
	AND	$0xf, R0, R0

	// LSE support is indicated by a non-zero value
	CBZ	R0, no_lse
#endif

	MOVW	8(RSP), R0	// copy argc
	MOVW	R0, -8(RSP)
	MOVD	16(RSP), R0		// copy argv
	MOVD	R0, 0(RSP)
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R0		// entry
	SUB	$16, RSP
	MOVD	R0, 8(RSP) // arg
	MOVD	$0, 0(RSP) // dummy LR
	BL	runtime·newproc(SB)
	ADD	$16, RSP

	// start this M
	BL	runtime·mstart(SB)
	UNDEF

#ifdef CHECK_GOARM64_LSE
no_lse:
	// Report the missing CPU feature on standard error (fd 2) and exit.
	MOVD	$2, R0 // stderr
	MOVD	R0, 8(RSP)
	MOVD	$no_lse_msg<>(SB), R1 // message address
	MOVD	R1, 16(RSP)
	MOVD	$64, R2 // message length
	MOVD	R2, 24(RSP)
	CALL	runtime·write(SB)
	// Set the exit status explicitly instead of relying on whatever
	// the write call left in the first argument slot.
	MOVW	$1, R0 // exit status
	MOVW	R0, 8(RSP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET
#endif

	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
	// intended to be called by debuggers.
	MOVD	$runtime·debugPinnerV1<ABIInternal>(SB), R0
	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0

	MOVD	$0, R0
	MOVD	R0, (R0)	// boom
	UNDEF
229
// mainPC holds the address of runtime·main; rt0_go passes &mainPC to
// newproc as the entry for the first goroutine.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
232
// BREAK expands to a breakpoint instruction. On Windows ARM64 the BRK
// needs the immediate 0xf000; see go.dev/issues/53837.
#define BREAK	\
#ifdef GOOS_windows	\
	BRK	$0xf000 	\
#else 	\
	BRK 	\
#endif 	\


// breakpoint executes a single BREAK and returns.
TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	BREAK
	RET
246
// asminit has nothing to do on arm64.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET
249
// mstart is the assembly shim around mstart0. It is marked TOPFRAME.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET	// not reached
253
254 /*
255 * go-routine
256 */
257
// void gogo(Gobuf*)
// Restore the state saved in the Gobuf and continue there (a longjmp).
// Leaves the Gobuf pointer in R5 and its g in R6 for gogo<>.
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R5		// R5 = buf
	MOVD	gobuf_g(R5), R6		// R6 = buf->g
	MOVD	0(R6), R4		// make sure g != nil
	JMP	gogo<>(SB)
265
// gogo<> finishes the switch started by runtime·gogo.
// In: R5 = Gobuf pointer, R6 = the g to switch to.
// It installs g, reloads SP, frame pointer, LR and the context register
// from the Gobuf, clears those Gobuf fields, and jumps to the saved PC.
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
	MOVD	R6, g
	CALL	runtime·save_g(SB)

	// Reload the saved machine state.
	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_bp(R5), R29
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ctxt(R5), R26

	// Zero the fields that were just consumed.
	MOVD	$0, gobuf_sp(R5)
	MOVD	$0, gobuf_bp(R5)
	MOVD	$0, gobuf_lr(R5)
	MOVD	$0, gobuf_ctxt(R5)

	CMP	ZR, ZR	// set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R6
	JMP	(R6)
282
// void mcall(fn func(*g))
// Switch to m->g0's stack and call fn(g), where g is the goroutine that
// made the call. fn must never return; it should gogo(&g->sched) to
// keep running g.
// In: R0 = fn (closure pointer).
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	R0, R26				// context

	// Record the caller's SP, frame pointer and return PC in g->sched.
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)

	// Make m->g0 the current g; R3 remembers the calling g.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	CALL	runtime·save_g(SB)
	CMP	g, R3
	BNE	switched
	// The caller was already g0.
	JMP	runtime·badmcall(SB)

switched:
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP				// sp = m->g0->sched.sp
	MOVD	$0, R29				// clear frame pointer, as caller may execute on another M
	MOVD	R3, R0				// arg = g
	MOVD	$0, -16(RSP)			// dummy LR
	SUB	$16, RSP
	MOVD	0(R26), R4			// code pointer
	CALL	(R4)
	JMP	runtime·badmcall2(SB)
315
// systemstack_switch is a dummy routine that systemstack leaves at the
// bottom of the G stack. It must be distinguishable from the routine at
// the top of the system stack, because that one terminates the stack
// walk (see topofstack()).
// gosave_systemstack_switch<> records this function's address plus 8
// as a fake PC, so the instruction sequence here must not change.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	CALL	(LR)	// make sure this function is not leaf
	RET
325
// func systemstack(fn func())
// Run fn on the system stack. If the caller is already on gsignal or
// g0, fn is tail-called in place. If the caller is m->curg, the code
// switches to g0, calls fn, and switches back. Any other g is fatal.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R26		// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	on_system_stack

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	on_system_stack

	MOVD	m_curg(R4), R6	// R6 = curg
	CMP	g, R6
	BEQ	to_g0

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R3
	CALL	(R3)
	JMP	runtime·abort(SB)

to_g0:
	// Switch stacks.
	// The original frame pointer is stored in R29,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0 (still in R5)
	MOVD	R5, g
	CALL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	R3, RSP

	// call target function
	MOVD	0(R26), R3	// code pointer
	CALL	(R3)

	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	CALL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	$0, (g_sched+gobuf_sp)(g)
	MOVD	$0, (g_sched+gobuf_bp)(g)
	RET

on_system_stack:
	// already on m stack, just call directly
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVD	0(R26), R3	// code pointer
	MOVD.P	16(RSP), R30	// restore LR
	SUB	$8, RSP, R29	// restore FP
	JMP	(R3)
387
// func switchToCrashStack0(fn func())
// Make gcrash the current g and the current m's g0, move SP to just
// below the top of gcrash's stack, and call fn. fn should never return.
// In: R0 = fn (closure pointer).
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R26		// context register
	MOVD	g_m(g), R1	// curm

	// set g to gcrash
	MOVD	$runtime·gcrash(SB), g	// g = &gcrash
	CALL	runtime·save_g(SB)	// clobbers R0
	MOVD	R1, g_m(g)		// g.m = curm
	MOVD	g, m_g0(R1)		// curm.g0 = g

	// switch to crashstack, 4 words below stack.hi
	MOVD	(g_stack+stack_hi)(g), R1
	SUB	$(4*8), R1
	MOVD	R1, RSP

	// call target function
	MOVD	0(R26), R0
	CALL	(R0)

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
411
412 /*
413 * support for morestack
414 */
415
// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3 prolog's LR (R30)
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// The stacks of m->g0 and m->gsignal cannot be grown; a request from
// either one is fatal.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	g_m(g), R8	// R8 = m
	MOVD	m_g0(R8), R4	// R4 = m->g0, compared against g below

	// Called from f.
	// Set g->sched to context in f
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	R3, (g_sched+gobuf_lr)(g)
	MOVD	R26, (g_sched+gobuf_ctxt)(g)

	// Cannot grow scheduler stack (m->g0).
	CMP	g, R4
	BNE	not_g0
	CALL	runtime·badmorestackg0(SB)
	JMP	runtime·abort(SB)

not_g0:
	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R8), R4
	CMP	g, R4
	BNE	not_gsignal
	CALL	runtime·badmorestackgsignal(SB)
	JMP	runtime·abort(SB)

not_gsignal:
	// Called from f.
	// Set m->morebuf to f's callers.
	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVD	RSP, R0
	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
	MOVD	g, (m_morebuf+gobuf_g)(R8)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R8), g
	CALL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	$0, R29		// clear frame pointer, as caller may execute on another M
	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
	CALL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF
469
// morestack_noctxt clears the context register (R26) and continues in
// morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	// Force SPWRITE. This function doesn't actually write SP,
	// but it is called with a special calling convention where
	// the caller doesn't save LR on stack but passes it as a
	// register (R3), and the unwinder currently doesn't understand.
	// Make it SPWRITE to stop unwinding. (See issue 54332)
	MOVD	RSP, RSP

	MOVW	$0, R26		// no closure context
	JMP	runtime·morestack(SB)
480
// spillArgs stores return values from registers to a
// *internal/abi.RegArgs in R20: R0-R15 go to byte offsets 0-127 and
// F0-F15 to byte offsets 128-255.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	// Integer registers, two per store.
	STP	(R0, R1), 0(R20)
	STP	(R2, R3), 16(R20)
	STP	(R4, R5), 32(R20)
	STP	(R6, R7), 48(R20)
	STP	(R8, R9), 64(R20)
	STP	(R10, R11), 80(R20)
	STP	(R12, R13), 96(R20)
	STP	(R14, R15), 112(R20)
	// Floating-point registers, two per store.
	FSTPD	(F0, F1), 128(R20)
	FSTPD	(F2, F3), 144(R20)
	FSTPD	(F4, F5), 160(R20)
	FSTPD	(F6, F7), 176(R20)
	FSTPD	(F8, F9), 192(R20)
	FSTPD	(F10, F11), 208(R20)
	FSTPD	(F12, F13), 224(R20)
	FSTPD	(F14, F15), 240(R20)
	RET
500
// unspillArgs loads args into registers from a *internal/abi.RegArgs
// in R20: byte offsets 0-127 fill R0-R15 and byte offsets 128-255 fill
// F0-F15.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	// Integer registers, two per load.
	LDP	0(R20), (R0, R1)
	LDP	16(R20), (R2, R3)
	LDP	32(R20), (R4, R5)
	LDP	48(R20), (R6, R7)
	LDP	64(R20), (R8, R9)
	LDP	80(R20), (R10, R11)
	LDP	96(R20), (R12, R13)
	LDP	112(R20), (R14, R15)
	// Floating-point registers, two per load.
	FLDPD	128(R20), (F0, F1)
	FLDPD	144(R20), (F2, F3)
	FLDPD	160(R20), (F4, F5)
	FLDPD	176(R20), (F6, F7)
	FLDPD	192(R20), (F8, F9)
	FLDPD	208(R20), (F10, F11)
	FLDPD	224(R20), (F12, F13)
	FLDPD	240(R20), (F14, F15)
	RET
520
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// Frames are not variable-sized, so a small set of functions with
// constant frame sizes is used, which encodes a few bits of the size
// in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the frame size in R16 is at most MAXSIZE
// and otherwise falls through to the next DISPATCH. Clobbers R27.
// The skip distance of 3 instructions must match the macro body.
#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R27;		\
	CMP	R27, R16;		\
	BGT	3(PC);			\
	MOVD	$NAME(SB), R27;	\
	B	(R27)
// Note: can't just "B NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
	MOVWU	frameSize+32(FP), R16	// R16 = requested frame size
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// Larger than every size class.
	MOVD	$runtime·badreflectcall(SB), R0
	JMP	(R0)
566
// CALLFN defines one reflectcall target NAME with a fixed frame of
// MAXSIZE bytes. It copies the stack arguments into its own frame,
// loads the register arguments, calls f, spills the register results
// and hands the stack results to callRet<>.
// The PC-relative branches depend on the exact instruction count;
// keep them in sync when editing.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* R3 = source, R4 = byte count, R5 = destination in our frame */	\
	MOVD	stackArgs+16(FP), R3;			\
	MOVWU	stackArgsSize+24(FP), R4;		\
	ADD	$8, RSP, R5;			\
	BIC	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* R6 = (argsize&~15) != 0: R6 becomes the end of the 16-byte part */	\
	ADD	R6, R5, R6;			\
	/* move 16 bytes per iteration */		\
	LDP.P	16(R3), (R7, R8);		\
	STP.P	(R7, R8), 16(R5);		\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	AND	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* R6 = (argsize&15) != 0: R6 becomes the end of the tail */	\
	ADD	R6, R5, R6;			\
	/* move the remaining bytes one at a time */	\
	MOVBU.P	1(R3), R7;			\
	MOVBU.P	R7, 1(R5);			\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	/* load the register arguments */		\
	MOVD	regArgs+40(FP), R20;		\
	BL	·unspillArgs(SB);		\
	/* call f through its closure pointer in R26 */	\
	MOVD	f+8(FP), R26;			\
	MOVD	(R26), R20;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	(R20);				\
	/* save register results, then copy stack results back */	\
	MOVD	regArgs+40(FP), R20;		\
	BL	·spillArgs(SB);		\
	MOVD	stackArgsType+0(FP), R7;		\
	MOVD	stackArgs+16(FP), R3;			\
	MOVWU	stackArgsSize+24(FP), R4;			\
	MOVWU	stackRetOffset+28(FP), R6;		\
	ADD	$8, RSP, R5;			\
	ADD	R6, R5; 			\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	CALL	callRet<>(SB);			\
	RET
613
// callRet copies return values back at the end of call*. It is a
// separate function so that it can allocate stack space for the
// arguments to reflectcallmove. It does not follow the Go ABI; its
// inputs arrive in registers and are stored to the outgoing argument
// slots in this order: R7, R3, R5, R4, R20.
TEXT callRet<>(SB), NOSPLIT, $48-0
	NO_LOCAL_POINTERS
	MOVD	R7, 8(RSP)
	MOVD	R3, 16(RSP)
	MOVD	R5, 24(RSP)
	MOVD	R4, 32(RSP)
	MOVD	R20, 40(RSP)
	CALL	runtime·reflectcallmove(SB)
	RET
625
// One call* function per size class; this list must stay in step with
// the DISPATCH list in reflectcall.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
653
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// In: R0 = p, R1 = h. Out: R0 = hash.
// Uses the AES instructions when runtime·useAeshash is set and the
// generic fallback otherwise.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	MOVD	$runtime·aeskeysched+0(SB), R3

	// V0 = zero with the seed in lane S[0] and the 4 data bytes in S[1];
	// V2 = first 16 bytes of the key schedule.
	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.S[1]
	VMOV	R1, V0.S[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
fallback:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
675
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// In: R0 = p, R1 = h. Out: R0 = hash.
// Uses the AES instructions when runtime·useAeshash is set and the
// generic fallback otherwise.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	MOVD	$runtime·aeskeysched+0(SB), R3

	// V0 = seed in lane D[0] and the 8 data bytes in D[1];
	// V2 = first 16 bytes of the key schedule.
	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.D[1]
	VMOV	R1, V0.D[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
fallback:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
697
// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
// In: R0 = p, R1 = h, R2 = size. These are exactly the registers
// aeshashbody<> expects, so the AES path is a plain tail call.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
705
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// In: R0 = p (pointer to a string header), R1 = h.
// On the AES path the header is unpacked into R0 (data) and R2 (length)
// before tail-calling aeshashbody<>.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	MOVD	8(R0), R2	// string length
	MOVD	(R0), R0	// string data; loaded last because it overwrites the header pointer
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
714
// aeshashbody<> is the shared AES hashing core.
// R0: data
// R1: seed data
// R2: length
// At return, R0 = return value
//
// V30 holds the seed combined with the length and V0 the first
// scrambled key block. The input length selects one of the code paths
// below: 0, 1-15, 16, 17-32, 33-64, 65-128 and 129+ bytes.
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
	VEOR	V30.B16, V30.B16, V30.B16
	VMOV	R1, V30.D[0]
	VMOV	R2, V30.D[1] // load length into seed

	MOVD	$runtime·aeskeysched+0(SB), R4
	VLD1.P	16(R4), [V0.B16]
	AESE	V30.B16, V0.B16
	AESMC	V0.B16, V0.B16

	// Dispatch on length (unsigned comparisons).
	CMP	$16, R2
	BLO	aes0to15
	BEQ	aes16
	CMP	$32, R2
	BLS	aes17to32
	CMP	$64, R2
	BLS	aes33to64
	CMP	$128, R2
	BLS	aes65to128
	JMP	aes129plus

aes0to15:
	CBZ	R2, aes0
	// Assemble the 1-15 input bytes into V2, one chunk per set bit of
	// the length (8, 4, 2, 1).
	VEOR	V2.B16, V2.B16, V2.B16
	TBZ	$3, R2, less_than_8
	VLD1.P	8(R0), V2.D[0]

less_than_8:
	TBZ	$2, R2, less_than_4
	VLD1.P	4(R0), V2.S[2]

less_than_4:
	TBZ	$1, R2, less_than_2
	VLD1.P	2(R0), V2.H[6]

less_than_2:
	TBZ	$0, R2, done
	VLD1	(R0), V2.B[14]
done:
	// Three rounds over the single block in V2.
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes0:
	// Empty input: the result is the low half of the scrambled seed.
	VMOV	V0.D[0], R0
	RET

aes16:
	VLD1	(R0), [V2.B16]
	JMP	done

aes17to32:
	// make second seed
	VLD1	(R4), [V1.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	// V2 = first 16 bytes, V3 = last 16 bytes (the two may overlap).
	SUB	$16, R2, R10
	VLD1.P	(R0)(R10), [V2.B16]
	VLD1	(R0), [V3.B16]

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESE	V1.B16, V3.B16

	VEOR	V3.B16, V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes33to64:
	// Three more seeds in V1-V3.
	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	SUB	$32, R2, R10

	// V4,V5 = first 32 bytes; V6,V7 = last 32 bytes.
	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
	VLD1	(R0), [V6.B16, V7.B16]

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESE	V3.B16, V7.B16

	// Fold the four lanes into V4.
	VEOR	V6.B16, V4.B16, V4.B16
	VEOR	V7.B16, V5.B16, V5.B16
	VEOR	V5.B16, V4.B16, V4.B16

	VMOV	V4.D[0], R0
	RET

aes65to128:
	// Seven more seeds in V1-V7.
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16

	// V8-V11 = first 64 bytes; V12-V15 = last 64 bytes.
	SUB	$64, R2, R10
	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESE	V7.B16, V15.B16

	// Fold the eight lanes into V8.
	VEOR	V12.B16, V8.B16, V8.B16
	VEOR	V13.B16, V9.B16, V9.B16
	VEOR	V14.B16, V10.B16, V10.B16
	VEOR	V15.B16, V11.B16, V11.B16
	VEOR	V10.B16, V8.B16, V8.B16
	VEOR	V11.B16, V9.B16, V9.B16
	VEOR	V9.B16, V8.B16, V8.B16

	VMOV	V8.D[0], R0
	RET

aes129plus:
	PRFM (R0), PLDL1KEEP
	// Seven more seeds in V1-V7.
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16
	// Start with the last 128 bytes of the input in V8-V15.
	ADD	R0, R2, R10
	SUB	$128, R10, R10
	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
	// R2 = (length-1)/128, the number of 128-byte loop iterations.
	SUB	$1, R2, R2
	LSR	$7, R2, R2

aesloop:
	// Mix the previously loaded 128 bytes into the state V0-V7.
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	// Load the next 64 bytes from the front and mix them in.
	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16

	// And the 64 bytes after that.
	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16
	SUB	$1, R2, R2
	CBNZ	R2, aesloop

	// Final rounds using the last loaded block.
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESE	V15.B16, V7.B16

	// Fold the eight state lanes into V0.
	VEOR	V0.B16, V1.B16, V0.B16
	VEOR	V2.B16, V3.B16, V2.B16
	VEOR	V4.B16, V5.B16, V4.B16
	VEOR	V6.B16, V7.B16, V6.B16
	VEOR	V0.B16, V2.B16, V0.B16
	VEOR	V4.B16, V6.B16, V4.B16
	VEOR	V4.B16, V0.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
1038
// procyield executes YIELD once per requested cycle.
// In: cycles+0(FP), a 32-bit count.
// A count of zero returns immediately. Without the early exit the
// 32-bit decrement would wrap to 0xFFFFFFFF and the loop would spin
// roughly 2^32 times.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVWU	cycles+0(FP), R0
	CBZ	R0, done
again:
	YIELD
	SUBW	$1, R0
	CBNZ	R0, again
done:
	RET
1046
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// Smashes R0. Aborts if g->sched.ctxt is not zero.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	// Fake PC: systemstack_switch plus 8 bytes.
	MOVD	$runtime·systemstack_switch(SB), R0
	ADD	$8, R0	// get past prologue
	MOVD	R0, (g_sched+gobuf_pc)(g)
	// Real SP and frame pointer; no saved LR.
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)
	// Assert ctxt is zero. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R0
	CBZ	R0, ctxt_ok
	CALL	runtime·abort(SB)
ctxt_ok:
	RET
1065
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	MOVD	fn+0(FP), R1	// R1 = fn
	MOVD	arg+8(FP), R0	// R0 = arg, the first C argument
	SUB	$16, RSP	// skip over saved frame pointer below RSP
	CALL	(R1)
	ADD	$16, RSP	// undo the adjustment above
	RET
1076
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// The low 32 bits of fn's result (R0) are stored to ret on both paths.
// If there is no g, or the caller is already on m->gsignal or m->g0,
// no stack switch is made (label nosave).
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0

	MOVD	RSP, R2		// save original stack pointer
	CBZ	g, nosave
	MOVD	g, R4

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	MOVD	g_m(g), R8
	MOVD	m_gsignal(R8), R3
	CMP	R3, g
	BEQ	nosave
	MOVD	m_g0(R8), R3
	CMP	R3, g
	BEQ	nosave

	// Switch to system stack.
	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
	BL	gosave_systemstack_switch<>(SB)
	MOVD	R3, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R9, R0

	// Now on a scheduling stack (a pthread-created stack).
	// Save room for two of our pointers /*, plus 32 bytes of callee
	// save area that lives on the caller stack. */
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	R4, 0(RSP)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4
	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)
	MOVD	R0, R9

	// Restore g, stack pointer. R0 is errno, so don't touch it
	MOVD	0(RSP), g
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	8(RSP), R6
	SUB	R6, R5
	MOVD	R9, R0
	MOVD	R5, RSP

	MOVW	R0, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	$0, R4
	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
	MOVD	R2, 8(RSP)	// Save original stack pointer.
	BL	(R1)
	// Restore stack pointer.
	MOVD	8(RSP), R2
	MOVD	R2, RSP
	// ret is an int32 (frame $0-20); store 4 bytes, as the path above
	// does, rather than 8 bytes that run past the result slot.
	MOVW	R0, ret+16(FP)
	RET
1159
// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Outline of the code below:
//   - If fn is nil, frame is loaded into g and only dropm is called.
//   - Otherwise g is loaded via load_g; if it is nil, needAndBindM is called.
//   - m->g0->sched.sp is saved in this frame and replaced with the current SP,
//     the stack is switched to m->curg, and cgocallbackg is called with
//     fn, frame and ctxt stored at 8(RSP), 16(RSP) and 24(RSP).
//   - On return the saved values are restored and dropm is called when needed.
//
// Frame slots: savedm-8(SP) holds the m found on entry (zero if there was none);
// savedsp-16(SP) holds the previous m->g0->sched.sp.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVD	fn+0(FP), R1
	CBNZ	R1, loadg
	// Restore the g from frame.
	MOVD	frame+8(FP), g
	B	dropm

loadg:
	// Load g from thread-local storage.
	BL	runtime·load_g(SB)

	// If g is nil, either Go did not create the current thread,
	// or this thread has never called into Go on pthread platforms.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CBZ	g, needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	B	havem

needm:
	MOVD	g, savedm-8(SP) // g is zero, so is m.
	MOVD	$runtime·needAndBindM(SB), R0
	BL	(R0)

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->g0->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)
	MOVD	R29, (g_sched+gobuf_bp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
	// Beware that the frame size is actually 32+16.
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -48(R4)	// becomes 0(RSP) once RSP = R4-48 below
	MOVD	(g_sched+gobuf_bp)(g), R5
	MOVD	R5, -56(R4)	// becomes -8(RSP) once RSP = R4-48 below
	// Gather our arguments into registers.
	MOVD	fn+0(FP), R1
	MOVD	frame+8(FP), R2
	MOVD	ctxt+16(FP), R3
	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
	MOVD	R0, RSP	// switch stack
	MOVD	R1, 8(RSP)
	MOVD	R2, 16(RSP)
	MOVD	R3, 24(RSP)
	MOVD	$runtime·cgocallbackg(SB), R0
	CALL	(R0) // indirect call to bypass nosplit check. We're on a different stack now.

	// Restore g->sched (== m->curg->sched) from saved values.
	// The PC comes back from 0(RSP), where it was stored above;
	// the SP is the current RSP plus the 48 bytes opened above.
	MOVD	0(RSP), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R4
	ADD	$48, R4, R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	//    since then we skip dropm to reuse the m in the first call.
	MOVD	savedm-8(SP), R6
	CBNZ	R6, droppedm

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVD	_cgo_pthread_key_created(SB), R6
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CBZ	R6, dropm
	MOVD	(R6), R6
	CBNZ	R6, droppedm

dropm:
	// Indirect call, like the needAndBindM call above.
	MOVD	$runtime·dropm(SB), R0
	BL	(R0)
droppedm:

	// Done!
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is left in R0.
TEXT _cgo_topofstack(SB),NOSPLIT,$24
	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
	// are callee-save in the gcc calling convention, so save them.
	MOVD	R27, savedR27-8(SP)
	MOVD	g, saveG-16(SP)

	BL	runtime·load_g(SB)
	MOVD	g_m(g), R0
	MOVD	m_curg(R0), R0
	MOVD	(g_stack+stack_hi)(R0), R0

	// Reload both registers from the slots they were saved in above.
	MOVD	saveG-16(SP), g
	MOVD	savedR27-8(SP), R27
	RET

// void setg(G*); set g. for use by needm.
// Loads the argument into the g register and then calls save_g.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so call save_g unconditionally.
	CALL	runtime·save_g(SB)
	RET

// void setg_gcc(G*); set g called from gcc
// The new g arrives in R0. R27 is saved in the frame across the save_g
// call and reloaded before returning.
TEXT setg_gcc<>(SB),NOSPLIT,$8
	MOVD	R0, g
	MOVD	R27, savedR27-8(SP)
	CALL	runtime·save_g(SB)
	MOVD	savedR27-8(SP), R27
	RET

// emptyfunc does nothing but return.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort reads from address zero; should execution continue past that
// load, it reaches UNDEF.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	ZR, R0
	MOVD	(R0), R0
	UNDEF

// The top-most function running on a goroutine
// returns to goexit+PCQuantum, i.e. just past the leading no-op below.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
	MOVD	R0, R0	// NOP
	CALL	runtime·goexit1(SB)	// does not return

// This is called from .init_array and follows the platform, not Go, ABI.
// The incoming R0 is stored into the next field of the module data that
// lastmoduledatap points at, and then becomes the new lastmoduledatap.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	SUB	$16, RSP
	// The accesses to global variables below implicitly use R27, which is
	// callee-save, so keep its value on the stack meanwhile.
	MOVD	R27, 8(RSP)
	MOVD	runtime·lastmoduledatap(SB), R1
	MOVD	R0, moduledata_next(R1)
	MOVD	R0, runtime·lastmoduledatap(SB)
	MOVD	8(RSP), R27
	ADD	$16, RSP
	RET

// checkASM stores 1 into its single-byte result.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET

// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier does NOT follow the Go ABI. On entry R25 holds the
// number of bytes of buffer space needed; on return R25 points to
// that space.
// It clobbers condition codes.
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
	// The fast path uses R0 and R1 as scratch; park them first.
	STP	(R0, R1), 184(RSP)
again:
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
	// R1 = wbBuf.next advanced by the requested size.
	ADD	R25, R1
	// Would that run past wbBuf.end?
	CMP	R27, R1
	BHI	full
	// It fits: publish the advanced next pointer.
	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
	// The return value is the next pointer from before the advance.
	SUB	R25, R1, R25
	// Give R0 and R1 back to the caller.
	LDP	184(RSP), (R0, R1)
	RET

full:
	// wbBufFlush may clobber any general purpose register and the
	// caller saved none of them, so save them all here.
	// R0 and R1 are already in the frame.
	STP	(R2, R3), 1*8(RSP)
	STP	(R4, R5), 3*8(RSP)
	STP	(R6, R7), 5*8(RSP)
	STP	(R8, R9), 7*8(RSP)
	STP	(R10, R11), 9*8(RSP)
	STP	(R12, R13), 11*8(RSP)
	STP	(R14, R15), 13*8(RSP)
	// R16, R17 may be clobbered by linker trampoline
	// R18 is unused.
	STP	(R19, R20), 15*8(RSP)
	STP	(R21, R22), 17*8(RSP)
	STP	(R23, R24), 19*8(RSP)
	STP	(R25, R26), 21*8(RSP)
	// R27 is temp register.
	// R28 is g.
	// R29 is frame pointer (unused).
	// R30 is LR, which was saved by the prologue.
	// R31 is SP.

	BL	runtime·wbBufFlush(SB)
	LDP	1*8(RSP), (R2, R3)
	LDP	3*8(RSP), (R4, R5)
	LDP	5*8(RSP), (R6, R7)
	LDP	7*8(RSP), (R8, R9)
	LDP	9*8(RSP), (R10, R11)
	LDP	11*8(RSP), (R12, R13)
	LDP	13*8(RSP), (R14, R15)
	LDP	15*8(RSP), (R19, R20)
	LDP	17*8(RSP), (R21, R22)
	LDP	19*8(RSP), (R23, R24)
	LDP	21*8(RSP), (R25, R26)
	// Try the fast path again now that the buffer has been flushed.
	B	again

// gcWriteBarrier1 through gcWriteBarrier8 each load 8*N, the byte count
// for N pointer-sized slots, into R25 and branch to gcWriteBarrier<>.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$8, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$16, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$24, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$32, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$40, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$48, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$56, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$64, R25
	B	gcWriteBarrier<>(SB)

// Error string reported by debugCallV2 when the requested argument frame
// is larger than the largest debugCall<N> function.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below

// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 288 bytes free on the stack.
// 2. Set SP as SP-16.
// 3. Store the current LR in (SP) (using the SP after step 2).
// 4. Store the current PC in the LR register.
// 5. Write the desired argument frame size at SP-16
// 6. Save all machine registers (including flags and fpsimd registers)
//    so they can be restored later by the debugger.
// 7. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R20 and
// invoking BRK to raise a breakpoint signal. Note that the signal PC of
// the signal triggered by the BRK instruction is the PC where the signal
// is trapped, not the next PC, so to resume execution, the debugger needs
// to set the signal PC to PC+4. See the comments in the implementation for
// the protocol the debugger is expected to follow. InjectDebugCall in the
// runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
	// Open a 272-byte frame by hand (the function is NOFRAME).
	// R29 and R30 land at -8(RSP) and 0(RSP) relative to the new RSP,
	// and R29 is pointed at the saved R29.
	STP	(R29, R30), -280(RSP)
	SUB	$272, RSP, RSP
	SUB	$8, RSP, R29
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	STP	(R27, g), (30*8)(RSP)
	STP	(R25, R26), (28*8)(RSP)
	STP	(R23, R24), (26*8)(RSP)
	STP	(R21, R22), (24*8)(RSP)
	STP	(R19, R20), (22*8)(RSP)
	STP	(R16, R17), (20*8)(RSP)
	STP	(R14, R15), (18*8)(RSP)
	STP	(R12, R13), (16*8)(RSP)
	STP	(R10, R11), (14*8)(RSP)
	STP	(R8, R9), (12*8)(RSP)
	STP	(R6, R7), (10*8)(RSP)
	STP	(R4, R5), (8*8)(RSP)
	STP	(R2, R3), (6*8)(RSP)
	STP	(R0, R1), (4*8)(RSP)

	// Perform a safe-point check.
	// debugCallCheck takes its argument at 8(RSP); the two words read
	// back from 16(RSP) and 24(RSP) below are treated as the reason string.
	MOVD	R30, 8(RSP) // Caller's PC
	CALL	runtime·debugCallCheck(SB)
	MOVD	16(RSP), R0
	CBZ	R0, good

	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVD	R0, 8(RSP)
	MOVD	24(RSP), R0
	MOVD	R0, 16(RSP)

	// Set R20 to 8 and invoke BRK. The debugger should get the
	// reason a call can't be injected from SP+8 and resume execution.
	MOVD	$8, R20
	BREAK
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R20 to 0 and
	// invoke BRK. The debugger should write the argument
	// frame for the call at SP+8, set up argument registers,
	// set the LR as the signal PC + 4, set the PC to the function
	// to call, set R26 to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R20 to 1 and invoke
	// BRK. The debugger can then inspect any return value saved
	// on the stack at SP+8 and in registers. To resume execution,
	// the debugger should restore the LR from (SP).
	//
	// If the function panics, this will set R20 to 2 and invoke BRK.
	// The interface{} value of the panic will be at SP+8. The debugger
	// can inspect the panic value and resume execution again.
	//
	// Each DEBUG_CALL_DISPATCH expands to six instructions. When the
	// requested size in R0 exceeds MAXSIZE, BGT 5(PC) branches to the
	// instruction just past the expansion, i.e. the next, larger candidate.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMP	$MAXSIZE, R0;			\
	BGT	5(PC);				\
	MOVD	$NAME(SB), R0;			\
	MOVD	R0, 8(RSP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// The debugger wrote the size 16 bytes below the SP it set up, which
	// is 256 bytes above RSP now that the 272-byte frame is open.
	MOVD	256(RSP), R0 // the argument frame size
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVD	$debugCallFrameTooLarge<>(SB), R0
	MOVD	R0, 8(RSP)
	MOVD	$20, R0
	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
	MOVD	$8, R20
	BREAK
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R20 to 16 and invoke BRK. The debugger should restore
	// all registers except for PC and RSP and resume execution.
	MOVD	$16, R20
	BREAK
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	LDP	(30*8)(RSP), (R27, g)
	LDP	(28*8)(RSP), (R25, R26)
	LDP	(26*8)(RSP), (R23, R24)
	LDP	(24*8)(RSP), (R21, R22)
	LDP	(22*8)(RSP), (R19, R20)
	LDP	(20*8)(RSP), (R16, R17)
	LDP	(18*8)(RSP), (R14, R15)
	LDP	(16*8)(RSP), (R12, R13)
	LDP	(14*8)(RSP), (R10, R11)
	LDP	(12*8)(RSP), (R8, R9)
	LDP	(10*8)(RSP), (R6, R7)
	LDP	(8*8)(RSP), (R4, R5)
	LDP	(6*8)(RSP), (R2, R3)
	LDP	(4*8)(RSP), (R0, R1)

	// Reload the R29 and R30 values stored on entry. The old R30 goes
	// into R27 (overwriting the value just reloaded above) to serve as
	// the jump target.
	LDP	-8(RSP), (R29, R27)
	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
	MOVD	-16(RSP), R30 // restore old lr
	JMP	(R27)

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each generated function has a MAXSIZE-byte frame. It sets R20 to 0 and
// executes BREAK, then sets R20 to 1 and executes BREAK again, and returns.
// The meaning of those R20 values is given in the comments in debugCallV2.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVD	$0, R20;			\
	BREAK;					\
	MOVD	$1, R20;			\
	BREAK;					\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
// Copies the two words of val to 8(RSP) and 16(RSP), sets R20 to 2 and
// executes BREAK.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack at SP+8.
	MOVD	val_type+0(FP), R0
	MOVD	R0, 8(RSP)
	MOVD	val_data+8(FP), R0
	MOVD	R0, 16(RSP)
	MOVD	$2, R20
	BREAK
	RET

// panicBounds stores R0 through R15 in a 128-byte save area that starts
// at 24(RSP), then calls panicBounds64 with the return PC of the
// panicBounds call in R0 and the address of the save area in R1.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Any of these 16 integer registers could be holding an index.
	// Some may hold pointers instead, but any such pointer is dead.
	STP	(R0, R1), 3*8(RSP)
	STP	(R2, R3), 5*8(RSP)
	STP	(R4, R5), 7*8(RSP)
	STP	(R6, R7), 9*8(RSP)
	STP	(R8, R9), 11*8(RSP)
	STP	(R10, R11), 13*8(RSP)
	STP	(R12, R13), 15*8(RSP)
	STP	(R14, R15), 17*8(RSP)
	MOVD	LR, R0		// PC immediately after call to panicBounds
	ADD	$24, RSP, R1	// pointer to save area
	BL	runtime·panicBounds64<ABIInternal>(SB)
	RET

// getfp returns the current value of R29 (the frame pointer register) in R0.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVD	R29, R0
	RET

View as plain text