Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	// Move argc/argv off the stack into the C-ABI argument registers
	// that rt0_go expects.
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)
19
// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	// DI/SI already hold argc/argv, exactly what rt0_go wants.
	JMP	runtime·rt0_go(SB)
25
// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI0: preserve all host callee-saved
	// registers, since the C caller expects them back intact.
	PUSH_REGS_HOST_TO_ABI0()

	// Stash argc/argv where rt0_lib_go can pick them up later, when
	// the runtime is actually initialized on its own thread.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

#ifdef GOOS_windows
	// Set up a dummy TLS value on Windows so that the autogenerated
	// ABI wrappers don't crash when trying to load G from TLS before
	// wintls has set up the real TLS slot in rt0_go.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif

	CALL	runtime·libInit(SB)

	// Restore the C caller's registers and return to the host runtime.
	POP_REGS_HOST_TO_ABI0()
	RET
51
// rt0_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$0
	// Reload the argc/argv saved by _rt0_amd64_lib and enter the
	// normal runtime startup path.
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)
58
// Static storage for the argc/argv handed to _rt0_amd64_lib, consumed
// later by rt0_lib_go. NOPTR: these hold an int and a C pointer the GC
// must not scan.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
63
// bad_cpu_msg is the 84-byte error message printed by rt0_go's bad_cpu
// path when the processor lacks the microarchitecture level selected
// at build time via GOAMD64. Exactly one variant is compiled in.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84
77
// Define a list of AMD64 microarchitecture level features
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels

// CPUID leaf 1 ECX bits: SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
// CPUID leaf 0x80000001 ECX bit: LAHF/SAHF
#define V2_EXT_FEATURES_CX (1 << 0)
// v2 plus: FMA MOVBE OSXSAVE AVX F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
// ABM (for LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
// CPUID leaf 7 EBX bits: BMI1 AVX2 BMI2
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
// XCR0 bits: XMM and YMM state enabled by the OS
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
// v3 plus: AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
// XCR0 bits: OPMASK and ZMM state enabled by the OS
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
101
// Select the NEED_* requirements rt0_go will check, based on the
// GOAMD64 level this binary was built for. If none of the GOAMD64_v*
// symbols is defined (v1 baseline), no checks are compiled in.
#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define AVX512F 0x0000004000000000
#define AVX512CD 0x0000008000000000
#define AVX512DQ 0x0000010000000000
#define AVX512BW 0x0000020000000000
#define AVX512VL 0x0000100000000000
#define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif
141
// rt0_go is the runtime entry point proper, reached from all the
// platform entry stubs above with argc in DI and argv in SI. It sets
// up g0 and m0, establishes TLS, verifies GOAMD64 CPU requirements,
// initializes the scheduler, queues runtime.main as the first
// goroutine, and starts this M. It never returns.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP		// 3args 2auto
	ANDQ	$~15, SP	// 16-byte align, as the ABI requires
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// This is typically the entry point for Go programs.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	MOVQ	$0, BP

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX	// assume 64 kB of OS stack is usable
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// CPUID leaf 0 returns the vendor string in BX:DX:CX.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9	// arg 4
	MOVQ	DX, R8	// arg 3
	MOVQ	SI, DX	// arg 2
	MOVQ	DI, CX	// arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// With cgo, the C runtime already set up TLS for us
	// (except on Windows, which always needs the setup below).
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	// Make sure the extended CPUID leaves we query below exist.
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	JL	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	XORL    CX, CX
	// Read XCR0 to learn which register states the OS will save/restore.
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	MOVQ	$2, 0(SP)	// fd 2 (stderr)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)	// message length
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)	// exit status 1
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
	// intended to be called by debuggers.
	MOVQ	$runtime·debugPinnerV1<ABIInternal>(SB), AX
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET
370
// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
376
// breakpoint triggers a debugger trap (SIGTRAP). The raw 0xCC byte is
// the single-byte INT3 encoding debuggers recognize.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET
380
// asminit performs architecture-specific per-thread initialization.
// amd64 needs none.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET
384
// mstart is the entry point for new OS threads created by the runtime.
// It clears the frame pointer and hands off to runtime.mstart0, which
// never returns.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	// This is the root frame of new Go-created OS threads.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	// Note: MOVQ, not MOVD — MOVD is the arm64 spelling; the amd64
	// 64-bit register move is MOVQ, matching the identical
	// BP-clearing sequence in rt0_go.
	MOVQ	$0, BP
	CALL	runtime·mstart0(SB)
	RET // not reached
395
396 /*
397 * go-routine
398 */
399
// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX		// DX = g to resume
	MOVQ	0(DX), CX		// make sure g != nil (faults here if nil)
	JMP	gogo<>(SB)
407
// gogo<> does the actual context switch: install DX as the current g,
// restore SP/BP/ctxt/PC from the gobuf in BX, and jump to the saved PC.
// The gobuf fields are zeroed as they are consumed so the garbage
// collector doesn't see stale stack pointers.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX
420
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// ABIInternal: fn arrives in AX; R14 is the current g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// Erase register contents before leaving a secret-mode goroutine's
	// stack, so fn running on g0 cannot observe confidential values.
	CMPL	g_secret(R14), $0
	JEQ	nosecret
	CALL	·secretEraseRegistersMcall(SB)
nosecret:
#endif

	MOVQ	AX, DX	// DX = fn

	// Save state in g->sched. The caller's SP and PC are restored by gogo to
	// resume execution in the caller's frame (implicit return). The caller's BP
	// is also restored to support frame pointer unwinding.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
	// can cause a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX	// caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12
	CALL	R12		// fn(g)
	// The Windows native stack unwinder incorrectly classifies the next instruction
	// as part of the function epilogue, producing a wrong call stack.
	// Add a NOP to work around this issue. See go.dev/issue/67007.
	BYTE	$0x90
	POPQ	AX
	JMP	runtime·badmcall2(SB)	// fn returned — it must not
	RET
471
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// Align for consistency with offset used in gosave_systemstack_switch
	PCALIGN	$8
	UNDEF	// never actually executed; saved PC points here
	// Make sure this function is not leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET
487
// func systemstack(fn func())
// Run fn on the m->g0 (system) stack, then switch back. If already on
// g0 or gsignal, just tail call fn without switching.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// If in secret mode, erase registers on transition
	// from G stack to M stack,
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPL	g_secret(AX), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
nosecret:
#endif

	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// Switch stacks.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVQ	DI, DX		// DX = closure context for fn
	MOVQ	0(DI), DI	// code pointer
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear to help garbage collector
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
561
// func switchToCrashStack0(fn func())
// Switch to the reserved crash stack and call fn there. Used when the
// current stack may itself be corrupt. fn arrives in AX (ABIInternal);
// it must not return.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	g_m(R14), BX	// curm

	// set g to gcrash
	LEAQ	runtime·gcrash(SB), R14	// g = &gcrash
	MOVQ	BX, g_m(R14)		// g.m = curm
	MOVQ	R14, m_g0(BX)		// curm.g0 = g
	get_tls(CX)
	MOVQ	R14, g(CX)

	// switch to crashstack
	MOVQ	(g_stack+stack_hi)(R14), BX
	SUBQ	$(4*8), BX	// leave slack below stack_hi
	MOVQ	BX, SP

	// call target function
	MOVQ	AX, DX		// DX = closure context
	MOVQ	0(AX), AX	// code pointer
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
586
587 /*
588 * support for morestack
589 */
590
// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// On entry: DX holds f's closure context (preserved into g->sched);
// the stack still belongs to f, whose prologue called us.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), DI	// DI = g
	MOVQ	g_m(DI), BX	// BX = m

	// Set g->sched to context in f.
	MOVQ	0(SP), AX	// f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(DI)
	LEAQ	8(SP), AX	// f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(DI)
	MOVQ	BP, (g_sched+gobuf_bp)(DI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)

	MOVQ	m_g0(BX), SI	// SI = m.g0
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	DI, (m_morebuf+gobuf_g)(BX)

	// If in secret mode, erase registers on transition
	// from G stack to M stack,
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
	// Reload clobbered registers after the erase call.
	get_tls(CX)
	MOVQ	g(CX), DI	// DI = g
	MOVQ	g_m(DI), BX	// BX = m
nosecret:
#endif

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET
653
// morestack but not preserving ctxt.
// Zeroes DX (the context register morestack saves) before tail calling.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
658
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// The order matches the ABIInternal integer (AX..R11) and floating point
// (X0..X14) register-argument sequences.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET
686
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// Exact inverse of spillArgs; the offsets must stay in sync with it.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET
714
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the frame size in CX is <= MAXSIZE;
// otherwise it falls through to the next DISPATCH line.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
727
// reflectcall selects the call* variant whose fixed frame is just large
// enough for frameSize, and tail calls it with the original arguments
// still on the stack.
TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX	// CX = frame size for DISPATCH
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// Frame larger than 1 GB: give up.
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX
759
// CALLFN defines one fixed-frame call* function: copy the stack
// arguments into the local frame, load register arguments, call f,
// spill register results, and copy stack results back via callRet
// (which performs the write barriers reflectcallmove needs).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs(SB);		\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs(SB);		\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET
789
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)	// arg: stackArgsType
	MOVQ	DI, 8(SP)	// arg: dst (caller's result area)
	MOVQ	SI, 16(SP)	// arg: src (call* frame result area)
	MOVQ	CX, 24(SP)	// arg: size in bytes
	MOVQ	R12, 32(SP)	// arg: regArgs
	CALL	runtime·reflectcallmove(SB)
	RET
803
// Instantiate the fixed-frame call* functions for every power-of-two
// frame size reflectcall dispatches to, 16 B through 1 GB.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
831
// procyieldAsm spins for the requested number of cycles, issuing a
// PAUSE hint each iteration so the CPU knows this is a busy-wait
// (saves power and yields pipeline resources to the sibling
// hyperthread). A count of zero performs no PAUSE at all.
TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX	// AX = remaining iterations
spin:
	TESTL	AX, AX
	JZ	finished
	PAUSE
	DECL	AX
	JMP	spin
finished:
	RET
842
843
// publicationBarrier ensures that all previous writes are visible
// before any subsequent publication of a pointer to them.
TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET
848
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with frame pointer
// and without locals ($0) or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	// Take systemstack_switch PC and add 8 bytes to skip
	// the prologue. Keep 8 bytes offset consistent with
	// PCALIGN $8 in systemstack_switch, pointing start of
	// UNDEF instruction beyond prologue.
	MOVQ	$runtime·systemstack_switch+8(SB), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R14)
	LEAQ	8(SP), R9	// caller's SP (skip our return address)
	MOVQ	R9, (g_sched+gobuf_sp)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·abort(SB)
	RET
871
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX		// remember the unaligned SP
	ANDQ	$~15, SP	// alignment
	MOVQ	DX, 8(SP)	// stash original SP so we can restore it after the call
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	8(SP), DX
	MOVQ	DX, SP
	RET
887
// asmcgocall_landingpad calls AX with BX as argument.
// Must be called on the system stack.
TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
#ifdef GOOS_windows
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per Windows amd64 calling convention.
	ADJSP	$32
	// On Windows, asmcgocall_landingpad acts as landing pad for exceptions
	// thrown in the cgo call. Exceptions that reach this function will be
	// handled by runtime.sehtramp thanks to the SEH metadata added
	// by the compiler.
	// Note that runtime.sehtramp can't be attached directly to asmcgocall
	// because its initial stack pointer can be outside the system stack bounds,
	// and Windows stops the stack unwinding without calling the exception handler
	// when it reaches that point.
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	// The exception handler is not called if the next instruction is part of
	// the epilogue, which includes the RET instruction, so we need to add a NOP here.
	BYTE	$0x90
	ADJSP	$-32
	RET
#endif
	// Tail call AX on non-Windows, as the extra stack frame is not needed.
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	JMP	AX
914
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	get_tls(CX)
	MOVQ	g(CX), DI
	CMPQ	DI, $0
	JEQ	nosave
	MOVQ	g_m(DI), R8
	MOVQ	m_gsignal(R8), SI
	CMPQ	DI, SI
	JEQ	nosave
	MOVQ	m_g0(R8), SI
	CMPQ	DI, SI
	JEQ	nosave

	// Running on a user G
	// Figure out if we're running secret code and clear the registers
	// so that the C code we're about to call doesn't spill confidential
	// information into memory
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)

nosecret:
#endif
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX	// DX = SP on the user G stack, for depth calculation below

	// Switch to system stack.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	CALL	gosave_systemstack_switch<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBQ	$16, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 8(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 0(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	CALL	runtime·asmcgocall_landingpad(SB)

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	8(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	0(SP), SI	// recompute SP from depth, robust to a stack copy
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)	// C return value comes back in AX
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	SUBQ	$16, SP
	ANDQ	$~15, SP
	MOVQ	$0, 8(SP)	// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 0(SP)	// save original stack pointer
	CALL	runtime·asmcgocall_landingpad(SB)
	MOVQ	0(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET
998
#ifdef GOOS_windows
// Dummy TLS that's used on Windows so that we don't crash trying
// to restore the G register in needm. needm and its callees are
// very careful never to actually use the G, the TLS just can't be
// unset since we're in Go code.
GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
#endif
1006
// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Entered from C (via crosscall2) on the system/g0 stack. Acquires an m
// (borrowing one via needm if this thread has none), switches to m->curg,
// runs runtime.cgocallbackg(fn, frame, ctxt) there, then switches back and
// possibly releases the m. The $24 local frame holds the saved
// m->g0->sched.sp at 0(SP) (unwindm depends on that exact slot) and the
// saved old m at savedm-8(SP).
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVQ	fn+0(FP), AX
	CMPQ	AX, $0
	JNE	loadg
	// Restore the g from frame.
	get_tls(CX)
	MOVQ	frame+8(FP), BX
	MOVQ	BX, g(CX)
	JMP	dropm

loadg:
	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows TLS itself may not be set up yet; treat a nil TLS
	// pointer the same as a nil g (BX stays 0 and we fall into needm).
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	XORQ	R14, R14
	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
	CALL	AX
	MOVQ	$0, savedm-8(SP)	// oldm was nil: remember we borrowed this m
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	SI, R14 // set the g register, as required by ABIInternal.
	XORPS	X15, X15 // clear X15, as required by ABIInternal.
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), AX
	MOVQ	frame+8(FP), BX
	MOVQ	ctxt+16(FP), CX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), R8
	SUBQ	SP, R8   // R8 is our actual frame size
	SUBQ	R8, DI   // Allocate the same frame size on the g stack
	MOVQ	DI, SP

	MOVQ	$runtime·cgocallbackg<ABIInternal>(SB), DX
	CALL	DX // indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	// Indirect call for the same nosplit-analysis reason as needm above.
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET
1186
// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the thread-local g slot. Clobbers BX, CX.
// NOTE(review): unlike setg_gcc below, this does not update the R14
// g register; callers are responsible for that if they need it.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET
1194
// void setg_gcc(G*); set g called from gcc.
// In: DI = g pointer (first C ABI argument register).
// Sets both the TLS g slot and the R14 g register used by ABIInternal.
// Clobbers AX.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14 // set the g register
	RET
1201
// abort crashes the process with a breakpoint trap.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3	// breakpoint trap; expected to fault the process
loop:
	JMP	loop	// if the trap somehow returns, spin rather than fall through
1206
// check that SP is in range [g->stack.lo, g->stack.hi)
// Aborts the process if SP is outside the current g's stack bounds.
// Unsigned compares (JHI) because stack addresses are unsigned.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)	// ok: stack.hi > SP; skip the abort
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)	// ok: SP > stack.lo; skip the abort
	CALL	runtime·abort(SB)
	RET
1218
// func cputicks() int64
// Returns the CPU time-stamp counter as a 64-bit value.
// Clobbers AX, DX (and CX on the RDTSCP path: RDTSCP also loads
// IA32_TSC_AUX into ECX, which we ignore).
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	// TSC arrives split as DX:AX; combine into a single 64-bit AX.
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done
1245
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// Tail-jumps to aeshashbody (or the pure-Go fallback), so the callee's
// AX result is returned directly to our caller.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
1257
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// Unpacks the string header at AX into (data ptr, length) = (AX, CX)
// and tail-jumps to the shared AES hash body; falls back to the
// pure-Go implementation when AES hashing is disabled.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
1269
// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Shared AES-NI hash body for memhash/strhash. Dispatches on length to
// size-specialized paths (0-15, 16, 17-32, 33-64, 65-128, 129+ bytes);
// every path leaves the 64-bit hash in AX. The paths that touch X15
// re-zero it before returning, as required by ABIInternal.
TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// If p is within the last 16 bytes of a 4K page, a straight
	// 16-byte load could fault on the next page. After AX += 16,
	// page-offset bits 4-11 being zero means exactly that case.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1	// masks entry 16*len keeps low len bytes
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (the two 16-byte loads may overlap for lengths < 32)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 bytes plus (possibly overlapping) last 32 bytes
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 bytes plus (possibly overlapping) last 64 bytes
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN $16
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine the 8 lanes down to one result
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET
1606
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Mixes the 4 data bytes with the seed via three AES rounds.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data (inserted as dword 2, bits 64-95)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
1623
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Mixes the 8 data bytes with the seed via three AES rounds.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data (inserted into the high qword)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
1640
// simple mask to get rid of data in the high part of the register.
// Entry i (the 16 bytes at masks<>+16*i) keeps the low i bytes and
// zeroes the rest. aeshashbody indexes it with (CX*8) after doubling
// CX, i.e. byte offset 16*length.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
1675
// func checkASM() bool
// Reports whether the data tables used by the assembly hash code meet
// their alignment requirement. Called once from Go at startup.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX		// any misaligned low bit of either address survives the OR
	TESTQ	$15, AX
	SETEQ	ret+0(FP)	// true iff both addresses have zero low 4 bits
	RET
1685
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Entry i lives at shifts<>+16*i; the 0xff control bytes make PSHUFB
// write zero into the corresponding destination bytes.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
1722
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Out: AX = curg stack high bound. Clobbers only AX, CX (both
// caller-saved in the C ABI).
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET
1732
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP exists so that the return PC (goexit+1) still lies
// inside goexit's code range; goexit1 never returns.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1740
// This is called from .init_array and follows the platform, not Go, ABI.
// In: DI = pointer to the new module's moduledata (first C argument).
// Appends it to the runtime's moduledata linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)	// old tail -> new module
	MOVQ	DI, runtime·lastmoduledatap(SB)	// new module becomes the tail
	POPQ	R15
	RET
1749
// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
// Establishes the ABIInternal fixed registers (R14 = g, X15 = 0)
// before tail-jumping to the ABIInternal sigpanic.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	get_tls(R14)
	MOVQ	g(R14), R14	// R14 = g, per ABIInternal
	XORPS	X15, X15	// X15 = 0, per ABIInternal
	JMP	·sigpanic<ABIInternal>(SB)
1759
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcBatchBarrier2(SB)
//     MOVQ    AX, (R11)
//     MOVQ    88(CX), DX
//     MOVQ    DX, 8(R11)
// dowrite:
//     MOVQ    AX, 88(CX)
//
// Frame layout ($112): 0..88(SP) are spill slots for the slow (flush)
// path; 96(SP)/104(SP) hold R12/R13 for the fast path.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13	// R14 is the g register (ABIInternal)
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12			// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)	// also preserves the caller's requested byte count
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	retry	// buffer was emptied; the reservation must now fit
1849
// gcWriteBarrierN entry points: each loads the number of buffer bytes
// it needs (N pointer slots * 8) into R11 and tail-jumps to the shared
// gcWriteBarrier body above, which returns the buffer space in R11.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)
1874
// Error string reported to the debugger when the requested call frame
// exceeds the largest debugCallNNNNN size.
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1877
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest debugCallNNNNN that can hold the frame.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
2035
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each debugCallNNN<> stub reserves an NNN-byte frame and breakpoints
// twice with the debug-call protocol register R12 set to 0 and then 1
// (the R12/INT3 protocol is described above debugCallV2).
// NOTE(review): presumably the debugger performs the injected call
// between the two INT3 stops, using the stub's frame for arguments and
// results — confirm against runtime/debugcall.go.
//
// No // comments inside the macro: they would comment out the trailing
// line-continuation backslashes.
#define DEBUG_CALL_FN(NAME,MAXSIZE) \
TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
NO_LOCAL_POINTERS; \
MOVQ $0, R12; \
BYTE $0xcc; \
MOVQ $1, R12; \
BYTE $0xcc; \
RET
// One stub per frame size handled by DEBUG_CALL_DISPATCH above.
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
2058
// func debugCallPanicked(val interface{})
//
// debugCallPanicked reports a panic from an injected debugger call back
// to the attached debugger using the R12 = 2 protocol value: "the call
// panicked; its interface{} panic value is at SP" (see the protocol
// description above debugCallV2).
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	// An interface{} is two words: the type pointer and the data pointer.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	// R12 = 2, INT3: stop so the debugger can inspect the panic value
	// at SP and resume execution.
	MOVQ	$2, R12
	BYTE	$0xcc
	RET
2069
// panicBounds is the entry point for bounds-check failures. It spills
// the integer registers (which may hold the faulting index/length
// values) into its frame and hands runtime.panicBounds64 the caller's
// PC (in AX) and a pointer to the spilled registers (in BX), the first
// two ABIInternal integer argument registers.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Save all 14 int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	// Slots are laid out by hardware register number, starting at 16(SP).
	MOVQ	AX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	MOVQ	BX, 40(SP)
	// skip SP @ 48(SP)
	MOVQ	BP, 56(SP)
	MOVQ	SI, 64(SP)
	MOVQ	DI, 72(SP)
	MOVQ	R8, 80(SP)
	MOVQ	R9, 88(SP)
	MOVQ	R10, 96(SP)
	MOVQ	R11, 104(SP)
	MOVQ	R12, 112(SP)
	MOVQ	R13, 120(SP)
	// skip R14 @ 128(SP) (aka G)
	MOVQ	R15, 136(SP)

	MOVQ	SP, AX		// hide SP read from vet
	// 152 = $144 frame + 8; presumably the saved frame pointer sits
	// between the frame and the return address — confirm frame layout.
	MOVQ	152(AX), AX	// PC immediately after call to panicBounds
	LEAQ	16(SP), BX	// BX = base of the saved-register array
	CALL	runtime·panicBounds64<ABIInternal>(SB)
	RET
2096
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// tls_g holds the byte offset of the TLS slot: 16 = slot 2 * 8 bytes.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
// On Windows tls_g starts zeroed; the real TLS slot offset is stored
// at startup (see the TLS setup in rt0_go, referenced by the dummy-TLS
// comment in _rt0_amd64_lib above).
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
2106
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// Each stub transfers control to the address in its register without an
// indirect branch the CPU can mis-speculate:
//   CALL setup  pushes the address of nospec: and jumps over the 4-byte
//               nospec loop to setup:, so the return-stack predictor
//               now predicts a return to nospec:.
//   nospec:     PAUSE; JMP nospec — a harmless trap for any speculative
//               return.
//   setup:      overwrites the pushed return address at 0(SP) with the
//               target register, then RETs: the architectural transfer
//               goes to the target while speculation lands in nospec:.
// The bytes are hand-encoded; `reg` is the hardware register number,
// with reg&8 supplying the REX.R extension bit and reg&7 the ModRM reg
// field of the MOVQ.
#define RETPOLINE(reg) \
/* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
/* nospec: */ \
/* PAUSE */ BYTE $0xF3; BYTE $0x90; \
/* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
/* setup: */ \
/* MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
/* RET */ BYTE $0xC3
2120
// One stub per general register; the macro argument is the hardware
// register number (AX=0, CX=1, ..., R15=15).
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2137
// getfp returns the frame pointer register (BP) of its caller, in AX
// (the first ABIInternal result register). NOFRAME ensures this
// function itself does not touch BP, so BP still holds the caller's
// frame pointer on entry.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ	BP, AX
	RET
2141