Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	// Load argc/argv from the kernel-provided stack layout into the
	// registers rt0_go expects, then enter common startup.
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)
19
// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	// DI/SI already hold argc/argv per the C ABI; rt0_go reads them there.
	JMP	runtime·rt0_go(SB)
25
// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()

	// Stash argc/argv for the runtime-init thread (rt0_lib_go) to pick up.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

#ifdef GOOS_windows
	// Set up a dummy TLS value on Windows so that the autogenerated
	// ABI wrappers don't crash when trying to load G from TLS before
	// wintls has set up the real TLS slot in rt0_go.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif

	CALL	runtime·libInit(SB)

	// Restore the host ABI registers before returning to C.
	POP_REGS_HOST_TO_ABI0()
	RET
51
// rt0_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$0
	// Reload the argc/argv that _rt0_amd64_lib saved, then enter the
	// normal runtime startup path.
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

// Storage for argc/argv captured by _rt0_amd64_lib and consumed by
// rt0_lib_go above. NOPTR: holds no Go pointers the GC must scan.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
63
// bad_cpu_msg is the message printed by the bad_cpu path in rt0_go when
// the CPU lacks the microarchitecture level the binary was compiled for.
// Exactly one variant is selected by the GOAMD64 build setting; all are
// 84 bytes to match the write length used in bad_cpu.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84
77
// Define a list of AMD64 microarchitecture level features
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
// Each *_FEATURES_* mask is checked against the corresponding CPUID
// (or XGETBV) output register in rt0_go; a missing bit routes to bad_cpu.

// SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
// LAHF/SAHF
#define V2_EXT_FEATURES_CX (1 << 0)
// FMA MOVBE OSXSAVE AVX F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
// ABM (for LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
// BMI1 AVX2 BMI2
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
// XMM YMM
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
// AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
// OPMASK ZMM
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))

#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define AVX512F 0x0000004000000000
#define AVX512CD 0x0000008000000000
#define AVX512DQ 0x0000010000000000
#define AVX512BW 0x0000020000000000
#define AVX512VL 0x0000100000000000
#define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif
141
// rt0_go is the common runtime entry point: it sets up g0/m0, TLS,
// runs cgo init if present, validates GOAMD64 CPU requirements, then
// initializes the scheduler and starts runtime.main on a new goroutine.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP		// 3args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// This is typically the entry point for Go programs.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	MOVQ	$0, BP

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// CPUID leaf 0 returns the vendor string in BX:DX:CX.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	JL	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	// XGETBV reads XCR0: checks that the OS saves/restores the
	// required vector register state on context switch.
	XORL    CX, CX
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	// write(2 /* stderr */, bad_cpu_msg, 84), then exit(1).
	MOVQ	$2, 0(SP)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
	// intended to be called by debuggers.
	// (Unreachable: only present so the linker keeps these symbols.)
	MOVQ	$runtime·debugPinnerV1<ABIInternal>(SB), AX
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET
370
// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
376
// breakpoint executes a hardware breakpoint trap (INT3, 0xCC).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET
380
// asminit performs per-thread assembly-level initialization.
// Nothing is needed on amd64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET
384
// mstart is the root frame of new Go-created OS threads; it clears the
// frame pointer and hands control to the Go-level runtime.mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	// This is the root frame of new Go-created OS threads.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	// Fix: MOVD is not an amd64 GP-register move (it is the arm64/SSE
	// spelling); use MOVQ, matching the identical sequence in rt0_go.
	MOVQ	$0, BP
	CALL	runtime·mstart0(SB)
	RET // not reached
395
/*
 * go-routine
 */

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	// Tail into the private helper that actually restores registers.
	JMP	gogo<>(SB)
407
// gogo<> restores SP, BP, ctxt, and PC from the gobuf in BX (g in DX),
// clearing the saved pointers in the gobuf to help the garbage collector.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX
420
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// Erase register contents before leaving a secret-mode goroutine's stack.
	CMPL	g_secret(R14), $0
	JEQ	nosecret
	CALL	·secretEraseRegistersMcall(SB)
nosecret:
#endif

	MOVQ	AX, DX	// DX = fn

	// Save state in g->sched. The caller's SP and PC are restored by gogo to
	// resume execution in the caller's frame (implicit return). The caller's BP
	// is also restored to support frame pointer unwinding.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
	// can cause a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX // caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12
	CALL	R12		// fn(g)
	// The Windows native stack unwinder incorrectly classifies the next instruction
	// as part of the function epilogue, producing a wrong call stack.
	// Add a NOP to work around this issue. See go.dev/issue/67007.
	BYTE	$0x90
	POPQ	AX
	JMP	runtime·badmcall2(SB)
	RET
471
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// Align for consistency with offset used in gosave_systemstack_switch
	PCALIGN	$8
	// This function is never actually executed; gosave_systemstack_switch
	// records systemstack_switch+8 (the UNDEF below) as a fake PC.
	UNDEF
	// Make sure this function is not leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET
487
// func systemstack(fn func())
// Run fn on the system (g0) stack, switching there and back if needed.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// If in secret mode, erase registers on transition
	// from G stack to M stack,
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPL	g_secret(AX), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
nosecret:
#endif

	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	// Already on the signal stack? Run fn in place.
	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// Switch stacks.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
561
// func switchToCrashStack0(fn func())
// Switch the current M onto the dedicated crash stack and call fn,
// which must not return.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	g_m(R14), BX // curm

	// set g to gcrash
	LEAQ	runtime·gcrash(SB), R14 // g = &gcrash
	MOVQ	BX, g_m(R14)            // g.m = curm
	MOVQ	R14, m_g0(BX)           // curm.g0 = g
	get_tls(CX)
	MOVQ	R14, g(CX)

	// switch to crashstack
	MOVQ	(g_stack+stack_hi)(R14), BX
	SUBQ	$(4*8), BX
	MOVQ	BX, SP

	// call target function
	MOVQ	AX, DX
	MOVQ	0(AX), AX
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
586
/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), DI     // DI = g
	MOVQ	g_m(DI), BX   // BX = m

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(DI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(DI)
	MOVQ	BP, (g_sched+gobuf_bp)(DI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)

	// Growing g0's stack is fatal: report and abort.
	MOVQ	m_g0(BX), SI  // SI = m.g0
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	DI, (m_morebuf+gobuf_g)(BX)

	// If in secret mode, erase registers on transition
	// from G stack to M stack,
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
	// Reload g/m clobbered by the erase call.
	get_tls(CX)
	MOVQ	g(CX), DI     // DI = g
	MOVQ	g_m(DI), BX   // BX = m
nosecret:
#endif

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET
653
// morestack but not preserving ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	// Zero DX (the closure context register) before entering morestack.
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
658
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Integer registers occupy the first 9 slots, X0-X14 the rest.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET
686
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// Mirror image of spillArgs: same slot layout, opposite direction.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET
714
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME if the frame size in CX is <= MAXSIZE.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX
759
// CALLFN defines one fixed-frame-size call* function used by reflectcall:
// copy stack args, load register args, call f, then spill results and
// copy stack results back via callRet.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs(SB);		\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs(SB);		\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET
789
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	// Marshal register args (DX, DI, SI, CX, R12) into the stack frame
	// expected by runtime.reflectcallmove.
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	R12, 32(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
803
// Instantiate one call* function per power-of-two frame size
// dispatched to by reflectcall above.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
831
// procyieldAsm spins for the given number of cycles, executing PAUSE
// each iteration to be polite to the sibling hyperthread.
TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
	TESTL	AX, AX
	JZ	done	// zero cycles: nothing to do
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
done:
	RET
842
843
TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET
848
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with frame pointer
// and without locals ($0) or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	// Take systemstack_switch PC and add 8 bytes to skip
	// the prologue. Keep 8 bytes offset consistent with
	// PCALIGN $8 in systemstack_switch, pointing start of
	// UNDEF instruction beyond prologue.
	MOVQ	$runtime·systemstack_switch+8(SB), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R14)
	LEAQ	8(SP), R9
	MOVQ	R9, (g_sched+gobuf_sp)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·abort(SB)
	RET
871
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX
	ANDQ	$~15, SP	// alignment
	MOVQ	DX, 8(SP)	// save original SP so we can restore it after the C call
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	8(SP), DX
	MOVQ	DX, SP
	RET
887
// asmcgocall_landingpad calls AX with BX as argument.
// Must be called on the system stack.
TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
#ifdef GOOS_windows
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per Windows amd64 calling convention.
	ADJSP	$32
	// On Windows, asmcgocall_landingpad acts as landing pad for exceptions
	// thrown in the cgo call. Exceptions that reach this function will be
	// handled by runtime.sehtramp thanks to the SEH metadata added
	// by the compiler.
	// Note that runtime.sehtramp can't be attached directly to asmcgocall
	// because its initial stack pointer can be outside the system stack bounds,
	// and Windows stops the stack unwinding without calling the exception handler
	// when it reaches that point.
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	// The exception handler is not called if the next instruction is part of
	// the epilogue, which includes the RET instruction, so we need to add a NOP here.
	BYTE	$0x90
	ADJSP	$-32
	RET
#endif
	// Tail call AX on non-Windows, as the extra stack frame is not needed.
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	JMP	AX
914
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	get_tls(CX)
	MOVQ	g(CX), DI
	CMPQ	DI, $0
	JEQ	nosave
	MOVQ	g_m(DI), R8
	MOVQ	m_gsignal(R8), SI
	CMPQ	DI, SI
	JEQ	nosave
	MOVQ	m_g0(R8), SI
	CMPQ	DI, SI
	JEQ	nosave

	// Running on a user G
	// Figure out if we're running secret code and clear the registers
	// so that the C code we're about to call doesn't spill confidential
	// information into memory
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)

nosecret:
#endif
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	// Switch to system stack.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	CALL	gosave_systemstack_switch<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBQ	$16, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 8(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 0(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	CALL	runtime·asmcgocall_landingpad(SB)

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	8(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	0(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	SUBQ	$16, SP
	ANDQ	$~15, SP
	MOVQ	$0, 8(SP)	// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 0(SP)	// save original stack pointer
	CALL	runtime·asmcgocall_landingpad(SB)
	MOVQ	0(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET
998
#ifdef GOOS_windows
// Dummy TLS that's used on Windows so that we don't crash trying
// to restore the G register in needm. needm and its callees are
// very careful never to actually use the G, the TLS just can't be
// unset since we're in Go code.
GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
#endif
1006
1007 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1008 // See cgocall.go for more details.
// Local frame layout: savedm-8(SP) holds the m that was current on
// entry (0 when needm had to borrow one).
1009 TEXT ·cgocallback(SB),NOSPLIT,$24-24
1010 NO_LOCAL_POINTERS
1011
1012 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
1013 // It is used to dropm while thread is exiting.
1014 MOVQ fn+0(FP), AX
1015 CMPQ AX, $0
1016 JNE loadg
1017 // Restore the g from frame.
1018 get_tls(CX)
1019 MOVQ frame+8(FP), BX
1020 MOVQ BX, g(CX)
1021 JMP dropm // fn == nil: no callback to run, just release the m
1022
1023 loadg:
1024 // If g is nil, Go did not create the current thread,
1025 // or if this thread never called into Go on pthread platforms.
1026 // Call needm to obtain one m for temporary use.
1027 // In this case, we're running on the thread stack, so there's
1028 // lots of space, but the linker doesn't know. Hide the call from
1029 // the linker analysis by using an indirect call through AX.
1030 get_tls(CX)
1031 #ifdef GOOS_windows
1032 MOVL $0, BX
1033 CMPQ CX, $0
1034 JEQ 2(PC)
1035 #endif
1036 MOVQ g(CX), BX
1037 CMPQ BX, $0
1038 JEQ needm
1039 MOVQ g_m(BX), BX
1040 MOVQ BX, savedm-8(SP) // saved copy of oldm
1041 JMP havem
1042 needm:
1043 #ifdef GOOS_windows
1044 // Set up a dummy TLS value. needm is careful not to use it,
1045 // but it needs to be there to prevent autogenerated code from
1046 // crashing when it loads from it.
1047 // We don't need to clear it or anything later because needm
1048 // will set up TLS properly.
1049 MOVQ $zeroTLS<>(SB), DI
1050 CALL runtime·settls(SB)
1051 #endif
1052 // On some platforms (Windows) we cannot call needm through
1053 // an ABI wrapper because there's no TLS set up, and the ABI
1054 // wrapper will try to restore the G register (R14) from TLS.
1055 // Clear X15 because Go expects it and we're not calling
1056 // through a wrapper, but otherwise avoid setting the G
1057 // register in the wrapper and call needm directly. It
1058 // takes no arguments and doesn't return any values so
1059 // there's no need to handle that. Clear R14 so that there's
1060 // a bad value in there, in case needm tries to use it.
1061 XORPS X15, X15
1062 XORQ R14, R14
1063 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
1064 CALL AX
1065 MOVQ $0, savedm-8(SP)
1066 get_tls(CX)
1067 MOVQ g(CX), BX
1068 MOVQ g_m(BX), BX
1069
1070 // Set m->sched.sp = SP, so that if a panic happens
1071 // during the function we are about to execute, it will
1072 // have a valid SP to run on the g0 stack.
1073 // The next few lines (after the havem label)
1074 // will save this SP onto the stack and then write
1075 // the same SP back to m->sched.sp. That seems redundant,
1076 // but if an unrecovered panic happens, unwindm will
1077 // restore the g->sched.sp from the stack location
1078 // and then systemstack will try to use it. If we don't set it here,
1079 // that restored SP will be uninitialized (typically 0) and
1080 // will not be usable.
1081 MOVQ m_g0(BX), SI
1082 MOVQ SP, (g_sched+gobuf_sp)(SI)
1083
1084 havem:
1085 // Now there's a valid m, and we're running on its m->g0.
1086 // Save current m->g0->sched.sp on stack and then set it to SP.
1087 // Save current sp in m->g0->sched.sp in preparation for
1088 // switch back to m->curg stack.
1089 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
1090 MOVQ m_g0(BX), SI
1091 MOVQ (g_sched+gobuf_sp)(SI), AX
1092 MOVQ AX, 0(SP)
1093 MOVQ SP, (g_sched+gobuf_sp)(SI)
1094
1095 // Switch to m->curg stack and call runtime.cgocallbackg.
1096 // Because we are taking over the execution of m->curg
1097 // but *not* resuming what had been running, we need to
1098 // save that information (m->curg->sched) so we can restore it.
1099 // We can restore m->curg->sched.sp easily, because calling
1100 // runtime.cgocallbackg leaves SP unchanged upon return.
1101 // To save m->curg->sched.pc, we push it onto the curg stack and
1102 // open a frame the same size as cgocallback's g0 frame.
1103 // Once we switch to the curg stack, the pushed PC will appear
1104 // to be the return PC of cgocallback, so that the traceback
1105 // will seamlessly trace back into the earlier calls.
1106 MOVQ m_curg(BX), SI
1107 MOVQ SI, g(CX)
1108 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
1109 MOVQ (g_sched+gobuf_pc)(SI), BX
1110 MOVQ BX, -8(DI) // "push" return PC on the g stack
1111 // Gather our arguments into registers.
1112 MOVQ fn+0(FP), BX
1113 MOVQ frame+8(FP), CX
1114 MOVQ ctxt+16(FP), DX
1115 // Compute the size of the frame, including return PC and, if
1116 // GOEXPERIMENT=framepointer, the saved base pointer
1117 LEAQ fn+0(FP), AX
1118 SUBQ SP, AX // AX is our actual frame size
1119 SUBQ AX, DI // Allocate the same frame size on the g stack
1120 MOVQ DI, SP
1121
1122 MOVQ BX, 0(SP)
1123 MOVQ CX, 8(SP)
1124 MOVQ DX, 16(SP)
1125 MOVQ $runtime·cgocallbackg(SB), AX
1126 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
1127
1128 // Compute the size of the frame again. FP and SP have
1129 // completely different values here than they did above,
1130 // but only their difference matters.
1131 LEAQ fn+0(FP), AX
1132 SUBQ SP, AX
1133
1134 // Restore g->sched (== m->curg->sched) from saved values.
1135 get_tls(CX)
1136 MOVQ g(CX), SI
1137 MOVQ SP, DI
1138 ADDQ AX, DI
1139 MOVQ -8(DI), BX
1140 MOVQ BX, (g_sched+gobuf_pc)(SI)
1141 MOVQ DI, (g_sched+gobuf_sp)(SI)
1142
1143 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1144 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1145 // so we do not have to restore it.)
1146 MOVQ g(CX), BX
1147 MOVQ g_m(BX), BX
1148 MOVQ m_g0(BX), SI
1149 MOVQ SI, g(CX)
1150 MOVQ (g_sched+gobuf_sp)(SI), SP
1151 MOVQ 0(SP), AX
1152 MOVQ AX, (g_sched+gobuf_sp)(SI)
1153
1154 // If the m on entry was nil, we called needm above to borrow an m,
1155 // 1. for the duration of the call on non-pthread platforms,
1156 // 2. or the duration of the C thread alive on pthread platforms.
1157 // If the m on entry wasn't nil,
1158 // 1. the thread might be a Go thread,
1159 // 2. or it wasn't the first call from a C thread on pthread platforms,
1160 // since then we skip dropm to reuse the m in the first call.
1161 MOVQ savedm-8(SP), BX
1162 CMPQ BX, $0
1163 JNE done
1164
1165 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1166 MOVQ _cgo_pthread_key_created(SB), AX
1167 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
1168 CMPQ AX, $0
1169 JEQ dropm
1170 CMPQ (AX), $0
1171 JNE done
1172
1173 dropm:
1174 MOVQ $runtime·dropm(SB), AX
1175 CALL AX
1176 #ifdef GOOS_windows
1177 // We need to clear the TLS pointer in case the next
1178 // thread that comes into Go tries to reuse that space
1179 // but uses the same M.
1180 XORQ DI, DI
1181 CALL runtime·settls(SB)
1182 #endif
1183 done:
1184
1185 // Done!
1186 RET
1187
// func setg(gg *g)
// Install gg as this thread's current g, for use by needm.
// Only the TLS copy of g is written here; the g register (R14)
// is left untouched by this function.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	get_tls(AX)
	MOVQ	gg+0(FP), DX
	MOVQ	DX, g(AX)
	RET
1195
// void setg_gcc(G*); set g called from gcc.
// DI carries the g pointer (first C ABI argument). Both the TLS
// slot and the dedicated g register (R14) are updated; only the
// caller-saved AX is used as scratch.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	MOVQ	DI, R14                 // set the g register
	get_tls(AX)
	MOVQ	DI, g(AX)               // and the TLS copy of g
	RET
1202
// abort crashes the process with a breakpoint trap (INT $3) and
// spins forever in case the trap somehow returns.
1203 TEXT runtime·abort(SB),NOSPLIT,$0-0
1204 INT $3
1205 loop:
1206 JMP loop
1207
1208 // check that SP is in range [g->stack.lo, g->stack.hi)
// Each failed bound check falls through into runtime·abort;
// JHI 2(PC) skips the abort when the check passes.
1209 TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
1210 get_tls(CX)
1211 MOVQ g(CX), AX
1212 CMPQ (g_stack+stack_hi)(AX), SP
1213 JHI 2(PC)
1214 CALL runtime·abort(SB) // SP >= stack.hi: out of range
1215 CMPQ SP, (g_stack+stack_lo)(AX)
1216 JHI 2(PC)
1217 CALL runtime·abort(SB) // SP <= stack.lo: out of range
1218 RET
1219
1220 // func cputicks() int64
// Returns the CPU timestamp counter, using RDTSCP when available
// and an MFENCE;LFENCE;RDTSC sequence otherwise. Both paths leave
// the counter split across DX:AX and merge it at "done".
1221 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
1222 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
1223 JNE fences
1224 // Instruction stream serializing RDTSCP is supported.
1225 // RDTSCP is supported by Intel Nehalem (2008) and
1226 // AMD K8 Rev. F (2006) and newer.
1227 RDTSCP // DX:AX = TSC (RDTSCP also writes CX with TSC_AUX)
1228 done:
// Combine DX:AX into a single 64-bit tick count in AX.
1229 SHLQ $32, DX
1230 ADDQ DX, AX
1231 MOVQ AX, ret+0(FP)
1232 RET
1233 fences:
1234 // MFENCE is instruction stream serializing and flushes the
1235 // store buffers on AMD. The serialization semantics of LFENCE on AMD
1236 // are dependent on MSR C001_1029 and CPU generation.
1237 // LFENCE on Intel does wait for all previous instructions to have executed.
1238 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
1239 // previous instructions executed and all previous loads and stores to globally visible.
1240 // Using MFENCE;LFENCE here aligns the serializing properties without
1241 // runtime detection of CPU manufacturer.
1242 MFENCE
1243 LFENCE
1244 RDTSC
1245 JMP done
1246
1247 // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
1248 // hash function using AES hardware instructions
// Arguments arrive in AX/BX/CX (ABIInternal); both tail calls below
// leave those registers intact, so the targets see the same arguments.
1249 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
1250 // AX = ptr to data
1251 // BX = seed
1252 // CX = size
1253 CMPB runtime·useAeshash(SB), $0
1254 JEQ noaes
1255 JMP runtime·aeshashbody<>(SB)
1256 noaes:
1257 JMP runtime·memhashFallback<ABIInternal>(SB)
1258
1259 // func strhash(p unsafe.Pointer, h uintptr) uintptr
// Unpacks the string header (AX = data, CX = length) before tail-calling
// aeshashbody, which expects the memhash-style register arguments.
1260 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
1261 // AX = ptr to string struct
1262 // BX = seed
1263 CMPB runtime·useAeshash(SB), $0
1264 JEQ noaes
1265 MOVQ 8(AX), CX // length of string
1266 MOVQ (AX), AX // string data
1267 JMP runtime·aeshashbody<>(SB)
1268 noaes:
1269 JMP runtime·strhashFallback<ABIInternal>(SB)
1270
1271 // AX: data
1272 // BX: hash seed
1273 // CX: length
1274 // At return: AX = return value
// Dispatches on length into size-class paths (0-15, 16, 17-32, 33-64,
// 65-128, 129+). Paths that touch X15 re-zero it before RET, since the
// ABIInternal invariant requires X15 == 0 (see comments below).
1275 TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
1276 // Fill an SSE register with our seeds.
1277 MOVQ BX, X0 // 64 bits of per-table hash seed
1278 PINSRW $4, CX, X0 // 16 bits of length
1279 PSHUFHW $0, X0, X0 // repeat length 4 times total
1280 MOVO X0, X1 // save unscrambled seed
1281 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1282 AESENC X0, X0 // scramble seed
1283
1284 CMPQ CX, $16
1285 JB aes0to15
1286 JE aes16
1287 CMPQ CX, $32
1288 JBE aes17to32
1289 CMPQ CX, $64
1290 JBE aes33to64
1291 CMPQ CX, $128
1292 JBE aes65to128
1293 JMP aes129plus
1294
1295 aes0to15:
1296 TESTQ CX, CX
1297 JE aes0
1298
1299 ADDQ $16, AX
1300 TESTW $0xff0, AX
1301 JE endofpage
1302
1303 // 16 bytes loaded at this address won't cross
1304 // a page boundary, so we can load it directly.
1305 MOVOU -16(AX), X1
1306 ADDQ CX, CX // CX*2: mask entries are 16 bytes apart, scaled by 8 below
1307 MOVQ $masks<>(SB), AX
1308 PAND (AX)(CX*8), X1 // keep only the low CX (original) bytes
1309 final1:
1310 PXOR X0, X1 // xor data with seed
1311 AESENC X1, X1 // scramble combo 3 times
1312 AESENC X1, X1
1313 AESENC X1, X1
1314 MOVQ X1, AX // return X1
1315 RET
1316
1317 endofpage:
1318 // address ends in 1111xxxx. Might be up against
1319 // a page boundary, so load ending at last byte.
1320 // Then shift bytes down using pshufb.
1321 MOVOU -32(AX)(CX*1), X1
1322 ADDQ CX, CX // CX*2: shift entries are 16 bytes apart, scaled by 8 below
1323 MOVQ $shifts<>(SB), AX
1324 PSHUFB (AX)(CX*8), X1
1325 JMP final1
1326
1327 aes0:
1328 // Return scrambled input seed
1329 AESENC X0, X0
1330 MOVQ X0, AX // return X0
1331 RET
1332
1333 aes16:
1334 MOVOU (AX), X1
1335 JMP final1
1336
1337 aes17to32:
1338 // make second starting seed
1339 PXOR runtime·aeskeysched+16(SB), X1
1340 AESENC X1, X1
1341
1342 // load data to be hashed
1343 MOVOU (AX), X2
1344 MOVOU -16(AX)(CX*1), X3 // last 16 bytes; may overlap X2's load
1345
1346 // xor with seed
1347 PXOR X0, X2
1348 PXOR X1, X3
1349
1350 // scramble 3 times
1351 AESENC X2, X2
1352 AESENC X3, X3
1353 AESENC X2, X2
1354 AESENC X3, X3
1355 AESENC X2, X2
1356 AESENC X3, X3
1357
1358 // combine results
1359 PXOR X3, X2
1360 MOVQ X2, AX // return X2
1361 RET
1362
1363 aes33to64:
1364 // make 3 more starting seeds
1365 MOVO X1, X2
1366 MOVO X1, X3
1367 PXOR runtime·aeskeysched+16(SB), X1
1368 PXOR runtime·aeskeysched+32(SB), X2
1369 PXOR runtime·aeskeysched+48(SB), X3
1370 AESENC X1, X1
1371 AESENC X2, X2
1372 AESENC X3, X3
1373
1374 MOVOU (AX), X4
1375 MOVOU 16(AX), X5
1376 MOVOU -32(AX)(CX*1), X6
1377 MOVOU -16(AX)(CX*1), X7
1378
1379 PXOR X0, X4
1380 PXOR X1, X5
1381 PXOR X2, X6
1382 PXOR X3, X7
1383
1384 AESENC X4, X4
1385 AESENC X5, X5
1386 AESENC X6, X6
1387 AESENC X7, X7
1388
1389 AESENC X4, X4
1390 AESENC X5, X5
1391 AESENC X6, X6
1392 AESENC X7, X7
1393
1394 AESENC X4, X4
1395 AESENC X5, X5
1396 AESENC X6, X6
1397 AESENC X7, X7
1398
1399 PXOR X6, X4
1400 PXOR X7, X5
1401 PXOR X5, X4
1402 MOVQ X4, AX // return X4
1403 RET
1404
1405 aes65to128:
1406 // make 7 more starting seeds
1407 MOVO X1, X2
1408 MOVO X1, X3
1409 MOVO X1, X4
1410 MOVO X1, X5
1411 MOVO X1, X6
1412 MOVO X1, X7
1413 PXOR runtime·aeskeysched+16(SB), X1
1414 PXOR runtime·aeskeysched+32(SB), X2
1415 PXOR runtime·aeskeysched+48(SB), X3
1416 PXOR runtime·aeskeysched+64(SB), X4
1417 PXOR runtime·aeskeysched+80(SB), X5
1418 PXOR runtime·aeskeysched+96(SB), X6
1419 PXOR runtime·aeskeysched+112(SB), X7
1420 AESENC X1, X1
1421 AESENC X2, X2
1422 AESENC X3, X3
1423 AESENC X4, X4
1424 AESENC X5, X5
1425 AESENC X6, X6
1426 AESENC X7, X7
1427
1428 // load data
1429 MOVOU (AX), X8
1430 MOVOU 16(AX), X9
1431 MOVOU 32(AX), X10
1432 MOVOU 48(AX), X11
1433 MOVOU -64(AX)(CX*1), X12
1434 MOVOU -48(AX)(CX*1), X13
1435 MOVOU -32(AX)(CX*1), X14
1436 MOVOU -16(AX)(CX*1), X15
1437
1438 // xor with seed
1439 PXOR X0, X8
1440 PXOR X1, X9
1441 PXOR X2, X10
1442 PXOR X3, X11
1443 PXOR X4, X12
1444 PXOR X5, X13
1445 PXOR X6, X14
1446 PXOR X7, X15
1447
1448 // scramble 3 times
1449 AESENC X8, X8
1450 AESENC X9, X9
1451 AESENC X10, X10
1452 AESENC X11, X11
1453 AESENC X12, X12
1454 AESENC X13, X13
1455 AESENC X14, X14
1456 AESENC X15, X15
1457
1458 AESENC X8, X8
1459 AESENC X9, X9
1460 AESENC X10, X10
1461 AESENC X11, X11
1462 AESENC X12, X12
1463 AESENC X13, X13
1464 AESENC X14, X14
1465 AESENC X15, X15
1466
1467 AESENC X8, X8
1468 AESENC X9, X9
1469 AESENC X10, X10
1470 AESENC X11, X11
1471 AESENC X12, X12
1472 AESENC X13, X13
1473 AESENC X14, X14
1474 AESENC X15, X15
1475
1476 // combine results
1477 PXOR X12, X8
1478 PXOR X13, X9
1479 PXOR X14, X10
1480 PXOR X15, X11
1481 PXOR X10, X8
1482 PXOR X11, X9
1483 PXOR X9, X8
1484 // X15 must be zero on return
1485 PXOR X15, X15
1486 MOVQ X8, AX // return X8
1487 RET
1488
1489 aes129plus:
1490 // make 7 more starting seeds
1491 MOVO X1, X2
1492 MOVO X1, X3
1493 MOVO X1, X4
1494 MOVO X1, X5
1495 MOVO X1, X6
1496 MOVO X1, X7
1497 PXOR runtime·aeskeysched+16(SB), X1
1498 PXOR runtime·aeskeysched+32(SB), X2
1499 PXOR runtime·aeskeysched+48(SB), X3
1500 PXOR runtime·aeskeysched+64(SB), X4
1501 PXOR runtime·aeskeysched+80(SB), X5
1502 PXOR runtime·aeskeysched+96(SB), X6
1503 PXOR runtime·aeskeysched+112(SB), X7
1504 AESENC X1, X1
1505 AESENC X2, X2
1506 AESENC X3, X3
1507 AESENC X4, X4
1508 AESENC X5, X5
1509 AESENC X6, X6
1510 AESENC X7, X7
1511
1512 // start with last (possibly overlapping) block
1513 MOVOU -128(AX)(CX*1), X8
1514 MOVOU -112(AX)(CX*1), X9
1515 MOVOU -96(AX)(CX*1), X10
1516 MOVOU -80(AX)(CX*1), X11
1517 MOVOU -64(AX)(CX*1), X12
1518 MOVOU -48(AX)(CX*1), X13
1519 MOVOU -32(AX)(CX*1), X14
1520 MOVOU -16(AX)(CX*1), X15
1521
1522 // xor in seed
1523 PXOR X0, X8
1524 PXOR X1, X9
1525 PXOR X2, X10
1526 PXOR X3, X11
1527 PXOR X4, X12
1528 PXOR X5, X13
1529 PXOR X6, X14
1530 PXOR X7, X15
1531
1532 // compute number of remaining 128-byte blocks
1533 DECQ CX
1534 SHRQ $7, CX
1535
1536 PCALIGN $16
1537 aesloop:
1538 // scramble state
1539 AESENC X8, X8
1540 AESENC X9, X9
1541 AESENC X10, X10
1542 AESENC X11, X11
1543 AESENC X12, X12
1544 AESENC X13, X13
1545 AESENC X14, X14
1546 AESENC X15, X15
1547
1548 // scramble state, xor in a block
1549 MOVOU (AX), X0
1550 MOVOU 16(AX), X1
1551 MOVOU 32(AX), X2
1552 MOVOU 48(AX), X3
1553 AESENC X0, X8
1554 AESENC X1, X9
1555 AESENC X2, X10
1556 AESENC X3, X11
1557 MOVOU 64(AX), X4
1558 MOVOU 80(AX), X5
1559 MOVOU 96(AX), X6
1560 MOVOU 112(AX), X7
1561 AESENC X4, X12
1562 AESENC X5, X13
1563 AESENC X6, X14
1564 AESENC X7, X15
1565
1566 ADDQ $128, AX
1567 DECQ CX
1568 JNE aesloop
1569
1570 // 3 more scrambles to finish
1571 AESENC X8, X8
1572 AESENC X9, X9
1573 AESENC X10, X10
1574 AESENC X11, X11
1575 AESENC X12, X12
1576 AESENC X13, X13
1577 AESENC X14, X14
1578 AESENC X15, X15
1579 AESENC X8, X8
1580 AESENC X9, X9
1581 AESENC X10, X10
1582 AESENC X11, X11
1583 AESENC X12, X12
1584 AESENC X13, X13
1585 AESENC X14, X14
1586 AESENC X15, X15
1587 AESENC X8, X8
1588 AESENC X9, X9
1589 AESENC X10, X10
1590 AESENC X11, X11
1591 AESENC X12, X12
1592 AESENC X13, X13
1593 AESENC X14, X14
1594 AESENC X15, X15
1595
1596 PXOR X12, X8
1597 PXOR X13, X9
1598 PXOR X14, X10
1599 PXOR X15, X11
1600 PXOR X10, X8
1601 PXOR X11, X9
1602 PXOR X9, X8
1603 // X15 must be zero on return
1604 PXOR X15, X15
1605 MOVQ X8, AX // return X8
1606 RET
1607
1608 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
1609 // ABIInternal for performance.
// Fixed-size fast path: fold the 4 data bytes into the seed register
// and run 3 AES rounds with the first key-schedule blocks.
1610 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1611 // AX = ptr to data
1612 // BX = seed
1613 CMPB runtime·useAeshash(SB), $0
1614 JEQ noaes
1615 MOVQ BX, X0 // X0 = seed
1616 PINSRD $2, (AX), X0 // data
1617 AESENC runtime·aeskeysched+0(SB), X0
1618 AESENC runtime·aeskeysched+16(SB), X0
1619 AESENC runtime·aeskeysched+32(SB), X0
1620 MOVQ X0, AX // return X0
1621 RET
1622 noaes:
1623 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1624
1625 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
1626 // ABIInternal for performance.
// Fixed-size fast path: fold the 8 data bytes into the seed register
// and run 3 AES rounds with the first key-schedule blocks.
1627 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1628 // AX = ptr to data
1629 // BX = seed
1630 CMPB runtime·useAeshash(SB), $0
1631 JEQ noaes
1632 MOVQ BX, X0 // X0 = seed
1633 PINSRQ $1, (AX), X0 // data
1634 AESENC runtime·aeskeysched+0(SB), X0
1635 AESENC runtime·aeskeysched+16(SB), X0
1636 AESENC runtime·aeskeysched+32(SB), X0
1637 MOVQ X0, AX // return X0
1638 RET
1639 noaes:
1640 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1641
1642 // simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each: entry i has its low i bytes set to 0xff
// and the rest zero, used by the PAND in aeshashbody's aes0to15 path.
1643 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1644 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1645 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1646 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1647 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1648 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1649 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1650 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1651 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1652 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1653 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1654 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1655 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1656 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1657 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1658 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1659 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1660 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1661 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1662 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1663 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1664 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1665 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1666 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1667 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1668 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1669 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1670 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1671 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1672 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1673 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1674 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1675 GLOBL masks<>(SB),RODATA,$256
1676
1677 // func checkASM() bool
// Reports whether the masks and shifts tables are 16-byte aligned,
// which the SSE memory operands in aeshashbody rely on.
1678 TEXT ·checkASM(SB),NOSPLIT,$0-1
1679 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1680 MOVQ $masks<>(SB), AX
1681 MOVQ $shifts<>(SB), BX
1682 ORQ BX, AX // any misaligned bit in either address survives the OR
1683 TESTQ $15, AX
1684 SETEQ ret+0(FP)
1685 RET
1686
1687 // these are arguments to pshufb. They move data down from
1688 // the high bytes of the register to the low bytes of the register.
1689 // index is how many bytes to move.
// 16 entries of 16 bytes each; 0xff bytes zero the corresponding
// destination byte (PSHUFB semantics), so entry i keeps only the
// top i source bytes, moved to the bottom of the register.
1690 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1691 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1692 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1693 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1694 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1695 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1696 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1697 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1698 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1699 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1700 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1701 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1702 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1703 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1704 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1705 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1706 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1707 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1708 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1709 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1710 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1711 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1712 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1713 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1714 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1715 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1716 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1717 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1718 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1719 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1720 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1721 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1722 GLOBL shifts<>(SB),RODATA,$256
1723
1724 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1725 // Must obey the gcc calling convention.
1726 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1727 get_tls(CX)
1728 MOVQ g(CX), AX // AX = g
1729 MOVQ g_m(AX), AX // AX = g.m
1730 MOVQ m_curg(AX), AX // AX = g.m.curg
1731 MOVQ (g_stack+stack_hi)(AX), AX // return value in AX (C ABI)
1732 RET
1733
1734 // The top-most function running on a goroutine
1735 // returns to goexit+PCQuantum.
// The leading NOP makes goexit+PCQuantum a valid PC inside this
// function, so the return address unwinds into goexit.
1736 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
1737 BYTE $0x90 // NOP
1738 CALL runtime·goexit1(SB) // does not return
1739 // traceback from goexit1 must hit code range of goexit
1740 BYTE $0x90 // NOP
1741
1742 // This is called from .init_array and follows the platform, not Go, ABI.
// Appends the moduledata passed in DI to the runtime's linked list by
// updating lastmoduledatap->next and lastmoduledatap.
1743 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1744 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1745 MOVQ runtime·lastmoduledatap(SB), AX
1746 MOVQ DI, moduledata_next(AX)
1747 MOVQ DI, runtime·lastmoduledatap(SB)
1748 POPQ R15
1749 RET
1750
1751 // Initialize special registers then jump to sigpanic.
1752 // This function is injected from the signal handler for panicking
1753 // signals. It is quite painful to set X15 in the signal context,
1754 // so we do it here.
// Loads g into R14 from TLS and zeroes X15 to satisfy the
// ABIInternal register invariants before the tail call.
1755 TEXT ·sigpanic0(SB),NOSPLIT,$0-0
1756 get_tls(R14)
1757 MOVQ g(R14), R14
1758 XORPS X15, X15
1759 JMP ·sigpanic<ABIInternal>(SB)
1760
1761 // gcWriteBarrier informs the GC about heap pointer writes.
1762 //
1763 // gcWriteBarrier returns space in a write barrier buffer which
1764 // should be filled in by the caller.
1765 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1766 // number of bytes of buffer needed in R11, and returns a pointer
1767 // to the buffer space in R11.
1768 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1769 // but may clobber others (e.g., SSE registers).
1770 // Typical use would be, when doing *(CX+88) = AX
1771 //     CMPL $0, runtime.writeBarrier(SB)
1772 //     JEQ dowrite
1773 //     CALL runtime.gcWriteBarrier2(SB)
1774 //     MOVQ AX, (R11)
1775 //     MOVQ 88(CX), DX
1776 //     MOVQ DX, 8(R11)
1777 // dowrite:
1778 //     MOVQ AX, 88(CX)
1779 TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
1780 // Save the registers clobbered by the fast path. This is slightly
1781 // faster than having the caller spill these.
1782 MOVQ R12, 96(SP)
1783 MOVQ R13, 104(SP)
1784 retry:
1785 // TODO: Consider passing g.m.p in as an argument so they can be shared
1786 // across a sequence of write barriers.
// R14 holds g here (see "R14 is g" in the save list below).
1787 MOVQ g_m(R14), R13
1788 MOVQ m_p(R13), R13
1789 // Get current buffer write position.
1790 MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position
1791 ADDQ R11, R12 // new next position
1792 // Is the buffer full?
1793 CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
1794 JA flush
1795 // Commit to the larger buffer.
1796 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1797 // Make return value (the original next position)
1798 SUBQ R11, R12
1799 MOVQ R12, R11
1800 // Restore registers.
1801 MOVQ 96(SP), R12
1802 MOVQ 104(SP), R13
1803 RET
1804
1805 flush:
1806 // Save all general purpose registers since these could be
1807 // clobbered by wbBufFlush and were not saved by the caller.
1808 // It is possible for wbBufFlush to clobber other registers
1809 // (e.g., SSE registers), but the compiler takes care of saving
1810 // those in the caller if necessary. This strikes a balance
1811 // with registers that are likely to be used.
1812 //
1813 // We don't have type information for these, but all code under
1814 // here is NOSPLIT, so nothing will observe these.
1815 //
1816 // TODO: We could strike a different balance; e.g., saving X0
1817 // and not saving GP registers that are less likely to be used.
1818 MOVQ DI, 0(SP)
1819 MOVQ AX, 8(SP)
1820 MOVQ BX, 16(SP)
1821 MOVQ CX, 24(SP)
1822 MOVQ DX, 32(SP)
1823 // DI already saved
1824 MOVQ SI, 40(SP)
1825 MOVQ BP, 48(SP)
1826 MOVQ R8, 56(SP)
1827 MOVQ R9, 64(SP)
1828 MOVQ R10, 72(SP)
1829 MOVQ R11, 80(SP)
1830 // R12 already saved
1831 // R13 already saved
1832 // R14 is g
1833 MOVQ R15, 88(SP)
1834
1835 CALL runtime·wbBufFlush(SB)
1836
// After the flush the buffer has room again; reload and retry.
1837 MOVQ 0(SP), DI
1838 MOVQ 8(SP), AX
1839 MOVQ 16(SP), BX
1840 MOVQ 24(SP), CX
1841 MOVQ 32(SP), DX
1842 MOVQ 40(SP), SI
1843 MOVQ 48(SP), BP
1844 MOVQ 56(SP), R8
1845 MOVQ 64(SP), R9
1846 MOVQ 72(SP), R10
1847 MOVQ 80(SP), R11
1848 MOVQ 88(SP), R15
1849 JMP retry
1850
// Fixed-count entry points: gcWriteBarrierN requests N*8 bytes of
// write barrier buffer in R11 and tail-calls the common implementation.
1851 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1852 MOVL $8, R11
1853 JMP gcWriteBarrier<>(SB)
1854 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1855 MOVL $16, R11
1856 JMP gcWriteBarrier<>(SB)
1857 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1858 MOVL $24, R11
1859 JMP gcWriteBarrier<>(SB)
1860 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1861 MOVL $32, R11
1862 JMP gcWriteBarrier<>(SB)
1863 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1864 MOVL $40, R11
1865 JMP gcWriteBarrier<>(SB)
1866 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1867 MOVL $48, R11
1868 JMP gcWriteBarrier<>(SB)
1869 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1870 MOVL $56, R11
1871 JMP gcWriteBarrier<>(SB)
1872 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1873 MOVL $64, R11
1874 JMP gcWriteBarrier<>(SB)
1875
// Error string reported to the debugger when the requested argument
// frame exceeds the largest debugCall size class (65536).
1876 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1877 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1878
1879 // debugCallV2 is the entry point for debugger-injected function
1880 // calls on running goroutines. It informs the runtime that a
1881 // debug call has been injected and creates a call frame for the
1882 // debugger to fill in.
1883 //
1884 // To inject a function call, a debugger should:
1885 // 1. Check that the goroutine is in state _Grunning and that
1886 // there are at least 256 bytes free on the stack.
1887 // 2. Push the current PC on the stack (updating SP).
1888 // 3. Write the desired argument frame size at SP-16 (using the SP
1889 // after step 2).
1890 // 4. Save all machine registers (including flags and XMM registers)
1891 // so they can be restored later by the debugger.
1892 // 5. Set the PC to debugCallV2 and resume execution.
1893 //
1894 // If the goroutine is in state _Grunnable, then it's not generally
1895 // safe to inject a call because it may return out via other runtime
1896 // operations. Instead, the debugger should unwind the stack to find
1897 // the return to non-runtime code, add a temporary breakpoint there,
1898 // and inject the call once that breakpoint is hit.
1899 //
1900 // If the goroutine is in any other state, it's not safe to inject a call.
1901 //
1902 // This function communicates back to the debugger by setting R12 and
1903 // invoking INT3 to raise a breakpoint signal. See the comments in the
1904 // implementation for the protocol the debugger is expected to
1905 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1906 //
1907 // The debugger must ensure that any pointers passed to the function
1908 // obey escape analysis requirements. Specifically, it must not pass
1909 // a stack pointer to an escaping argument. debugCallV2 cannot check
1910 // this invariant.
1911 //
1912 // This is ABIInternal because Go code injects its PC directly into new
1913 // goroutine stacks.
1914 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1915 // Save all registers that may contain pointers so they can be
1916 // conservatively scanned.
1917 //
1918 // We can't do anything that might clobber any of these
1919 // registers before this.
1920 MOVQ R15, r15-(14*8+8)(SP)
1921 MOVQ R14, r14-(13*8+8)(SP)
1922 MOVQ R13, r13-(12*8+8)(SP)
1923 MOVQ R12, r12-(11*8+8)(SP)
1924 MOVQ R11, r11-(10*8+8)(SP)
1925 MOVQ R10, r10-(9*8+8)(SP)
1926 MOVQ R9, r9-(8*8+8)(SP)
1927 MOVQ R8, r8-(7*8+8)(SP)
1928 MOVQ DI, di-(6*8+8)(SP)
1929 MOVQ SI, si-(5*8+8)(SP)
1930 MOVQ BP, bp-(4*8+8)(SP)
1931 MOVQ BX, bx-(3*8+8)(SP)
1932 MOVQ DX, dx-(2*8+8)(SP)
1933 // Save the frame size before we clobber it. Either of the last
1934 // saves could clobber this depending on whether there's a saved BP.
1935 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1936 MOVQ CX, cx-(1*8+8)(SP)
1937 MOVQ AX, ax-(0*8+8)(SP)
1938
1939 // Save the argument frame size.
1940 MOVQ DX, frameSize-128(SP)
1941
1942 // Perform a safe-point check.
1943 MOVQ retpc-8(FP), AX // Caller's PC
1944 MOVQ AX, 0(SP)
1945 CALL runtime·debugCallCheck(SB)
1946 MOVQ 8(SP), AX
1947 TESTQ AX, AX
1948 JZ good
1949 // The safety check failed. Put the reason string at the top
1950 // of the stack.
1951 MOVQ AX, 0(SP)
1952 MOVQ 16(SP), AX
1953 MOVQ AX, 8(SP)
1954 // Set R12 to 8 and invoke INT3. The debugger should get the
1955 // reason a call can't be injected from the top of the stack
1956 // and resume execution.
1957 MOVQ $8, R12
1958 BYTE $0xcc // INT3
1959 JMP restore
1960
1961 good:
1962 // Registers are saved and it's safe to make a call.
1963 // Open up a call frame, moving the stack if necessary.
1964 //
1965 // Once the frame is allocated, this will set R12 to 0 and
1966 // invoke INT3. The debugger should write the argument
1967 // frame for the call at SP, set up argument registers, push
1968 // the trapping PC on the stack, set the PC to the function to
1969 // call, set RDX to point to the closure (if a closure call),
1970 // and resume execution.
1971 //
1972 // If the function returns, this will set R12 to 1 and invoke
1973 // INT3. The debugger can then inspect any return value saved
1974 // on the stack at SP and in registers and resume execution again.
1975 //
1976 // If the function panics, this will set R12 to 2 and invoke INT3.
1977 // The interface{} value of the panic will be at SP. The debugger
1978 // can inspect the panic value and resume execution again.
// Dispatch to the smallest debugCall size class that fits the
// requested frame size (AX); fall through when it does not fit.
1979 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1980 CMPQ AX, $MAXSIZE; \
1981 JA 5(PC); \
1982 MOVQ $NAME(SB), AX; \
1983 MOVQ AX, 0(SP); \
1984 CALL runtime·debugCallWrap(SB); \
1985 JMP restore
1986
1987 MOVQ frameSize-128(SP), AX
1988 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1989 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1990 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1991 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1992 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1993 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1994 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1995 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1996 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1997 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1998 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1999 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
2000 // The frame size is too large. Report the error.
2001 MOVQ $debugCallFrameTooLarge<>(SB), AX
2002 MOVQ AX, 0(SP)
2003 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
2004 MOVQ $8, R12
2005 BYTE $0xcc // INT3
2006 JMP restore
2007
2008 restore:
2009 // Calls and failures resume here.
2010 //
2011 // Set R12 to 16 and invoke INT3. The debugger should restore
2012 // all registers except RIP and RSP and resume execution.
2013 MOVQ $16, R12
2014 BYTE $0xcc // INT3
2015 // We must not modify flags after this point.
2016
2017 // Restore pointer-containing registers, which may have been
2018 // modified from the debugger's copy by stack copying.
2019 MOVQ ax-(0*8+8)(SP), AX
2020 MOVQ cx-(1*8+8)(SP), CX
2021 MOVQ dx-(2*8+8)(SP), DX
2022 MOVQ bx-(3*8+8)(SP), BX
2023 MOVQ bp-(4*8+8)(SP), BP
2024 MOVQ si-(5*8+8)(SP), SI
2025 MOVQ di-(6*8+8)(SP), DI
2026 MOVQ r8-(7*8+8)(SP), R8
2027 MOVQ r9-(8*8+8)(SP), R9
2028 MOVQ r10-(9*8+8)(SP), R10
2029 MOVQ r11-(10*8+8)(SP), R11
2030 MOVQ r12-(11*8+8)(SP), R12
2031 MOVQ r13-(12*8+8)(SP), R13
2032 MOVQ r14-(13*8+8)(SP), R14
2033 MOVQ r15-(14*8+8)(SP), R15
2034
2035 RET
2036
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each debugCall<N> trampoline reserves an N-byte argument frame and
// then stops at an INT3 with R12 = 0, telling the debugger the frame
// is ready for the injected call's arguments. After the injected call
// returns here, the second INT3 with R12 = 1 tells the debugger the
// results are available in the frame. (R12 values per the debug call
// protocol documented above debugCallV2.)
#define DEBUG_CALL_FN(NAME,MAXSIZE) \
	TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
	NO_LOCAL_POINTERS; \
	MOVQ $0, R12; \
	BYTE $0xcc; \
	MOVQ $1, R12; \
	BYTE $0xcc; \
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
//
// debugCallPanicked reports a panic from a debugger-injected call.
// It copies the panic value's interface words to the top of the stack
// and raises INT3 with R12 = 2, which — per the debug call protocol
// above debugCallV2 — tells the debugger the call panicked and that
// the interface{} panic value is at SP.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ val_type+0(FP), AX // type word of the interface
	MOVQ AX, 0(SP)
	MOVQ val_data+8(FP), AX // data word of the interface
	MOVQ AX, 8(SP)
	MOVQ $2, R12 // protocol code 2: function panicked
	BYTE $0xcc   // INT3: stop for the debugger
	RET

// panicBounds is called by compiler-generated code on a failed bounds
// check. It spills the integer registers into its frame and calls
// runtime·panicBounds64 with the faulting PC (AX) and a pointer to the
// saved-register block (BX), so the runtime can decode which register
// held the offending index/length from the instruction at that PC.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Save all 14 int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	MOVQ AX, 16(SP)
	MOVQ CX, 24(SP)
	MOVQ DX, 32(SP)
	MOVQ BX, 40(SP)
	// skip SP @ 48(SP)
	MOVQ BP, 56(SP)
	MOVQ SI, 64(SP)
	MOVQ DI, 72(SP)
	MOVQ R8, 80(SP)
	MOVQ R9, 88(SP)
	MOVQ R10, 96(SP)
	MOVQ R11, 104(SP)
	MOVQ R12, 112(SP)
	MOVQ R13, 120(SP)
	// skip R14 @ 128(SP) (aka G)
	MOVQ R15, 136(SP)

	MOVQ SP, AX // hide SP read from vet
	// 152 = $144 frame + 8-byte return address, i.e. the saved PC.
	MOVQ 152(AX), AX // PC immediately after call to panicBounds
	LEAQ 16(SP), BX  // BX = base of the saved-register block
	CALL runtime·panicBounds64<ABIInternal>(SB)
	RET

// runtime·tls_g holds the TLS offset used to locate the G register's
// backing slot on platforms that set it up at runtime.
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// ($16 = 8 bytes per slot * slot #2 — NOTE(review): confirm.)
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
// Zero-initialized here; filled in by the TLS setup in rt0_go.
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif

// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// The hand-encoded bytes below form the classic retpoline:
//   - CALL setup pushes the address of nospec as the return address
//     and jumps forward 4 bytes to setup.
//   - setup overwrites that on-stack return address with the real
//     indirect-branch target held in `reg`, then RETs to it.
//   - A mispredicted speculative return lands in the harmless
//     PAUSE/JMP spin at nospec instead of attacker-controlled code.
// In the MOVQ encoding, $0x48|((reg&8)>>1) is REX.W, plus REX.R when
// reg is R8-R15 (the register number goes in the ModRM reg field).
#define RETPOLINE(reg) \
	/* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
	/* nospec: */ \
	/* PAUSE */ BYTE $0xF3; BYTE $0x90; \
	/* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
	/* setup: */ \
	/* MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
	BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
	/* RET */ BYTE $0xC3

TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* Register 4 is SP: an indirect branch through SP can't happen, and
   encoding 4 is the SIB escape, so there is no retpolineSP. */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)

// func getfp() uintptr
//
// getfp returns the frame pointer of its caller. Because this function
// is NOSPLIT|NOFRAME, BP still holds the caller's frame pointer on
// entry, so it is returned directly in AX (the ABIInternal result
// register).
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ BP, AX
	RET