// src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)
19
// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
// rt0_go takes its arguments in the same registers, so no shuffling
// is needed before the tail jump.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)
25
// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI.
	// Saves all host callee-saved registers; restored before RET so
	// the C caller's state is preserved.
	PUSH_REGS_HOST_TO_ABI0()

	// Stash argc/argv for rt0_lib_go, which runs later on another thread.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

#ifdef GOOS_windows
	// Set up a dummy TLS value on Windows so that the autogenerated
	// ABI wrappers don't crash when trying to load G from TLS before
	// wintls has set up the real TLS slot in rt0_go.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif

	CALL	runtime·libInit(SB)

	POP_REGS_HOST_TO_ABI0()
	RET
51
// rt0_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
// It reloads the argc/argv that _rt0_amd64_lib stashed and enters the
// normal bootstrap path.
TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)
58
// Storage for argc/argv handed to _rt0_amd64_lib, consumed by rt0_lib_go.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
63
// bad_cpu_msg is the fatal diagnostic printed by rt0_go's bad_cpu path
// when the CPU lacks the microarchitecture level the binary was built
// for (GOAMD64). Exactly one variant is selected at build time; the
// message length (84 bytes including the trailing newline) must match
// the write() length used in bad_cpu.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84
77
// Define a list of AMD64 microarchitecture level features
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
// Each V*_FEATURES_* mask is checked against the corresponding CPUID
// output register in rt0_go; every bit must be set for the level to pass.

// SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT (CPUID.1:ECX)
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
// LAHF/SAHF (CPUID.80000001H:ECX)
#define V2_EXT_FEATURES_CX (1 << 0)
// FMA MOVBE OSXSAVE AVX F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
// ABM (for LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
// BMI1 AVX2 BMI2 (CPUID.7.0:EBX)
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
// XMM YMM (XCR0 state-enable bits via XGETBV)
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
// AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
// OPMASK ZMM (XCR0 bits 5-7: opmask, ZMM_Hi256, Hi16_ZMM)
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
101
// Map the build-time GOAMD64 level onto the NEED_* macros that rt0_go's
// CPU check consults. If no GOAMD64_v* symbol is defined (v1 baseline),
// none of the NEED_* macros exist and the checks compile away entirely.
#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define AVX512F 0x0000004000000000
#define AVX512CD 0x0000008000000000
#define AVX512DQ 0x0000010000000000
#define AVX512BW 0x0000020000000000
#define AVX512VL 0x0000100000000000
#define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif
141
// rt0_go is the common Go bootstrap: it sets up g0's stack bounds,
// probes the CPU, arranges TLS, wires up g0/m0, verifies GOAMD64
// requirements, and then runs args/osinit/schedinit before starting
// runtime.main on a new goroutine. It never returns.
// In: DI = argc, SI = argv (C-style).
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP	// 3args 2auto
	ANDQ	$~15, SP	// 16-byte align for the C ABI (_cgo_init call below)
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// This is typically the entry point for Go programs.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	MOVQ	$0, BP

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX	// assume 64 KB of usable OS stack below SP
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo	// max leaf 0: cannot query leaf 1

	// CPUID leaf 0 returns the vendor string "GenuineIntel" in BX,DX,CX.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// When cgo initialized TLS, skip the Go TLS setup below
	// (Windows still needs wintls even with cgo).
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	JL	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	// XGETBV reads XCR0 to check the OS enabled the needed vector state.
	XORL	CX, CX
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13	// cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	// write(2, bad_cpu_msg, 84); exit(1)
	MOVQ	$2, 0(SP)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
	// intended to be called by debuggers.
	MOVQ	$runtime·debugPinnerV1<ABIInternal>(SB), AX
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET
370
// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
376
// breakpoint executes a single INT3 (0xCC) so a debugger can trap here.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET
380
// asminit performs per-thread assembly-level initialization.
// Nothing is required on amd64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET
384
// mstart is the entry point for new Go-created OS threads; it clears the
// frame pointer (so unwinders stop here) and hands off to mstart0, which
// never returns.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	// This is the root frame of new Go-created OS threads.
	// Call stack unwinding must not proceed past this frame.
	// Set the frame pointer register to 0 so that frame pointer-based unwinders
	// (which don't use debug info for performance reasons)
	// won't attempt to unwind past this function.
	// See go.dev/issue/63630
	// Fix: was "MOVD $0, BP" — MOVD is the arm64 mnemonic and is not a
	// valid amd64 GP-register move in the Go assembler; use MOVQ, matching
	// the identical idiom in rt0_go above.
	MOVQ	$0, BP
	CALL	runtime·mstart0(SB)
	RET // not reached
395
/*
 * go-routine
 */

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil (faults early if it is)
	JMP	gogo<>(SB)
407
// gogo<> does the actual context switch: install DX as the current g
// (TLS slot and g register), restore SP/BP/ctxt from the gobuf, zero the
// saved slots to help the GC, and jump to the saved PC.
// In: BX = *gobuf, DX = gobuf.g.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX
420
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
// In (ABIInternal): AX = fn (a func value; code pointer at 0(AX)).
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// Scrub registers before leaving a secret-mode goroutine's stack.
	CMPL	g_secret(R14), $0
	JEQ	nosecret
	CALL	·secretEraseRegistersMcall(SB)
nosecret:
#endif

	MOVQ	AX, DX	// DX = fn

	// Save state in g->sched. The caller's SP and PC are restored by gogo to
	// resume execution in the caller's frame (implicit return). The caller's BP
	// is also restored to support frame pointer unwinding.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
	// can cause a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX	// caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12
	CALL	R12		// fn(g)
	// The Windows native stack unwinder incorrectly classifies the next instruction
	// as part of the function epilogue, producing a wrong call stack.
	// Add a NOP to work around this issue. See go.dev/issue/67007.
	BYTE	$0x90
	POPQ	AX
	JMP	runtime·badmcall2(SB)	// fn returned: fatal
	RET
471
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// Align for consistency with offset used in gosave_systemstack_switch
	PCALIGN	$8
	UNDEF	// never actually executed; its PC is only saved in g->sched
	// Make sure this function is not leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET
487
// func systemstack(fn func())
// Run fn on the m->g0 (system) stack, then switch back to the caller's
// goroutine stack. If already on g0 or gsignal, fn is tail-called in place.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_runtimesecret
	// If in secret mode, erase registers on transition
	// from G stack to M stack,
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPL	g_secret(AX), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
nosecret:
#endif

	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch	// already on the signal stack

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch	// already on the system stack

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// Switch stacks.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVQ	DI, DX		// DX = fn (closure context)
	MOVQ	0(DI), DI	// code pointer
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear to help garbage collector
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX		// DX = fn (closure context)
	MOVQ	0(DI), DI	// code pointer
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
561
// func switchToCrashStack0(fn func())
// Switch the current M onto the dedicated crash stack (gcrash) and call
// fn there, so crash handling can proceed even if the normal stacks are
// corrupt. fn must not return.
// In (ABIInternal): AX = fn.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	g_m(R14), BX	// curm

	// set g to gcrash
	LEAQ	runtime·gcrash(SB), R14	// g = &gcrash
	MOVQ	BX, g_m(R14)	// g.m = curm
	MOVQ	R14, m_g0(BX)	// curm.g0 = g
	get_tls(CX)
	MOVQ	R14, g(CX)

	// switch to crashstack
	MOVQ	(g_stack+stack_hi)(R14), BX
	SUBQ	$(4*8), BX	// leave room below the top of the stack
	MOVQ	BX, SP

	// call target function
	MOVQ	AX, DX		// DX = fn (closure context)
	MOVQ	0(AX), AX	// code pointer
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
586
/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), DI	// DI = g
	MOVQ	g_m(DI), BX	// BX = m

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(DI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(DI)
	MOVQ	BP, (g_sched+gobuf_bp)(DI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)

	// Growing g0's stack is fatal.
	MOVQ	m_g0(BX), SI	// SI = m.g0
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	DI, SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	DI, (m_morebuf+gobuf_g)(BX)

	// If in secret mode, erase registers on transition
	// from G stack to M stack,
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)
	// Reload the registers secretEraseRegisters clobbered.
	get_tls(CX)
	MOVQ	g(CX), DI	// DI = g
	MOVQ	g_m(DI), BX	// BX = m
nosecret:
#endif

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET
653
// morestack but not preserving ctxt.
// Zeroes DX (the closure context register) and falls into morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
658
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// It saves all nine ABIInternal integer argument registers and X0-X14;
// R12 itself and the frame are untouched so it can be paired with
// unspillArgs around a register-ABI call (see reflectcall's CALLFN).
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET
686
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// Exact inverse of spillArgs: restores the nine ABIInternal integer
// argument registers and X0-X14 from the same offsets.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET
714
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to callNNN (NAME) when the frame size in CX is <= MAXSIZE;
// otherwise it falls through to the next DISPATCH line.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
727
// reflectcall dispatches on frameSize to the smallest fixed-frame
// callNNN helper that can hold the arguments (see DISPATCH above).
TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// Frame larger than 1GB: fatal.
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX
759
// CALLFN expands to the body of one callNNN helper: copy the stack
// arguments into the fixed-size frame, load register arguments from the
// RegArgs struct, call f, then spill result registers and copy stack
// results back through callRet (which notifies the GC via
// reflectcallmove).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs(SB);		\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs(SB);		\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;	\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET
789
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers (DX=type, DI=dst, SI=src, CX=size, R12=regArgs).
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	R12, 32(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
803
// Instantiate one callNNN helper per power-of-two frame size that
// reflectcall's DISPATCH table can select.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
831
// procyieldAsm spins for the given number of cycles, executing PAUSE
// each iteration to hint the CPU that this is a spin-wait loop.
// A count of 0 returns immediately.
TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
	TESTL	AX, AX
	JZ	done
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
done:
	RET
842
843
// publicationBarrier makes prior writes visible before subsequent
// publication of a pointer to them.
TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET
848
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with frame pointer
// and without locals ($0) or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	// Take systemstack_switch PC and add 8 bytes to skip
	// the prologue. Keep 8 bytes offset consistent with
	// PCALIGN $8 in systemstack_switch, pointing start of
	// UNDEF instruction beyond prologue.
	MOVQ	$runtime·systemstack_switch+8(SB), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R14)
	LEAQ	8(SP), R9	// caller's SP (skip our return address)
	MOVQ	R9, (g_sched+gobuf_sp)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·abort(SB)
	RET
871
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
// The original SP is saved in the frame and restored after the call.
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX
	ANDQ	$~15, SP	// alignment
	MOVQ	DX, 8(SP)	// save original SP
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	8(SP), DX
	MOVQ	DX, SP
	RET
887
// asmcgocall_landingpad calls AX with BX as argument.
// Must be called on the system stack.
TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
#ifdef GOOS_windows
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per Windows amd64 calling convention.
	ADJSP	$32
	// On Windows, asmcgocall_landingpad acts as landing pad for exceptions
	// thrown in the cgo call. Exceptions that reach this function will be
	// handled by runtime.sehtramp thanks to the SEH metadata added
	// by the compiler.
	// Note that runtime.sehtramp can't be attached directly to asmcgocall
	// because its initial stack pointer can be outside the system stack bounds,
	// and Windows stops the stack unwinding without calling the exception handler
	// when it reaches that point.
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	// The exception handler is not called if the next instruction is part of
	// the epilogue, which includes the RET instruction, so we need to add a NOP here.
	BYTE	$0x90
	ADJSP	$-32
	RET
#endif
	// Tail call AX on non-Windows, as the extra stack frame is not needed.
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	JMP	AX
914
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// Returns the C function's int result.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	get_tls(CX)
	MOVQ	g(CX), DI
	CMPQ	DI, $0
	JEQ	nosave
	MOVQ	g_m(DI), R8
	MOVQ	m_gsignal(R8), SI
	CMPQ	DI, SI
	JEQ	nosave
	MOVQ	m_g0(R8), SI
	CMPQ	DI, SI
	JEQ	nosave

	// Running on a user G
	// Figure out if we're running secret code and clear the registers
	// so that the C code we're about to call doesn't spill confidential
	// information into memory
#ifdef GOEXPERIMENT_runtimesecret
	CMPL	g_secret(DI), $0
	JEQ	nosecret
	CALL	·secretEraseRegisters(SB)

nosecret:
#endif
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	// Switch to system stack.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	CALL	gosave_systemstack_switch<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBQ	$16, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 8(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 0(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	CALL	runtime·asmcgocall_landingpad(SB)

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	8(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	0(SP), SI	// recompute SP from depth, in case the stack moved
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	SUBQ	$16, SP
	ANDQ	$~15, SP
	MOVQ	$0, 8(SP)	// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 0(SP)	// save original stack pointer
	CALL	runtime·asmcgocall_landingpad(SB)
	MOVQ	0(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET
998
#ifdef GOOS_windows
// Dummy TLS that's used on Windows so that we don't crash trying
// to restore the G register in needm. needm and its callees are
// very careful never to actually use the G, the TLS just can't be
// unset since we're in Go code.
GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
#endif
1006
// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// Entered (via crosscall2) when C code calls back into Go.
// Switches from the g0/system stack to m->curg and invokes
// runtime.cgocallbackg(fn, frame, ctxt), then switches back.
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVQ	fn+0(FP), AX
	CMPQ	AX, $0
	JNE	loadg
	// Restore the g from frame.
	get_tls(CX)
	MOVQ	frame+8(FP), BX
	MOVQ	BX, g(CX)
	JMP	dropm

loadg:
	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows TLS may not be set up yet; guard the g load so we
	// don't dereference a nil TLS slot. BX stays 0 (nil g) in that case.
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	XORQ	R14, R14
	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
	CALL	AX
	// savedm == nil records that we borrowed this m, so the exit
	// path below knows to consider dropping it.
	MOVQ	$0, savedm-8(SP)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	SI, R14	// set the g register
	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)	// "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), AX
	MOVQ	frame+8(FP), BX
	MOVQ	ctxt+16(FP), CX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), R8
	SUBQ	SP, R8	// R8 is our actual frame size
	SUBQ	R8, DI	// Allocate the same frame size on the g stack
	MOVQ	DI, SP

	MOVQ	$runtime·cgocallbackg<ABIInternal>(SB), DX
	CALL	DX	// indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET
1185
// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the thread-local g slot; does not touch the
// R14 g register (callers on the needm path handle that separately).
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET
1193
// void setg_gcc(G*); set g called from gcc.
// C calling convention: the g pointer arrives in DI. Unlike
// runtime·setg above, this also loads the ABIInternal g register (R14),
// since we are transitioning from C into Go code.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14	// set the g register
	RET
1200
// abort crashes the process with a breakpoint trap (INT $3).
// The loop after the trap makes sure we never fall through into
// whatever follows, even if the trap is somehow resumed.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop
1205
// check that SP is in range [g->stack.lo, g->stack.hi)
// Aborts (breakpoint trap) if SP is out of range.
// NOTE(review): the second compare uses JHI, so SP exactly equal to
// stack.lo also aborts — effectively the accepted range is
// (stack.lo, stack.hi); slightly stricter than the comment above.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP	// abort unless stack.hi > SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)	// abort unless SP > stack.lo
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET
1217
// func cputicks() int64
// Returns the CPU's 64-bit timestamp counter, serialized against the
// instruction stream. Uses RDTSCP when the CPU supports it; otherwise
// falls back to MFENCE;LFENCE;RDTSC. Either way the 32-bit halves in
// DX:AX are combined into a single int64 at the shared "done" label.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	// Combine DX (high 32 bits) and AX (low 32 bits) into one value.
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done
1244
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// Tail-jumps to aeshashbody when AES-NI hashing is enabled,
// otherwise to the portable Go fallback. Register arguments are
// already in the layout aeshashbody expects, so no shuffling is needed.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
1256
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// Like memhash, but p points at a string header: unpack its
// data pointer and length into the AX/CX layout aeshashbody expects
// before tail-jumping to it.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
1268
// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// AES-NI based hash. The input length selects one of several
// size-specific paths (0-15, 16, 17-32, 33-64, 65-128, 129+ bytes);
// each path mixes the data with per-process key material
// (runtime·aeskeysched) via AESENC rounds and collapses the state
// into a single 64-bit result in AX.
TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// Check whether a 16-byte load at AX would cross a page boundary:
	// after ADDQ $16, the low bits 0xff0 are zero only for addresses
	// originally in the last 16 bytes of a page.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1	// keep only the first CX bytes (see masks<> table)
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1	// move the CX valid bytes down to the low lanes
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed: first 16 bytes and last 16 bytes
	// (ranges may overlap for lengths < 32).
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load first 32 and last 32 bytes (overlapping for lengths < 64)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (overlapping for lengths < 128)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN $16
aesloop:
	// Main loop: each iteration consumes one 128-byte block from the
	// front of the data, mixing it into the 8x16-byte running state.
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET
1605
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Fixed-size 4-byte hash: insert the word into the seed register and
// run three AESENC rounds against the per-process key schedule.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
1622
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Fixed-size 8-byte hash: same structure as memhash32, but the data
// is inserted as a full quadword.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
1639
// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each: entry n (at masks+n*16) keeps the low
// n bytes of an XMM register and zeroes the rest. aeshashbody indexes
// this table with CX*8 after doubling CX (so effectively length*16).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
1674
// func checkASM() bool
// Sanity check run at startup; reports whether this file's data
// tables satisfy the alignment aeshashbody assumes.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX	// low 4 bits of either address set => misaligned
	SETEQ	ret+0(FP)
	RET
1684
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// 16 entries of 16 bytes each; entry n (at shifts+n*16) selects the
// top n bytes of the source into the low lanes and fills the rest
// with 0xff (which PSHUFB turns into zero bytes).
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
1721
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// (Leaf function: result in AX, nothing clobbered that C cares about.)
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), AX	// AX = g->m
	MOVQ	m_curg(AX), AX	// AX = m->curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
1731
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP makes the return address land inside this function's
// code range so tracebacks resolve it to goexit.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1739
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the moduledata pointer to append to the runtime's linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15	// The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)	// lastmoduledatap->next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)	// lastmoduledatap = DI
	POPQ	R15
	RET
1748
// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
// Re-establishes the ABIInternal invariants (g in R14, X15 == 0)
// before entering Go code.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	get_tls(R14)
	MOVQ	g(R14), R14	// restore g register from TLS
	XORPS	X15, X15	// ABIInternal requires X15 == 0
	JMP	·sigpanic<ABIInternal>(SB)
1758
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcBatchBarrier2(SB)
//     MOVQ    AX, (R11)
//     MOVQ    88(CX), DX
//     MOVQ    DX, 8(R11)
// dowrite:
//     MOVQ    AX, 88(CX)
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12	// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	// Restore everything and retry the reservation against the
	// now-empty buffer.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	retry
1848
// gcWriteBarrierN entry points: each requests N*8 bytes of write
// barrier buffer space by loading the byte count into R11 and
// tail-jumping to the common gcWriteBarrier<> implementation above.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)
1873
// Error string reported to the debugger when the requested frame size
// exceeds the largest DEBUG_CALL_FN variant (65536 bytes).
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
1876
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	// DEBUG_CALL_DISPATCH tries one frame-size bucket: if the requested
	// size (AX) fits, it dispatches to the matching debugCallNN<> stub
	// and jumps to restore afterward; otherwise it falls through to the
	// next bucket.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP)	// length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
2034
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// DEBUG_CALL_FN(NAME,MAXSIZE) defines a stub with a MAXSIZE-byte
// frame that serves as scratch space for the injected call's
// arguments. It traps to the attached debugger twice via INT3
// (BYTE $0xcc), with R12 carrying the protocol code: first R12=0,
// then R12=1 after the debugger resumes execution.
// NOTE(review): the precise meaning of codes 0 and 1 is defined by
// the debugCallV2 protocol documented above this window — confirm
// there; only the panic (2), error (8), and restore (16) codes are
// visible in this excerpt.
#define DEBUG_CALL_FN(NAME,MAXSIZE)	\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;	\
	NO_LOCAL_POINTERS;		\
	MOVQ	$0, R12;		\
	BYTE	$0xcc;			\
	MOVQ	$1, R12;		\
	BYTE	$0xcc;			\
	RET
// Instantiate one debug call stub per power-of-two frame size from
// 32 to 65536 bytes; DEBUG_CALL_DISPATCH above selects the smallest
// stub whose frame fits the requested size.
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
2057
// func debugCallPanicked(val interface{})
//
// debugCallPanicked reports to the attached debugger that an
// injected call panicked. It copies the panic value's interface
// words (type, data) to 0(SP)/8(SP), sets R12 to 2 — the "panicked"
// protocol code (see the debugCallV2 comments above) — and executes
// INT3 so the debugger can inspect the value and resume execution.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, R12
	BYTE	$0xcc	// INT3: trap to the debugger
	RET
2068
// panicBounds is the assembly trampoline for bounds-check failures.
// It spills every integer register that could hold an index value
// into a 14-slot array in its own frame (the SP slot at 48(SP) and
// the g register R14 at 128(SP) are intentionally left unwritten),
// then calls panicBounds64 with AX = the caller's PC (the address
// immediately after the CALL to panicBounds) and BX = a pointer to
// the saved-register array, presumably so the Go-level handler can
// recover the out-of-range index/length values — confirm against
// panicBounds64's definition.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Save all 14 int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	MOVQ	AX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	MOVQ	BX, 40(SP)
	// skip SP @ 48(SP)
	MOVQ	BP, 56(SP)
	MOVQ	SI, 64(SP)
	MOVQ	DI, 72(SP)
	MOVQ	R8, 80(SP)
	MOVQ	R9, 88(SP)
	MOVQ	R10, 96(SP)
	MOVQ	R11, 104(SP)
	MOVQ	R12, 112(SP)
	MOVQ	R13, 120(SP)
	// skip R14 @ 128(SP) (aka G)
	MOVQ	R15, 136(SP)

	MOVQ	SP, AX	// hide SP read from vet
	MOVQ	152(AX), AX	// PC immediately after call to panicBounds (144-byte frame + 8-byte return address)
	LEAQ	16(SP), BX	// BX = start of the saved-register array
	CALL	runtime·panicBounds64<ABIInternal>(SB)
	RET
2095
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
// On Windows, tls_g is declared zero here with no DATA initializer;
// NOTE(review): the real TLS slot appears to be filled in at startup
// (the file header mentions wintls setting it up in rt0_go) — confirm.
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
2105
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// How it works: the CALL pushes the address of the nospec loop as
// its return address and jumps 4 bytes ahead to setup. setup
// overwrites that pushed return address on the stack with the real
// branch target held in register number `reg`, so the final RET
// transfers there architecturally — while the CPU's return-stack
// predictor speculates harmlessly into the PAUSE/JMP nospec loop.
// The MOVQ is hand-encoded so it works for any register 0-15:
// REX = 0x48|REX.R (bit from reg&8), then opcode 0x89 and a
// ModRM/SIB pair addressing 0(SP) with reg&7 in the reg field.
#define RETPOLINE(reg) \
/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
/* nospec: */ \
/*   PAUSE */          BYTE $0xF3; BYTE $0x90; \
/*   JMP nospec */     BYTE $0xEB; BYTE $-(2+2); \
/* setup: */ \
/*   MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
                       BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
/*   RET */            BYTE $0xC3
2119
// One retpoline thunk per general-purpose register; the name suffix
// is the register holding the indirect branch target, and the
// RETPOLINE argument is that register's x86-64 encoding number.
// SP (encoding 4) is skipped: its ModRM/SIB encoding is special and
// an indirect branch through SP does not occur.
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2136
// getfp returns the current value of the frame pointer register, BP.
// Because this is a NOSPLIT|NOFRAME leaf that never writes BP, the
// value returned is the caller's frame pointer.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ	BP, AX	// AX is the ABIInternal integer result register
	RET
2140
// (removed non-source artifact: "View as plain text" link text left over
// from an HTML source view; it is not part of asm_amd64.s and would not
// assemble.)