Text file
src/runtime/asm_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_386 is the shared entry point for most 386 systems that are
11 // linked internally, i.e. the address the kernel enters for a plain
12 // -buildmode=exe binary. On entry the stack carries the argument
13 // count followed by the C-style argv vector.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 LEAL 12(SP), BX // BX = argv
16 MOVL 8(SP), AX // AX = argc
17 MOVL BX, 4(SP) // second word read by rt0_go
18 MOVL AX, 0(SP) // first word read by rt0_go
19 JMP runtime·rt0_go(SB)
20
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
26 // usual C ABI.
//
// The routine saves BP, BX, SI and DI, records argc and argv in
// file-local globals, calls runtime·libpreinit, and then starts
// _rt0_386_lib_go on a new thread: through _cgo_sys_thread_create when
// that pointer is non-nil, otherwise through runtime·newosproc0.
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
28 PUSHL BP
29 MOVL SP, BP
30 PUSHL BX
31 PUSHL SI
32 PUSHL DI
33
// Stash the incoming arguments (read relative to BP) for _rt0_386_lib_go.
34 MOVL 8(BP), AX
35 MOVL AX, _rt0_386_lib_argc<>(SB)
36 MOVL 12(BP), AX
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
// Reserve two words for the outgoing arguments of either call below;
// released again at the restore label.
42 SUBL $8, SP
43
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX
46 TESTL AX, AX
47 JZ nocgo
48
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
51 MOVL SP, BP
52 ANDL $~15, SP
53
54 MOVL $_rt0_386_lib_go(SB), BX
55 MOVL BX, 0(SP) // first argument: thread entry point
56 MOVL $0, 4(SP) // second argument: zero
57
58 CALL AX
59
60 MOVL BP, SP // undo the alignment
61
62 JMP restore
63
64 nocgo:
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB
66 MOVL $_rt0_386_lib_go(SB), AX
67 MOVL AX, 4(SP) // fn
68 CALL runtime·newosproc0(SB)
69
70 restore:
71 ADDL $8, SP
72 POPL DI
73 POPL SI
74 POPL BX
75 POPL BP
76 RET
77
78 // _rt0_386_lib_go initializes the Go runtime.
79 // This is started in a separate thread by _rt0_386_lib.
// It copies the argc and argv values that _rt0_386_lib stored in the
// file-local globals into its own frame and jumps to runtime·rt0_go,
// which reads them from 0(SP) and 4(SP).
80 TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
81 MOVL _rt0_386_lib_argc<>(SB), AX
82 MOVL AX, 0(SP)
83 MOVL _rt0_386_lib_argv<>(SB), AX
84 MOVL AX, 4(SP)
85 JMP runtime·rt0_go(SB)
86
// File-local 4-byte words, initialized to zero and marked NOPTR.
// _rt0_386_lib writes argc and argv here; _rt0_386_lib_go reads them back.
87 DATA _rt0_386_lib_argc<>(SB)/4, $0
88 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
89 DATA _rt0_386_lib_argv<>(SB)/4, $0
90 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
// rt0_go is the common bootstrap reached by JMP from the _rt0 entry points.
// In order it: saves argc and argv on a freshly aligned stack, gives g0
// provisional stack bounds, probes the CPU with CPUID (aborting with a
// message when MMX is missing), sets up thread-local storage either via
// _cgo_init or via ldt0setup, links g0 and m0 together, runs check, args,
// osinit and schedinit, queues the main goroutine with newproc and finally
// calls mstart.
92 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it, they don't call it.
95 MOVL 0(SP), AX
96 MOVL 4(SP), BX
97 SUBL $128, SP // plenty of scratch
98 ANDL $~15, SP
99 MOVL AX, 120(SP) // save argc, argv away
100 MOVL BX, 124(SP)
101
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP
105 LEAL (-64*1024+104)(SP), BX
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP)
110
111 // find out information about the processor we're on
112 // first see if CPUID instruction is supported.
113 PUSHFL
114 PUSHFL
115 XORL $(1<<21), 0(SP) // flip ID bit
116 POPFL
117 PUSHFL
118 POPL AX
119 XORL 0(SP), AX
120 POPFL // restore EFLAGS
121 TESTL $(1<<21), AX
122 JNE has_cpuid
123
124 bad_proc: // show that the program requires MMX.
125 MOVL $2, 0(SP)
126 MOVL $bad_proc_msg<>(SB), 4(SP)
127 MOVL $0x3d, 8(SP) // 0x3d = 61, the declared size of bad_proc_msg
128 CALL runtime·write(SB)
129 MOVL $1, 0(SP)
130 CALL runtime·exit(SB)
131 CALL runtime·abort(SB)
132
133 has_cpuid:
134 MOVL $0, AX
135 CPUID
136 MOVL AX, SI
137 CMPL AX, $0
138 JE nocpuinfo
139
140 CMPL BX, $0x756E6547 // "Genu"
141 JNE notintel
142 CMPL DX, $0x49656E69 // "ineI"
143 JNE notintel
144 CMPL CX, $0x6C65746E // "ntel"
145 JNE notintel
146 MOVB $1, runtime·isIntel(SB)
147 notintel:
148
149 // Load EAX=1 cpuid flags
150 MOVL $1, AX
151 CPUID
152 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
153 MOVL AX, runtime·processorVersionInfo(SB)
154
155 // Check for MMX support
156 TESTL $(1<<23), DX // MMX
157 JZ bad_proc
158
159 nocpuinfo:
160 // if there is an _cgo_init, call it to let it
161 // initialize and to set up GS. if not,
162 // we set up GS ourselves.
163 MOVL _cgo_init(SB), AX
164 TESTL AX, AX
165 JZ needtls
166 #ifdef GOOS_android
167 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
168 // Compensate for tls_g (+8).
169 MOVL -8(TLS), BX
170 MOVL BX, 12(SP)
171 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
172 #else
173 MOVL $0, BX
174 MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
175 #ifdef GOOS_windows
176 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
177 #else
178 MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
179 #endif
180 #endif
181 MOVL $setg_gcc<>(SB), BX
182 MOVL BX, 4(SP) // arg 2: setg_gcc
183 MOVL BP, 0(SP) // arg 1: g0
184 CALL AX
185
186 // update stackguard after _cgo_init
187 MOVL $runtime·g0(SB), CX
188 MOVL (g_stack+stack_lo)(CX), AX
189 ADDL $const_stackGuard, AX
190 MOVL AX, g_stackguard0(CX)
191 MOVL AX, g_stackguard1(CX)
192
193 #ifndef GOOS_windows
194 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
195 JMP ok
196 #endif
197 needtls:
198 #ifdef GOOS_openbsd
199 // skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
200 JMP ok
201 #endif
202 #ifdef GOOS_plan9
203 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
204 JMP ok
205 #endif
206
207 // set up %gs
208 CALL ldt0setup<>(SB)
209
210 // store through it, to make sure it works
211 get_tls(BX)
212 MOVL $0x123, g(BX)
213 MOVL runtime·m0+m_tls(SB), AX
214 CMPL AX, $0x123
215 JEQ ok
216 MOVL AX, 0 // abort
217 ok:
218 // set up m and g "registers"
219 get_tls(BX)
220 LEAL runtime·g0(SB), DX
221 MOVL DX, g(BX)
222 LEAL runtime·m0(SB), AX
223
224 // save m->g0 = g0
225 MOVL DX, m_g0(AX)
226 // save g0->m = m0
227 MOVL AX, g_m(DX)
228
229 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
230
231 // convention is D is always cleared
232 CLD
233
234 CALL runtime·check(SB)
235
236 // saved argc, argv
237 MOVL 120(SP), AX
238 MOVL AX, 0(SP)
239 MOVL 124(SP), AX
240 MOVL AX, 4(SP)
241 CALL runtime·args(SB)
242 CALL runtime·osinit(SB)
243 CALL runtime·schedinit(SB)
244
245 // create a new goroutine to start program
246 PUSHL $runtime·mainPC(SB) // entry
247 CALL runtime·newproc(SB)
248 POPL AX // drop the pushed argument
249
250 // start this M
251 CALL runtime·mstart(SB)
252
// Reached only if mstart returns.
253 CALL runtime·abort(SB)
254 RET
255
// Read-only text emitted by the bad_proc path of rt0_go. The declared
// size of 61 bytes equals the 0x3d length that path passes to write.
256 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
257 GLOBL bad_proc_msg<>(SB), RODATA, $61
258
// mainPC is a read-only 4-byte word holding the address of runtime·main.
// rt0_go pushes the address of this word as the entry argument to newproc.
259 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
260 GLOBL runtime·mainPC(SB),RODATA,$4
261
// breakpoint executes INT $3 and then returns to the caller.
262 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
263 INT $3
264 RET
265
// asminit loads the x87 FPU control word from runtime·controlWord64.
266 TEXT runtime·asminit(SB),NOSPLIT,$0-0
267 // Linux and MinGW start the FPU in extended double precision.
268 // Other operating systems use double precision.
269 // Change to double precision to match them,
270 // and to match other hardware that only has double.
271 FLDCW runtime·controlWord64(SB)
272 RET
273
// mstart is a thin wrapper, marked TOPFRAME, that calls runtime·mstart0.
274 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
275 CALL runtime·mstart0(SB)
276 RET // not reached
277
278 /*
279 * go-routine
280 */
281
282 // void gogo(Gobuf*)
283 // restore state from Gobuf; longjmp
// This entry only loads the registers that gogo<> expects
// (BX = gobuf pointer, DX = gobuf.g) and then jumps there.
284 TEXT runtime·gogo(SB), NOSPLIT, $0-4
285 MOVL buf+0(FP), BX // gobuf
286 MOVL gobuf_g(BX), DX
287 MOVL 0(DX), CX // make sure g != nil
288 JMP gogo<>(SB)
289
// gogo<> completes the switch started by runtime·gogo.
// On entry BX holds the gobuf pointer and DX the g to resume.
// It installs g in TLS, reloads SP and the context register DX from the
// gobuf, zeroes those two gobuf fields and jumps to gobuf.pc.
290 TEXT gogo<>(SB), NOSPLIT, $0
291 get_tls(CX)
292 MOVL DX, g(CX)
293 MOVL gobuf_sp(BX), SP // restore SP
294 MOVL gobuf_ctxt(BX), DX
295 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
296 MOVL $0, gobuf_ctxt(BX)
297 MOVL gobuf_pc(BX), BX // BX is reused as the jump target
298 JMP BX
299
300 // func mcall(fn func(*g))
301 // Switch to m->g0's stack, call fn(g).
302 // Fn must never return. It should gogo(&g->sched)
303 // to keep running g.
// If the caller is already running on m->g0 the routine jumps to
// badmcall; if fn does return it jumps to badmcall2.
304 TEXT runtime·mcall(SB), NOSPLIT, $0-4
305 MOVL fn+0(FP), DI
306
307 get_tls(DX)
308 MOVL g(DX), AX // save state in g->sched
309 MOVL 0(SP), BX // caller's PC
310 MOVL BX, (g_sched+gobuf_pc)(AX)
311 LEAL fn+0(FP), BX // caller's SP
312 MOVL BX, (g_sched+gobuf_sp)(AX)
313
314 // switch to m->g0 & its stack, call fn
315 MOVL g(DX), BX
316 MOVL g_m(BX), BX
317 MOVL m_g0(BX), SI
318 CMPL SI, AX // if g == m->g0 call badmcall
319 JNE 3(PC) // g != m->g0: skip the two-instruction badmcall jump
320 MOVL $runtime·badmcall(SB), AX
321 JMP AX
322 MOVL SI, g(DX) // g = m->g0
323 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
324 PUSHL AX // argument for fn: the g that called mcall
325 MOVL DI, DX // func value in DX for the callee
326 MOVL 0(DI), DI // code pointer is the first word of the func value
327 CALL DI
328 POPL AX
329 MOVL $runtime·badmcall2(SB), AX
330 JMP AX
331 RET
332
333 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
334 // of the G stack. We need to distinguish the routine that
335 // lives at the bottom of the G stack from the one that lives
336 // at the top of the system stack because the one at the top of
337 // the system stack terminates the stack walk (see topofstack()).
// gosave_systemstack_switch<> stores the address of this routine as the
// saved PC in g->sched.
338 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
339 RET
340
341 // func systemstack(fn func())
// Runs fn on the system stack.
// If the current g is m->gsignal or m->g0, fn is reached with a tail jump.
// If it is m->curg, state is saved in g->sched, the stack is switched to
// g0, fn is called, and the stack is switched back afterwards.
// Any other g is reported through badsystemstack.
342 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
343 MOVL fn+0(FP), DI // DI = fn
344 get_tls(CX)
345 MOVL g(CX), AX // AX = g
346 MOVL g_m(AX), BX // BX = m
347
348 CMPL AX, m_gsignal(BX)
349 JEQ noswitch
350
351 MOVL m_g0(BX), DX // DX = g0
352 CMPL AX, DX
353 JEQ noswitch
354
355 CMPL AX, m_curg(BX)
356 JNE bad
357
358 // switch stacks
359 // save our state in g->sched. Pretend to
360 // be systemstack_switch if the G stack is scanned.
361 CALL gosave_systemstack_switch<>(SB)
362
363 // switch to g0
364 get_tls(CX)
365 MOVL DX, g(CX)
366 MOVL (g_sched+gobuf_sp)(DX), BX
367 MOVL BX, SP
368
369 // call target function
370 MOVL DI, DX // func value in DX for the callee
371 MOVL 0(DI), DI // code pointer is the first word of the func value
372 CALL DI
373
374 // switch back to g
375 get_tls(CX)
376 MOVL g(CX), AX
377 MOVL g_m(AX), BX
378 MOVL m_curg(BX), AX
379 MOVL AX, g(CX)
380 MOVL (g_sched+gobuf_sp)(AX), SP
381 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved SP once it has been consumed
382 RET
383
384 noswitch:
385 // already on system stack; tail call the function
386 // Using a tail call here cleans up tracebacks since we won't stop
387 // at an intermediate systemstack.
388 MOVL DI, DX
389 MOVL 0(DI), DI
390 JMP DI
391
392 bad:
393 // Bad: g is not gsignal, not g0, not curg. What is it?
394 // Hide call from linker nosplit analysis.
395 MOVL $runtime·badsystemstack(SB), AX
396 CALL AX
397 INT $3
398
399 // func switchToCrashStack0(fn func())
// Makes runtime·gcrash the current g and also the g0 of the current m,
// moves SP to 32 bytes below gcrash stack.hi and calls fn.
// fn is not expected to return; if it does, abort is called.
400 TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-4
401 MOVL fn+0(FP), AX
402
403 get_tls(CX)
404 MOVL g(CX), BX // BX = g
405 MOVL g_m(BX), DX // DX = curm
406
407 // set g to gcrash
408 LEAL runtime·gcrash(SB), BX // g = &gcrash
409 MOVL DX, g_m(BX) // g.m = curm
410 MOVL BX, m_g0(DX) // curm.g0 = g
411 get_tls(CX)
412 MOVL BX, g(CX)
413
414 // switch to crashstack
415 MOVL (g_stack+stack_hi)(BX), DX
416 SUBL $(4*8), DX // leave 32 bytes free below stack.hi
417 MOVL DX, SP
418
419 // call target function
420 MOVL AX, DX // func value in DX for the callee
421 MOVL 0(AX), AX // code pointer is the first word of the func value
422 CALL AX
423
424 // should never return
425 CALL runtime·abort(SB)
426 UNDEF
427
428 /*
429 * support for morestack
430 */
431
432 // Called during function prolog when more stack is needed.
433 //
434 // The traceback routines see morestack on a g0 as being
435 // the top of a stack (for example, morestack calling newstack
436 // calling the scheduler calling newm calling gc), so we must
437 // record an argument size. For that purpose, it has no arguments.
//
// Here f denotes the function whose prolog called morestack.
// The routine records the state of f in g->sched (including the context
// register DX), records the caller of f in m->morebuf, switches to the
// m->g0 stack and calls newstack.
438 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
439 // Cannot grow scheduler stack (m->g0).
440 get_tls(CX)
441 MOVL g(CX), DI
442 MOVL g_m(DI), BX
443
444 // Set g->sched to context in f.
445 MOVL 0(SP), AX // f's PC
446 MOVL AX, (g_sched+gobuf_pc)(DI)
447 LEAL 4(SP), AX // f's SP
448 MOVL AX, (g_sched+gobuf_sp)(DI)
449 MOVL DX, (g_sched+gobuf_ctxt)(DI)
450
451 MOVL m_g0(BX), SI
452 CMPL g(CX), SI
453 JNE 3(PC) // not on g0: skip the two fatal calls
454 CALL runtime·badmorestackg0(SB)
455 CALL runtime·abort(SB)
456
457 // Cannot grow signal stack.
458 MOVL m_gsignal(BX), SI
459 CMPL g(CX), SI
460 JNE 3(PC) // not on gsignal: skip the two fatal calls
461 CALL runtime·badmorestackgsignal(SB)
462 CALL runtime·abort(SB)
463
464 // Called from f.
465 // Set m->morebuf to f's caller.
466 NOP SP // tell vet SP changed - stop checking offsets
467 MOVL 4(SP), DI // f's caller's PC
468 MOVL DI, (m_morebuf+gobuf_pc)(BX)
469 LEAL 8(SP), CX // f's caller's SP
470 MOVL CX, (m_morebuf+gobuf_sp)(BX)
471 get_tls(CX)
472 MOVL g(CX), SI
473 MOVL SI, (m_morebuf+gobuf_g)(BX)
474
475 // Call newstack on m->g0's stack.
476 MOVL m_g0(BX), BP
477 MOVL BP, g(CX)
478 MOVL (g_sched+gobuf_sp)(BP), AX
479 MOVL -4(AX), BX // fault if CALL would, before smashing SP
480 MOVL AX, SP
481 CALL runtime·newstack(SB)
482 CALL runtime·abort(SB) // crash if newstack returns
483 RET
484
// morestack_noctxt zeroes DX, the register that morestack stores into
// g->sched.ctxt, and then jumps to morestack.
485 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
486 MOVL $0, DX
487 JMP runtime·morestack(SB)
488
489 // reflectcall: call a function with the given argument list
490 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
491 // we don't have variable-sized frames, so we use a small number
492 // of constant-sized-frame functions to encode a few bits of size in the pc.
493 // Caution: ugly multiline assembly macros in your future!
494
// DISPATCH(NAME, MAXSIZE) expects the requested frame size in CX.
// When CX <= MAXSIZE (unsigned compare) it jumps to NAME through AX;
// otherwise JA 3(PC) skips the jump and execution falls through to
// whatever follows the macro.
495 #define DISPATCH(NAME,MAXSIZE) \
496 CMPL CX, $MAXSIZE; \
497 JA 3(PC); \
498 MOVL $NAME(SB), AX; \
499 JMP AX
500 // Note: can't just "JMP NAME(SB)" - bad inlining results.
501
// reflectcall loads frameSize into CX and walks the DISPATCH ladder, which
// jumps to the first call* routine whose size is at least frameSize.
// A frameSize above 1073741824 matches no entry and ends in badreflectcall.
502 TEXT ·reflectcall(SB), NOSPLIT, $0-28
503 MOVL frameSize+20(FP), CX
504 DISPATCH(runtime·call16, 16)
505 DISPATCH(runtime·call32, 32)
506 DISPATCH(runtime·call64, 64)
507 DISPATCH(runtime·call128, 128)
508 DISPATCH(runtime·call256, 256)
509 DISPATCH(runtime·call512, 512)
510 DISPATCH(runtime·call1024, 1024)
511 DISPATCH(runtime·call2048, 2048)
512 DISPATCH(runtime·call4096, 4096)
513 DISPATCH(runtime·call8192, 8192)
514 DISPATCH(runtime·call16384, 16384)
515 DISPATCH(runtime·call32768, 32768)
516 DISPATCH(runtime·call65536, 65536)
517 DISPATCH(runtime·call131072, 131072)
518 DISPATCH(runtime·call262144, 262144)
519 DISPATCH(runtime·call524288, 524288)
520 DISPATCH(runtime·call1048576, 1048576)
521 DISPATCH(runtime·call2097152, 2097152)
522 DISPATCH(runtime·call4194304, 4194304)
523 DISPATCH(runtime·call8388608, 8388608)
524 DISPATCH(runtime·call16777216, 16777216)
525 DISPATCH(runtime·call33554432, 33554432)
526 DISPATCH(runtime·call67108864, 67108864)
527 DISPATCH(runtime·call134217728, 134217728)
528 DISPATCH(runtime·call268435456, 268435456)
529 DISPATCH(runtime·call536870912, 536870912)
530 DISPATCH(runtime·call1073741824, 1073741824)
// No size class fits.
531 MOVL $runtime·badreflectcall(SB), AX
532 JMP AX
533
// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a MAXSIZE-byte
// frame and 28 bytes of arguments. The generated body:
//   1. copies stackArgsSize bytes from stackArgs to the bottom of its frame,
//   2. calls the code pointer stored in the first word of f, with f in DX,
//   3. hands callRet<> the type (DX), the destination stackArgs+stackRetOffset
//      (DI), the source SP+stackRetOffset (SI) and the byte count
//      stackArgsSize-stackRetOffset (CX) so the results are copied back.
534 #define CALLFN(NAME,MAXSIZE) \
535 TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
536 NO_LOCAL_POINTERS; \
537 /* copy arguments to stack */ \
538 MOVL stackArgs+8(FP), SI; \
539 MOVL stackArgsSize+12(FP), CX; \
540 MOVL SP, DI; \
541 REP;MOVSB; \
542 /* call function */ \
543 MOVL f+4(FP), DX; \
544 MOVL (DX), AX; \
545 PCDATA $PCDATA_StackMapIndex, $0; \
546 CALL AX; \
547 /* copy return values back */ \
548 MOVL stackArgsType+0(FP), DX; \
549 MOVL stackArgs+8(FP), DI; \
550 MOVL stackArgsSize+12(FP), CX; \
551 MOVL stackRetOffset+16(FP), BX; \
552 MOVL SP, SI; \
553 ADDL BX, DI; \
554 ADDL BX, SI; \
555 SUBL BX, CX; \
556 CALL callRet<>(SB); \
557 RET
558
559 // callRet is the tail of every call* routine: it copies the results
560 // back to the caller. It lives in its own function so that it can own
561 // the stack slots for the reflectcallmove arguments. It is not a Go ABI
562 // function; its inputs arrive in DX, DI, SI and CX.
563 TEXT callRet<>(SB), NOSPLIT, $20-0
564 MOVL $0, 16(SP) // fifth argument word is always zero
565 MOVL CX, 12(SP) // fourth: CX
566 MOVL SI, 8(SP) // third: SI
567 MOVL DI, 4(SP) // second: DI
568 MOVL DX, 0(SP) // first: DX
569 CALL runtime·reflectcallmove(SB)
570 RET
571
// One call* routine per size class, 16 bytes up to 1073741824 bytes in
// powers of two. The sizes are the same ones listed in the DISPATCH ladder
// of reflectcall.
572 CALLFN(·call16, 16)
573 CALLFN(·call32, 32)
574 CALLFN(·call64, 64)
575 CALLFN(·call128, 128)
576 CALLFN(·call256, 256)
577 CALLFN(·call512, 512)
578 CALLFN(·call1024, 1024)
579 CALLFN(·call2048, 2048)
580 CALLFN(·call4096, 4096)
581 CALLFN(·call8192, 8192)
582 CALLFN(·call16384, 16384)
583 CALLFN(·call32768, 32768)
584 CALLFN(·call65536, 65536)
585 CALLFN(·call131072, 131072)
586 CALLFN(·call262144, 262144)
587 CALLFN(·call524288, 524288)
588 CALLFN(·call1048576, 1048576)
589 CALLFN(·call2097152, 2097152)
590 CALLFN(·call4194304, 4194304)
591 CALLFN(·call8388608, 8388608)
592 CALLFN(·call16777216, 16777216)
593 CALLFN(·call33554432, 33554432)
594 CALLFN(·call67108864, 67108864)
595 CALLFN(·call134217728, 134217728)
596 CALLFN(·call268435456, 268435456)
597 CALLFN(·call536870912, 536870912)
598 CALLFN(·call1073741824, 1073741824)
599
// procyield executes the PAUSE instruction cycles times.
// It takes one 4-byte argument, cycles, so the TEXT directive declares an
// argument size of 4 to match the word read from the frame below.
// cycles is expected to be nonzero: the counter is decremented before it is
// tested, so a zero count would wrap around instead of returning at once.
600 TEXT runtime·procyield(SB),NOSPLIT,$0-4
601 MOVL cycles+0(FP), AX // AX = remaining iterations
602 again:
603 PAUSE
604 SUBL $1, AX
605 JNZ again
606 RET
607
// publicationBarrier has an empty body on this architecture; it only returns.
608 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
609 // Stores are already ordered on x86, so this is just a
610 // compile barrier.
611 RET
612
613 // Save state of caller into g->sched,
614 // but using fake PC from systemstack_switch.
615 // Must only be called from functions with no locals ($0)
616 // or else unwinding from systemstack_switch is incorrect.
// AX and BX are saved on entry and restored before returning.
// Aborts if g->sched.ctxt is not already zero.
617 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
618 PUSHL AX
619 PUSHL BX
620 get_tls(BX)
621 MOVL g(BX), BX // BX = current g
622 LEAL arg+0(FP), AX // saved SP: the address just above our return PC
623 MOVL AX, (g_sched+gobuf_sp)(BX)
624 MOVL $runtime·systemstack_switch(SB), AX
625 MOVL AX, (g_sched+gobuf_pc)(BX)
626 // Assert ctxt is zero. See func save.
627 MOVL (g_sched+gobuf_ctxt)(BX), AX
628 TESTL AX, AX
629 JZ 2(PC) // ctxt is zero: skip the abort call
630 CALL runtime·abort(SB)
631 POPL BX
632 POPL AX
633 RET
634
635 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
636 // Call fn(arg) aligned appropriately for the gcc ABI.
637 // Called on a system stack, and there may be no g yet (during needm).
// The original SP is kept in a slot of the newly aligned frame and is
// reloaded from there after fn returns.
638 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
639 MOVL fn+0(FP), AX
640 MOVL arg+4(FP), BX
641 MOVL SP, DX
642 SUBL $32, SP
643 ANDL $~15, SP // alignment, perhaps unnecessary
644 MOVL DX, 8(SP) // save old SP
645 MOVL BX, 0(SP) // first argument in x86-32 ABI
646 CALL AX
647 MOVL 8(SP), DX // reload old SP from its slot
648 MOVL DX, SP
649 RET
650
651 // func asmcgocall(fn, arg unsafe.Pointer) int32
652 // Call fn(arg) on the scheduler stack,
653 // aligned appropriately for the gcc ABI.
654 // See cgocall.go for more details.
//
// Three cases are handled:
//   - no g in TLS: call fn on the current stack (nosave),
//   - g is m->gsignal or m->g0: call fn on the current stack (noswitch),
//   - otherwise: save state, switch to the m->g0 stack, then noswitch.
// The value fn leaves in AX is stored as the result.
655 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
656 MOVL fn+0(FP), AX
657 MOVL arg+4(FP), BX
658
659 MOVL SP, DX // DX = SP on entry, used below to compute the stack depth
660
661 // Figure out if we need to switch to m->g0 stack.
662 // We get called to create new OS threads too, and those
663 // come in on the m->g0 stack already. Or we might already
664 // be on the m->gsignal stack.
665 get_tls(CX)
666 MOVL g(CX), DI
667 CMPL DI, $0
668 JEQ nosave // Don't even have a G yet.
669 MOVL g_m(DI), BP
670 CMPL DI, m_gsignal(BP)
671 JEQ noswitch
672 MOVL m_g0(BP), SI
673 CMPL DI, SI
674 JEQ noswitch
675 CALL gosave_systemstack_switch<>(SB)
676 get_tls(CX)
677 MOVL SI, g(CX)
678 MOVL (g_sched+gobuf_sp)(SI), SP
679
680 noswitch:
681 // Now on a scheduling stack (a pthread-created stack).
682 SUBL $32, SP
683 ANDL $~15, SP // alignment, perhaps unnecessary
684 MOVL DI, 8(SP) // save g
685 MOVL (g_stack+stack_hi)(DI), DI
686 SUBL DX, DI // depth = g.stack.hi - entry SP
687 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
688 MOVL BX, 0(SP) // first argument in x86-32 ABI
689 CALL AX
690
691 // Restore registers, g, stack pointer.
692 get_tls(CX)
693 MOVL 8(SP), DI
694 MOVL (g_stack+stack_hi)(DI), SI
695 SUBL 4(SP), SI // SP = g.stack.hi - saved depth
696 MOVL DI, g(CX)
697 MOVL SI, SP
698
699 MOVL AX, ret+8(FP)
700 RET
701 nosave:
702 // Now on a scheduling stack (a pthread-created stack).
703 SUBL $32, SP
704 ANDL $~15, SP // alignment, perhaps unnecessary
705 MOVL DX, 4(SP) // save original stack pointer
706 MOVL BX, 0(SP) // first argument in x86-32 ABI
707 CALL AX
708
709 MOVL 4(SP), CX // restore original stack pointer
710 MOVL CX, SP
711 MOVL AX, ret+8(FP)
712 RET
713
714 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
715 // See cgocall.go for more details.
//
// Outline: make sure the thread has an m (borrowing one through
// needAndBindM when g is nil), switch from the m->g0 stack to the m->curg
// stack, call cgocallbackg(fn, frame, ctxt), switch back, and call dropm
// when the borrowed m must be released. A nil fn means: restore g from
// frame and only call dropm.
716 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
717 NO_LOCAL_POINTERS
718
719 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
720 // It is used to dropm while thread is exiting.
721 MOVL fn+0(FP), AX
722 CMPL AX, $0
723 JNE loadg
724 // Restore the g from frame.
725 get_tls(CX)
726 MOVL frame+4(FP), BX
727 MOVL BX, g(CX)
728 JMP dropm
729
730 loadg:
731 // If g is nil, either Go did not create the current thread or,
732 // on pthread platforms, this thread has never called into Go.
733 // Call needm to obtain one for temporary use.
734 // In this case, we're running on the thread stack, so there's
735 // lots of space, but the linker doesn't know. Hide the call from
736 // the linker analysis by using an indirect call through AX.
737 get_tls(CX)
738 #ifdef GOOS_windows
739 MOVL $0, BP
740 CMPL CX, $0
741 JEQ 2(PC) // TODO
742 #endif
743 MOVL g(CX), BP
744 CMPL BP, $0
745 JEQ needm
746 MOVL g_m(BP), BP
747 MOVL BP, savedm-4(SP) // saved copy of oldm
748 JMP havem
749 needm:
750 MOVL $runtime·needAndBindM(SB), AX
751 CALL AX
752 MOVL $0, savedm-4(SP) // no previous m: remembered as zero
753 get_tls(CX)
754 MOVL g(CX), BP
755 MOVL g_m(BP), BP
756
757 // Set m->sched.sp = SP, so that if a panic happens
758 // during the function we are about to execute, it will
759 // have a valid SP to run on the g0 stack.
760 // The next few lines (after the havem label)
761 // will save this SP onto the stack and then write
762 // the same SP back to m->sched.sp. That seems redundant,
763 // but if an unrecovered panic happens, unwindm will
764 // restore the g->sched.sp from the stack location
765 // and then systemstack will try to use it. If we don't set it here,
766 // that restored SP will be uninitialized (typically 0) and
767 // will not be usable.
768 MOVL m_g0(BP), SI
769 MOVL SP, (g_sched+gobuf_sp)(SI)
770
771 havem:
772 // Now there's a valid m, and we're running on its m->g0.
773 // Save current m->g0->sched.sp on stack and then set it to SP.
774 // Save current sp in m->g0->sched.sp in preparation for
775 // switch back to m->curg stack.
776 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
777 MOVL m_g0(BP), SI
778 MOVL (g_sched+gobuf_sp)(SI), AX
779 MOVL AX, 0(SP)
780 MOVL SP, (g_sched+gobuf_sp)(SI)
781
782 // Switch to m->curg stack and call runtime.cgocallbackg.
783 // Because we are taking over the execution of m->curg
784 // but *not* resuming what had been running, we need to
785 // save that information (m->curg->sched) so we can restore it.
786 // We can restore m->curg->sched.sp easily, because calling
787 // runtime.cgocallbackg leaves SP unchanged upon return.
788 // To save m->curg->sched.pc, we push it onto the curg stack and
789 // open a frame the same size as cgocallback's g0 frame.
790 // Once we switch to the curg stack, the pushed PC will appear
791 // to be the return PC of cgocallback, so that the traceback
792 // will seamlessly trace back into the earlier calls.
793 MOVL m_curg(BP), SI
794 MOVL SI, g(CX)
795 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
796 MOVL (g_sched+gobuf_pc)(SI), BP
797 MOVL BP, -4(DI) // "push" return PC on the g stack
798 // Gather our arguments into registers.
799 MOVL fn+0(FP), AX
800 MOVL frame+4(FP), BX
801 MOVL ctxt+8(FP), CX
802 LEAL -(4+12)(DI), SP // Must match declared frame size
803 MOVL AX, 0(SP)
804 MOVL BX, 4(SP)
805 MOVL CX, 8(SP)
806 CALL runtime·cgocallbackg(SB)
807
808 // Restore g->sched (== m->curg->sched) from saved values.
809 get_tls(CX)
810 MOVL g(CX), SI
811 MOVL 12(SP), BP // Must match declared frame size
812 MOVL BP, (g_sched+gobuf_pc)(SI)
813 LEAL (12+4)(SP), DI // Must match declared frame size
814 MOVL DI, (g_sched+gobuf_sp)(SI)
815
816 // Switch back to m->g0's stack and restore m->g0->sched.sp.
817 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
818 // so we do not have to restore it.)
819 MOVL g(CX), BP
820 MOVL g_m(BP), BP
821 MOVL m_g0(BP), SI
822 MOVL SI, g(CX)
823 MOVL (g_sched+gobuf_sp)(SI), SP
824 MOVL 0(SP), AX
825 MOVL AX, (g_sched+gobuf_sp)(SI)
826
827 // If the m on entry was nil, we called needm above to borrow an m,
828 // 1. for the duration of the call on non-pthread platforms,
829 // 2. or the duration of the C thread alive on pthread platforms.
830 // If the m on entry wasn't nil,
831 // 1. the thread might be a Go thread,
832 // 2. or it wasn't the first call from a C thread on pthread platforms,
833 // since then we skip dropm to reuse the m in the first call.
834 MOVL savedm-4(SP), DX
835 CMPL DX, $0
836 JNE droppedm
837
838 // Skip dropm to reuse it in the next call, when a pthread key has been created.
839 MOVL _cgo_pthread_key_created(SB), DX
840 // A nil _cgo_pthread_key_created pointer means cgo is disabled, so dropm is needed.
841 CMPL DX, $0
842 JEQ dropm
843 CMPL (DX), $0
844 JNE droppedm
845
846 dropm:
// Indirect call through AX, like the needAndBindM call above.
847 MOVL $runtime·dropm(SB), AX
848 CALL AX
849 droppedm:
850
851 // Done!
852 RET
853
854 // void setg(G*); set g. for use by needm.
// On Windows the slot at offset tls_g from FS is updated first: it is
// cleared when gg is nil (and the routine returns immediately), otherwise
// it is set to the address of gg.m.tls. On every other path gg is then
// stored in the TLS g slot.
855 TEXT runtime·setg(SB), NOSPLIT, $0-4
856 MOVL gg+0(FP), BX
857 #ifdef GOOS_windows
858 MOVL runtime·tls_g(SB), CX
859 CMPL BX, $0
860 JNE settls
861 MOVL $0, 0(CX)(FS)
862 RET
863 settls:
864 MOVL g_m(BX), AX
865 LEAL m_tls(AX), AX
866 MOVL AX, 0(CX)(FS)
867 #endif
868 get_tls(CX)
869 MOVL BX, g(CX)
870 RET
871
872 // void setg_gcc(G*); stores its argument in the TLS g slot. Called from gcc-compiled code.
873 TEXT setg_gcc<>(SB), NOSPLIT, $0
874 MOVL gg+0(FP), DX // DX = new g
875 get_tls(AX)
876 MOVL DX, g(AX)
877 RET
878
// abort raises a breakpoint trap with INT $3. If execution continues past
// the trap, the routine spins in an endless loop; it has no return path.
879 TEXT runtime·abort(SB),NOSPLIT,$0-0
880 INT $3
881 loop:
882 JMP loop
883
884 // check that SP is in range (g->stack.lo, g->stack.hi)
// Both tests use the strict unsigned JHI, so SP equal to either bound
// also leads to abort.
885 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
886 get_tls(CX)
887 MOVL g(CX), AX
888 CMPL (g_stack+stack_hi)(AX), SP
889 JHI 2(PC) // stack.hi > SP: skip the abort call
890 CALL runtime·abort(SB)
891 CMPL SP, (g_stack+stack_lo)(AX)
892 JHI 2(PC) // SP > stack.lo: skip the abort call
893 CALL runtime·abort(SB)
894 RET
895
896 // func cputicks() int64
// Returns the counter read by RDTSCP or RDTSC: AX is stored as the low
// word and DX as the high word of the result.
// Path selection: softfloat builds use a bare RDTSC; otherwise RDTSCP is
// used when the X86 feature flag for it equals 1, else MFENCE, LFENCE
// and RDTSC.
897 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
898 // LFENCE/MFENCE instruction support is dependent on SSE2.
899 // When no SSE2 support is present do not enforce any serialization
900 // since using CPUID to serialize the instruction stream is
901 // very costly.
902 #ifdef GO386_softfloat
903 JMP rdtsc // no fence instructions available
904 #endif
905 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
906 JNE fences
907 // Instruction stream serializing RDTSCP is supported.
908 // RDTSCP is supported by Intel Nehalem (2008) and
909 // AMD K8 Rev. F (2006) and newer.
910 RDTSCP
911 done:
912 MOVL AX, ret_lo+0(FP)
913 MOVL DX, ret_hi+4(FP)
914 RET
915 fences:
916 // MFENCE is instruction stream serializing and flushes the
917 // store buffers on AMD. The serialization semantics of LFENCE on AMD
918 // are dependent on MSR C001_1029 and CPU generation.
919 // LFENCE on Intel does wait for all previous instructions to have executed.
920 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
921 // previous instructions executed and all previous loads and stores to globally visible.
922 // Using MFENCE;LFENCE here aligns the serializing properties without
923 // runtime detection of CPU manufacturer.
924 MFENCE
925 LFENCE
926 rdtsc:
927 RDTSC
928 JMP done
929
// ldt0setup calls setldt with three argument words: the entry number 7,
// the address of m0.tls, and the size 32. On Windows it calls wintls first.
// rt0_go calls it on the path that does not go through _cgo_init.
930 TEXT ldt0setup<>(SB),NOSPLIT,$16-0
931 #ifdef GOOS_windows
932 CALL runtime·wintls(SB)
933 #endif
934 // set up ldt 7 to point at m0.tls
935 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
936 // the entry number is just a hint. setldt will set up GS with what it used.
937 MOVL $7, 0(SP)
938 LEAL runtime·m0+m_tls(SB), AX
939 MOVL AX, 4(SP)
940 MOVL $32, 8(SP) // sizeof(tls array)
941 CALL runtime·setldt(SB)
942 RET
943
// emptyfunc does nothing but return. Its flags field is 0 rather than
// NOSPLIT; rt0_go calls it early with the note that a wrong stack check
// should fault there.
// NOTE(review): presumably the point is to run the assembler-inserted
// stack-split prologue once - confirm.
944 TEXT runtime·emptyfunc(SB),0,$0-0
945 RET
946
947 // memhash: AES-instruction hash of s bytes at p when useAeshash is set, else memhashFallback.
948 TEXT runtime·memhash(SB),NOSPLIT,$0-16
949 CMPB runtime·useAeshash(SB), $0
950 JNE useaes
951 JMP runtime·memhashFallback(SB)
952 useaes:
953 LEAL ret+12(FP), DX // DX = address of the result slot
954 MOVL s+8(FP), BX // BX = size
955 MOVL p+0(FP), AX // AX = ptr to data
956 JMP aeshashbody<>(SB)
957
958 TEXT runtime·strhash(SB),NOSPLIT,$0-12 // hash the string that p points to
959 CMPB runtime·useAeshash(SB), $0
960 JNE useaes
961 JMP runtime·strhashFallback(SB) // AES hashing disabled
962 useaes:
963 LEAL ret+8(FP), DX // DX = address of the result slot
964 MOVL p+0(FP), AX // AX = ptr to string object
965 MOVL 4(AX), BX // BX = length of string
966 MOVL (AX), AX // AX = string data
967 JMP aeshashbody<>(SB)
968
969 // AX: data
970 // BX: length
971 // DX: address to put return value
//
// aeshashbody is entered by JMP from memhash and strhash and declares no
// frame or arguments of its own, so h+4(FP) below refers to the seed
// argument of whichever of those functions jumped here.
// The code branches on the length in BX (0, 1-15, 16, 17-32, 33-64, 65+)
// and stores the low 32 bits of the final XMM value at (DX).
972 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
973 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
974 PINSRW $4, BX, X0 // 16 bits of length
975 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
976 MOVO X0, X1 // save unscrambled seed
977 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
978 AESENC X0, X0 // scramble seed
979
980 CMPL BX, $16
981 JB aes0to15
982 JE aes16
983 CMPL BX, $32
984 JBE aes17to32
985 CMPL BX, $64
986 JBE aes33to64
987 JMP aes65plus
988
989 aes0to15:
990 TESTL BX, BX
991 JE aes0
992
993 ADDL $16, AX
994 TESTW $0xff0, AX // zero when the low 12 bits of the original pointer are 0xff0 or above
995 JE endofpage
996
997 // 16 bytes loaded at this address won't cross
998 // a page boundary, so we can load it directly.
999 MOVOU -16(AX), X1
1000 ADDL BX, BX // with the *8 scale below: 16 bytes per table entry
1001 PAND masks<>(SB)(BX*8), X1
1002
1003 final1:
1004 PXOR X0, X1 // xor data with seed
1005 AESENC X1, X1 // scramble combo 3 times
1006 AESENC X1, X1
1007 AESENC X1, X1
1008 MOVL X1, (DX)
1009 RET
1010
1011 endofpage:
1012 // address ends in 1111xxxx. Might be up against
1013 // a page boundary, so load ending at last byte.
1014 // Then shift bytes down using pshufb.
1015 MOVOU -32(AX)(BX*1), X1
1016 ADDL BX, BX // with the *8 scale below: 16 bytes per table entry
1017 PSHUFB shifts<>(SB)(BX*8), X1
1018 JMP final1
1019
1020 aes0:
1021 // Return scrambled input seed
1022 AESENC X0, X0
1023 MOVL X0, (DX)
1024 RET
1025
1026 aes16:
1027 MOVOU (AX), X1
1028 JMP final1
1029
1030 aes17to32:
1031 // make second starting seed
1032 PXOR runtime·aeskeysched+16(SB), X1
1033 AESENC X1, X1
1034
1035 // load data to be hashed
1036 MOVOU (AX), X2
1037 MOVOU -16(AX)(BX*1), X3 // last 16 bytes; overlaps X2 when length < 32
1038
1039 // xor with seed
1040 PXOR X0, X2
1041 PXOR X1, X3
1042
1043 // scramble 3 times
1044 AESENC X2, X2
1045 AESENC X3, X3
1046 AESENC X2, X2
1047 AESENC X3, X3
1048 AESENC X2, X2
1049 AESENC X3, X3
1050
1051 // combine results
1052 PXOR X3, X2
1053 MOVL X2, (DX)
1054 RET
1055
1056 aes33to64:
1057 // make 3 more starting seeds
1058 MOVO X1, X2
1059 MOVO X1, X3
1060 PXOR runtime·aeskeysched+16(SB), X1
1061 PXOR runtime·aeskeysched+32(SB), X2
1062 PXOR runtime·aeskeysched+48(SB), X3
1063 AESENC X1, X1
1064 AESENC X2, X2
1065 AESENC X3, X3
1066
// first 32 and last 32 bytes of the input (the two halves may overlap)
1067 MOVOU (AX), X4
1068 MOVOU 16(AX), X5
1069 MOVOU -32(AX)(BX*1), X6
1070 MOVOU -16(AX)(BX*1), X7
1071
1072 PXOR X0, X4
1073 PXOR X1, X5
1074 PXOR X2, X6
1075 PXOR X3, X7
1076
1077 AESENC X4, X4
1078 AESENC X5, X5
1079 AESENC X6, X6
1080 AESENC X7, X7
1081
1082 AESENC X4, X4
1083 AESENC X5, X5
1084 AESENC X6, X6
1085 AESENC X7, X7
1086
1087 AESENC X4, X4
1088 AESENC X5, X5
1089 AESENC X6, X6
1090 AESENC X7, X7
1091
1092 PXOR X6, X4
1093 PXOR X7, X5
1094 PXOR X5, X4
1095 MOVL X4, (DX)
1096 RET
1097
1098 aes65plus:
1099 // make 3 more starting seeds
1100 MOVO X1, X2
1101 MOVO X1, X3
1102 PXOR runtime·aeskeysched+16(SB), X1
1103 PXOR runtime·aeskeysched+32(SB), X2
1104 PXOR runtime·aeskeysched+48(SB), X3
1105 AESENC X1, X1
1106 AESENC X2, X2
1107 AESENC X3, X3
1108
1109 // start with last (possibly overlapping) block
1110 MOVOU -64(AX)(BX*1), X4
1111 MOVOU -48(AX)(BX*1), X5
1112 MOVOU -32(AX)(BX*1), X6
1113 MOVOU -16(AX)(BX*1), X7
1114
1115 // scramble state once
1116 AESENC X0, X4
1117 AESENC X1, X5
1118 AESENC X2, X6
1119 AESENC X3, X7
1120
1121 // compute number of remaining 64-byte blocks
1122 DECL BX
1123 SHRL $6, BX // BX = (length-1)/64, at least 1 on this path
1124
1125 aesloop:
1126 // scramble state, xor in a block
1127 MOVOU (AX), X0
1128 MOVOU 16(AX), X1
1129 MOVOU 32(AX), X2
1130 MOVOU 48(AX), X3
1131 AESENC X0, X4
1132 AESENC X1, X5
1133 AESENC X2, X6
1134 AESENC X3, X7
1135
1136 // scramble state
1137 AESENC X4, X4
1138 AESENC X5, X5
1139 AESENC X6, X6
1140 AESENC X7, X7
1141
1142 ADDL $64, AX
1143 DECL BX
1144 JNE aesloop
1145
1146 // 3 more scrambles to finish
1147 AESENC X4, X4
1148 AESENC X5, X5
1149 AESENC X6, X6
1150 AESENC X7, X7
1151
1152 AESENC X4, X4
1153 AESENC X5, X5
1154 AESENC X6, X6
1155 AESENC X7, X7
1156
1157 AESENC X4, X4
1158 AESENC X5, X5
1159 AESENC X6, X6
1160 AESENC X7, X7
1161
1162 PXOR X6, X4
1163 PXOR X7, X5
1164 PXOR X5, X4
1165 MOVL X4, (DX)
1166 RET
1167
// memhash32 mixes the 4 bytes at p with the seed h and stores the low
// 32 bits of the result in ret. When runtime·useAeshash is zero it jumps
// to runtime·memhash32Fallback instead of using the AES instructions.
1168 TEXT runtime·memhash32(SB),NOSPLIT,$0-12
1169 CMPB runtime·useAeshash(SB), $0
1170 JEQ noaes // AES hashing disabled: take the fallback
1171 MOVL p+0(FP), AX // ptr to data
1172 MOVL h+4(FP), X0 // seed goes in the low dword of X0
1173 PINSRD $1, (AX), X0 // the 4 data bytes go in dword 1
1174 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, each using a different 16-byte slice of aeskeysched
1175 AESENC runtime·aeskeysched+16(SB), X0
1176 AESENC runtime·aeskeysched+32(SB), X0
1177 MOVL X0, ret+8(FP) // result is the low 32 bits of X0
1178 RET
1179 noaes:
1180 JMP runtime·memhash32Fallback(SB) // tail jump; the fallback sees the same argument frame
1181
// memhash64 mixes the 8 bytes at p with the seed h and stores the low
// 32 bits of the result in ret. When runtime·useAeshash is zero it jumps
// to runtime·memhash64Fallback instead of using the AES instructions.
1182 TEXT runtime·memhash64(SB),NOSPLIT,$0-12
1183 CMPB runtime·useAeshash(SB), $0
1184 JEQ noaes // AES hashing disabled: take the fallback
1185 MOVL p+0(FP), AX // ptr to data
1186 MOVQ (AX), X0 // the 8 data bytes go in the low half of X0
1187 PINSRD $2, h+4(FP), X0 // seed goes in dword 2
1188 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, each using a different 16-byte slice of aeskeysched
1189 AESENC runtime·aeskeysched+16(SB), X0
1190 AESENC runtime·aeskeysched+32(SB), X0
1191 MOVL X0, ret+8(FP) // result is the low 32 bits of X0
1192 RET
1193 noaes:
1194 JMP runtime·memhash64Fallback(SB) // tail jump; the fallback sees the same argument frame
1195
1196 // simple mask to get rid of data in the high part of the register.
// The table has 16 entries of 16 bytes each. Entry n (at offset n*16) has
// 0xff in its n lowest-addressed bytes and zero in the remaining bytes,
// so entry 0 is all zero and entry 15 keeps the low 15 bytes.
1197 DATA masks<>+0x00(SB)/4, $0x00000000
1198 DATA masks<>+0x04(SB)/4, $0x00000000
1199 DATA masks<>+0x08(SB)/4, $0x00000000
1200 DATA masks<>+0x0c(SB)/4, $0x00000000
1201
1202 DATA masks<>+0x10(SB)/4, $0x000000ff
1203 DATA masks<>+0x14(SB)/4, $0x00000000
1204 DATA masks<>+0x18(SB)/4, $0x00000000
1205 DATA masks<>+0x1c(SB)/4, $0x00000000
1206
1207 DATA masks<>+0x20(SB)/4, $0x0000ffff
1208 DATA masks<>+0x24(SB)/4, $0x00000000
1209 DATA masks<>+0x28(SB)/4, $0x00000000
1210 DATA masks<>+0x2c(SB)/4, $0x00000000
1211
1212 DATA masks<>+0x30(SB)/4, $0x00ffffff
1213 DATA masks<>+0x34(SB)/4, $0x00000000
1214 DATA masks<>+0x38(SB)/4, $0x00000000
1215 DATA masks<>+0x3c(SB)/4, $0x00000000
1216
1217 DATA masks<>+0x40(SB)/4, $0xffffffff
1218 DATA masks<>+0x44(SB)/4, $0x00000000
1219 DATA masks<>+0x48(SB)/4, $0x00000000
1220 DATA masks<>+0x4c(SB)/4, $0x00000000
1221
1222 DATA masks<>+0x50(SB)/4, $0xffffffff
1223 DATA masks<>+0x54(SB)/4, $0x000000ff
1224 DATA masks<>+0x58(SB)/4, $0x00000000
1225 DATA masks<>+0x5c(SB)/4, $0x00000000
1226
1227 DATA masks<>+0x60(SB)/4, $0xffffffff
1228 DATA masks<>+0x64(SB)/4, $0x0000ffff
1229 DATA masks<>+0x68(SB)/4, $0x00000000
1230 DATA masks<>+0x6c(SB)/4, $0x00000000
1231
1232 DATA masks<>+0x70(SB)/4, $0xffffffff
1233 DATA masks<>+0x74(SB)/4, $0x00ffffff
1234 DATA masks<>+0x78(SB)/4, $0x00000000
1235 DATA masks<>+0x7c(SB)/4, $0x00000000
1236
1237 DATA masks<>+0x80(SB)/4, $0xffffffff
1238 DATA masks<>+0x84(SB)/4, $0xffffffff
1239 DATA masks<>+0x88(SB)/4, $0x00000000
1240 DATA masks<>+0x8c(SB)/4, $0x00000000
1241
1242 DATA masks<>+0x90(SB)/4, $0xffffffff
1243 DATA masks<>+0x94(SB)/4, $0xffffffff
1244 DATA masks<>+0x98(SB)/4, $0x000000ff
1245 DATA masks<>+0x9c(SB)/4, $0x00000000
1246
1247 DATA masks<>+0xa0(SB)/4, $0xffffffff
1248 DATA masks<>+0xa4(SB)/4, $0xffffffff
1249 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1250 DATA masks<>+0xac(SB)/4, $0x00000000
1251
1252 DATA masks<>+0xb0(SB)/4, $0xffffffff
1253 DATA masks<>+0xb4(SB)/4, $0xffffffff
1254 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1255 DATA masks<>+0xbc(SB)/4, $0x00000000
1256
1257 DATA masks<>+0xc0(SB)/4, $0xffffffff
1258 DATA masks<>+0xc4(SB)/4, $0xffffffff
1259 DATA masks<>+0xc8(SB)/4, $0xffffffff
1260 DATA masks<>+0xcc(SB)/4, $0x00000000
1261
1262 DATA masks<>+0xd0(SB)/4, $0xffffffff
1263 DATA masks<>+0xd4(SB)/4, $0xffffffff
1264 DATA masks<>+0xd8(SB)/4, $0xffffffff
1265 DATA masks<>+0xdc(SB)/4, $0x000000ff
1266
1267 DATA masks<>+0xe0(SB)/4, $0xffffffff
1268 DATA masks<>+0xe4(SB)/4, $0xffffffff
1269 DATA masks<>+0xe8(SB)/4, $0xffffffff
1270 DATA masks<>+0xec(SB)/4, $0x0000ffff
1271
1272 DATA masks<>+0xf0(SB)/4, $0xffffffff
1273 DATA masks<>+0xf4(SB)/4, $0xffffffff
1274 DATA masks<>+0xf8(SB)/4, $0xffffffff
1275 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1276
1277 GLOBL masks<>(SB),RODATA,$256 // 16 entries * 16 bytes, read-only, file-local symbol
1278
1279 // these are arguments to pshufb. They move data down from
1280 // the high bytes of the register to the low bytes of the register.
1281 // index is how many bytes to move.
// The table has 16 entries of 16 bytes each; entry n is at offset n*16.
// In entry n (n >= 1) control byte i, for i < n, selects source byte
// 16-n+i, and every other control byte is 0xff. PSHUFB writes zero to a
// destination byte whose control byte has the top bit set, so the bytes
// above the moved data come out as zero. Entry 0 is all zero.
1282 DATA shifts<>+0x00(SB)/4, $0x00000000
1283 DATA shifts<>+0x04(SB)/4, $0x00000000
1284 DATA shifts<>+0x08(SB)/4, $0x00000000
1285 DATA shifts<>+0x0c(SB)/4, $0x00000000
1286
1287 DATA shifts<>+0x10(SB)/4, $0xffffff0f
1288 DATA shifts<>+0x14(SB)/4, $0xffffffff
1289 DATA shifts<>+0x18(SB)/4, $0xffffffff
1290 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1291
1292 DATA shifts<>+0x20(SB)/4, $0xffff0f0e
1293 DATA shifts<>+0x24(SB)/4, $0xffffffff
1294 DATA shifts<>+0x28(SB)/4, $0xffffffff
1295 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1296
1297 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
1298 DATA shifts<>+0x34(SB)/4, $0xffffffff
1299 DATA shifts<>+0x38(SB)/4, $0xffffffff
1300 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1301
1302 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
1303 DATA shifts<>+0x44(SB)/4, $0xffffffff
1304 DATA shifts<>+0x48(SB)/4, $0xffffffff
1305 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1306
1307 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
1308 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1309 DATA shifts<>+0x58(SB)/4, $0xffffffff
1310 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1311
1312 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
1313 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1314 DATA shifts<>+0x68(SB)/4, $0xffffffff
1315 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1316
1317 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
1318 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1319 DATA shifts<>+0x78(SB)/4, $0xffffffff
1320 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1321
1322 DATA shifts<>+0x80(SB)/4, $0x0b0a0908
1323 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1324 DATA shifts<>+0x88(SB)/4, $0xffffffff
1325 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1326
1327 DATA shifts<>+0x90(SB)/4, $0x0a090807
1328 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1329 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1330 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1331
1332 DATA shifts<>+0xa0(SB)/4, $0x09080706
1333 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1334 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1335 DATA shifts<>+0xac(SB)/4, $0xffffffff
1336
1337 DATA shifts<>+0xb0(SB)/4, $0x08070605
1338 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1339 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1340 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1341
1342 DATA shifts<>+0xc0(SB)/4, $0x07060504
1343 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1344 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1345 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1346
1347 DATA shifts<>+0xd0(SB)/4, $0x06050403
1348 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1349 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1350 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1351
1352 DATA shifts<>+0xe0(SB)/4, $0x05040302
1353 DATA shifts<>+0xe4(SB)/4, $0x09080706
1354 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1355 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1356
1357 DATA shifts<>+0xf0(SB)/4, $0x04030201
1358 DATA shifts<>+0xf4(SB)/4, $0x08070605
1359 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1360 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1361
1362 GLOBL shifts<>(SB),RODATA,$256 // 16 entries * 16 bytes, read-only, file-local symbol
1363
// checkASM sets its one-byte result to true when the addresses of both
// masks<>(SB) and shifts<>(SB) have their low four bits clear.
1364 TEXT ·checkASM(SB),NOSPLIT,$0-1
1365 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1366 MOVL $masks<>(SB), AX
1367 MOVL $shifts<>(SB), BX
1368 ORL BX, AX // a low bit set in either address survives the OR
1369 TESTL $15, AX // ZF is set only if the low 4 bits of both addresses are zero
1370 SETEQ ret+0(FP)
1371 RET
1372
1373 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1374 // Must obey the gcc calling convention.
// The result is left in AX; CX is used as scratch for the TLS base.
1375 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1376 get_tls(CX)
1377 MOVL g(CX), AX // AX = g
1378 MOVL g_m(AX), AX // AX = g.m
1379 MOVL m_curg(AX), AX // AX = g.m.curg
1380 MOVL (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1381 RET
1382
1383 // The top-most function running on a goroutine
1384 // returns to goexit+PCQuantum.
// The one-byte NOP in front of the CALL keeps that return address inside
// goexit (presumably PCQuantum is 1 on 386 -- confirm), and the trailing
// NOP keeps the return address of the CALL inside goexit as well.
1385 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
1386 BYTE $0x90 // NOP
1387 CALL runtime·goexit1(SB) // does not return
1388 // traceback from goexit1 must hit code range of goexit
1389 BYTE $0x90 // NOP
1390
1391 // Add a module's moduledata to the linked list of moduledata objects. This
1392 // is called from .init_array by a function generated in the linker and so
1393 // follows the platform ABI wrt register preservation -- it only touches AX,
1394 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1395 // instead the pointer to the moduledata is passed in AX.
1396 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1397 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1398 MOVL AX, moduledata_next(DX) // tail.next = new moduledata
1399 MOVL AX, runtime·lastmoduledatap(SB) // new moduledata becomes the tail
1400 RET
1401
// uint32tofloat64 converts the uint32 argument a to a float64 result.
// It zero-extends a into an 8-byte stack temporary and loads that
// temporary through the x87 unit as a 64-bit integer.
1402 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
1403 MOVL a+0(FP), AX
1404 MOVL AX, 0(SP) // low 32 bits of the 64-bit temporary
1405 MOVL $0, 4(SP) // high 32 bits are zero, so the 64-bit value equals a
1406 FMOVV 0(SP), F0 // load the 64-bit integer onto the x87 stack
1407 FMOVDP F0, ret+4(FP) // store it as a float64 and pop the x87 stack
1408 RET
1409
// float64touint32 converts the float64 argument a to a uint32 result.
// It stores a as a 64-bit integer through the x87 unit and returns the
// low 32 bits of that integer.
// Frame layout: 0(SP) holds the saved x87 control word, 4(SP)..11(SP)
// holds the 64-bit integer temporary.
1410 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
1411 FMOVD a+0(FP), F0
1412 FSTCW 0(SP) // save the caller's x87 control word
1413 FLDCW runtime·controlWord64trunc(SB) // NOTE(review): presumably selects truncating rounding -- confirm against its definition
1414 FMOVVP F0, 4(SP) // store as a 64-bit integer and pop the x87 stack
1415 FLDCW 0(SP) // restore the caller's control word
1416 MOVL 4(SP), AX // keep only the low 32 bits
1417 MOVL AX, ret+8(FP)
1418 RET
1419
1420 // gcWriteBarrier informs the GC about heap pointer writes.
1421 //
1422 // gcWriteBarrier returns space in a write barrier buffer which
1423 // should be filled in by the caller.
1424 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1425 // number of bytes of buffer needed in DI, and returns a pointer
1426 // to the buffer space in DI.
1427 // It clobbers FLAGS. Apart from the result in DI it does not clobber any
1428 // general-purpose registers, but may clobber others (e.g., SSE registers).
1429 // Typical use would be, when doing *(CX+88) = AX
1430 // CMPL $0, runtime.writeBarrier(SB)
1431 // JEQ dowrite
1432 // CALL runtime.gcWriteBarrier2(SB)
1433 // MOVL AX, (DI)
1434 // MOVL 88(CX), DX
1435 // MOVL DX, 4(DI)
1436 // dowrite:
1437 // MOVL AX, 88(CX)
//
// Frame layout (28 bytes): 0(SP)=DI, 4(SP)=AX, 8(SP)=DX, 12(SP)=BP,
// 16(SP)=SI, 20(SP)=CX, 24(SP)=BX. Only the CX and BX slots are written
// on the fast path; the others are written only on the flush path.
1438 TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
1439 // Save the registers clobbered by the fast path. This is slightly
1440 // faster than having the caller spill these.
1441 MOVL CX, 20(SP)
1442 MOVL BX, 24(SP)
1443 retry:
1444 // TODO: Consider passing g.m.p in as an argument so they can be shared
1445 // across a sequence of write barriers.
1446 get_tls(BX)
1447 MOVL g(BX), BX
1448 MOVL g_m(BX), BX
1449 MOVL m_p(BX), BX // BX = g.m.p
1450 // Get current buffer write position.
1451 MOVL (p_wbBuf+wbBuf_next)(BX), CX // original next position
1452 ADDL DI, CX // new next position
1453 // Is the buffer full?
1454 CMPL CX, (p_wbBuf+wbBuf_end)(BX)
1455 JA flush // unsigned: new next position is past the end of the buffer
1456 // Commit to the larger buffer.
1457 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1458 // Make return value (the original next position)
1459 SUBL DI, CX
1460 MOVL CX, DI
1461 // Restore registers.
1462 MOVL 20(SP), CX
1463 MOVL 24(SP), BX
1464 RET
1465
1466 flush:
1467 // Save all general purpose registers since these could be
1468 // clobbered by wbBufFlush and were not saved by the caller.
1469 MOVL DI, 0(SP)
1470 MOVL AX, 4(SP)
1471 // BX already saved
1472 // CX already saved
1473 MOVL DX, 8(SP)
1474 MOVL BP, 12(SP)
1475 MOVL SI, 16(SP)
1476 // DI already saved
1477
1478 CALL runtime·wbBufFlush(SB)
1479
1480 MOVL 0(SP), DI // DI again holds the requested byte count
1481 MOVL 4(SP), AX
1482 MOVL 8(SP), DX
1483 MOVL 12(SP), BP
1484 MOVL 16(SP), SI
1485 JMP retry // BX and CX are recomputed at retry; their saved copies stay in the frame
1486
// gcWriteBarrierN (N = 1..8) loads 4*N into DI, the register in which
// gcWriteBarrier<> expects the number of buffer bytes needed, and then
// tail-jumps to gcWriteBarrier<>.
1487 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1488 MOVL $4, DI
1489 JMP gcWriteBarrier<>(SB)
1490 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1491 MOVL $8, DI
1492 JMP gcWriteBarrier<>(SB)
1493 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1494 MOVL $12, DI
1495 JMP gcWriteBarrier<>(SB)
1496 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1497 MOVL $16, DI
1498 JMP gcWriteBarrier<>(SB)
1499 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1500 MOVL $20, DI
1501 JMP gcWriteBarrier<>(SB)
1502 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1503 MOVL $24, DI
1504 JMP gcWriteBarrier<>(SB)
1505 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1506 MOVL $28, DI
1507 JMP gcWriteBarrier<>(SB)
1508 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1509 MOVL $32, DI
1510 JMP gcWriteBarrier<>(SB)
1511
1512 // Note: these functions use a special calling convention to save generated code space.
1513 // Arguments are passed in registers, but the space for those arguments are allocated
1514 // in the caller's stack frame. These stubs write the args into that stack space and
1515 // then tail call to the corresponding runtime handler.
1516 // The tail call makes these stubs disappear in backtraces.
// Each stub stores one register into the x slot (offset 0) and one into
// the y slot (offset 4), then jumps to the goPanic* function of the same
// name. The register pair is (AX, CX), (CX, DX) or (DX, BX) depending on
// the stub.
1517 TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
1518 MOVL AX, x+0(FP)
1519 MOVL CX, y+4(FP)
1520 JMP runtime·goPanicIndex(SB)
1521 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
1522 MOVL AX, x+0(FP)
1523 MOVL CX, y+4(FP)
1524 JMP runtime·goPanicIndexU(SB)
1525 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
1526 MOVL CX, x+0(FP)
1527 MOVL DX, y+4(FP)
1528 JMP runtime·goPanicSliceAlen(SB)
1529 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
1530 MOVL CX, x+0(FP)
1531 MOVL DX, y+4(FP)
1532 JMP runtime·goPanicSliceAlenU(SB)
1533 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
1534 MOVL CX, x+0(FP)
1535 MOVL DX, y+4(FP)
1536 JMP runtime·goPanicSliceAcap(SB)
1537 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
1538 MOVL CX, x+0(FP)
1539 MOVL DX, y+4(FP)
1540 JMP runtime·goPanicSliceAcapU(SB)
1541 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
1542 MOVL AX, x+0(FP)
1543 MOVL CX, y+4(FP)
1544 JMP runtime·goPanicSliceB(SB)
1545 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
1546 MOVL AX, x+0(FP)
1547 MOVL CX, y+4(FP)
1548 JMP runtime·goPanicSliceBU(SB)
1549 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
1550 MOVL DX, x+0(FP)
1551 MOVL BX, y+4(FP)
1552 JMP runtime·goPanicSlice3Alen(SB)
1553 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
1554 MOVL DX, x+0(FP)
1555 MOVL BX, y+4(FP)
1556 JMP runtime·goPanicSlice3AlenU(SB)
1557 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
1558 MOVL DX, x+0(FP)
1559 MOVL BX, y+4(FP)
1560 JMP runtime·goPanicSlice3Acap(SB)
1561 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
1562 MOVL DX, x+0(FP)
1563 MOVL BX, y+4(FP)
1564 JMP runtime·goPanicSlice3AcapU(SB)
1565 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
1566 MOVL CX, x+0(FP)
1567 MOVL DX, y+4(FP)
1568 JMP runtime·goPanicSlice3B(SB)
1569 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
1570 MOVL CX, x+0(FP)
1571 MOVL DX, y+4(FP)
1572 JMP runtime·goPanicSlice3BU(SB)
1573 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
1574 MOVL AX, x+0(FP)
1575 MOVL CX, y+4(FP)
1576 JMP runtime·goPanicSlice3C(SB)
1577 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
1578 MOVL AX, x+0(FP)
1579 MOVL CX, y+4(FP)
1580 JMP runtime·goPanicSlice3CU(SB)
1581 TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
1582 MOVL DX, x+0(FP)
1583 MOVL BX, y+4(FP)
1584 JMP runtime·goPanicSliceConvert(SB)
1585
1586 // Extended versions for 64-bit indexes.
// Each stub writes three 4-byte argument slots and then jumps to the
// goPanicExtend* function of the same name: SI goes to hi (offset 0),
// a second register to lo (offset 4), and a third register to y
// (offset 8). The (lo, y) register pair is (AX, CX), (CX, DX) or
// (DX, BX) depending on the stub.
1587 TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
1588 MOVL SI, hi+0(FP)
1589 MOVL AX, lo+4(FP)
1590 MOVL CX, y+8(FP)
1591 JMP runtime·goPanicExtendIndex(SB)
1592 TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
1593 MOVL SI, hi+0(FP)
1594 MOVL AX, lo+4(FP)
1595 MOVL CX, y+8(FP)
1596 JMP runtime·goPanicExtendIndexU(SB)
1597 TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
1598 MOVL SI, hi+0(FP)
1599 MOVL CX, lo+4(FP)
1600 MOVL DX, y+8(FP)
1601 JMP runtime·goPanicExtendSliceAlen(SB)
1602 TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
1603 MOVL SI, hi+0(FP)
1604 MOVL CX, lo+4(FP)
1605 MOVL DX, y+8(FP)
1606 JMP runtime·goPanicExtendSliceAlenU(SB)
1607 TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
1608 MOVL SI, hi+0(FP)
1609 MOVL CX, lo+4(FP)
1610 MOVL DX, y+8(FP)
1611 JMP runtime·goPanicExtendSliceAcap(SB)
1612 TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
1613 MOVL SI, hi+0(FP)
1614 MOVL CX, lo+4(FP)
1615 MOVL DX, y+8(FP)
1616 JMP runtime·goPanicExtendSliceAcapU(SB)
1617 TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
1618 MOVL SI, hi+0(FP)
1619 MOVL AX, lo+4(FP)
1620 MOVL CX, y+8(FP)
1621 JMP runtime·goPanicExtendSliceB(SB)
1622 TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
1623 MOVL SI, hi+0(FP)
1624 MOVL AX, lo+4(FP)
1625 MOVL CX, y+8(FP)
1626 JMP runtime·goPanicExtendSliceBU(SB)
1627 TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
1628 MOVL SI, hi+0(FP)
1629 MOVL DX, lo+4(FP)
1630 MOVL BX, y+8(FP)
1631 JMP runtime·goPanicExtendSlice3Alen(SB)
1632 TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
1633 MOVL SI, hi+0(FP)
1634 MOVL DX, lo+4(FP)
1635 MOVL BX, y+8(FP)
1636 JMP runtime·goPanicExtendSlice3AlenU(SB)
1637 TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
1638 MOVL SI, hi+0(FP)
1639 MOVL DX, lo+4(FP)
1640 MOVL BX, y+8(FP)
1641 JMP runtime·goPanicExtendSlice3Acap(SB)
1642 TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
1643 MOVL SI, hi+0(FP)
1644 MOVL DX, lo+4(FP)
1645 MOVL BX, y+8(FP)
1646 JMP runtime·goPanicExtendSlice3AcapU(SB)
1647 TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
1648 MOVL SI, hi+0(FP)
1649 MOVL CX, lo+4(FP)
1650 MOVL DX, y+8(FP)
1651 JMP runtime·goPanicExtendSlice3B(SB)
1652 TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
1653 MOVL SI, hi+0(FP)
1654 MOVL CX, lo+4(FP)
1655 MOVL DX, y+8(FP)
1656 JMP runtime·goPanicExtendSlice3BU(SB)
1657 TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
1658 MOVL SI, hi+0(FP)
1659 MOVL AX, lo+4(FP)
1660 MOVL CX, y+8(FP)
1661 JMP runtime·goPanicExtendSlice3C(SB)
1662 TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
1663 MOVL SI, hi+0(FP)
1664 MOVL AX, lo+4(FP)
1665 MOVL CX, y+8(FP)
1666 JMP runtime·goPanicExtendSlice3CU(SB)
1667
// runtime·tls_g is defined here only for Android and Windows builds.
1668 #ifdef GOOS_android
1669 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1670 // Earlier androids are set up in gcc_android.c.
1671 DATA runtime·tls_g+0(SB)/4, $8 // NOTE(review): presumably 8 = slot 2 * 4-byte slot size -- confirm
1672 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1673 #endif
1674 #ifdef GOOS_windows
// On Windows the 4-byte variable is only declared; no initial value is given here.
1675 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1676 #endif
1677
View as plain text