Text file
src/runtime/asm_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_386 is common startup code for most 386 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv. Both are copied into the $8 frame and control jumps to runtime·rt0_go.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 MOVL 8(SP), AX // argc
16 LEAL 12(SP), BX // argv (address of the pointer array, hence LEAL rather than MOVL)
17 MOVL AX, 0(SP) // argc goes in the first frame slot, which rt0_go reads as 0(SP)
18 MOVL BX, 4(SP) // argv goes in the second frame slot, which rt0_go reads as 4(SP)
19 JMP runtime·rt0_go(SB) // jump, not call: rt0_go's own comment says its users jump to it
20
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
26 // usual C ABI. They are stashed in _rt0_386_lib_argc<>/_rt0_386_lib_argv<>, then _rt0_386_lib_go is started on a new thread.
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
28 PUSHL BP // hand-built frame: save the caller's BP
29 MOVL SP, BP // BP now addresses the incoming stack arguments
30 PUSHL BX // BX, SI and DI are saved here and popped in reverse order at restore
31 PUSHL SI
32 PUSHL DI
33
34 MOVL 8(BP), AX // first stack argument (argc); 0(BP) is the saved BP, 4(BP) the return address
35 MOVL AX, _rt0_386_lib_argc<>(SB)
36 MOVL 12(BP), AX // second stack argument (argv)
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 SUBL $8, SP // room for the two 4-byte argument slots written on either path below
43
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX
46 TESTL AX, AX
47 JZ nocgo // no _cgo_sys_thread_create available: use runtime·newosproc0 instead
48
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
51 MOVL SP, BP // remember the unaligned SP so it can be put back after the call
52 ANDL $~15, SP // round SP down to a 16-byte boundary
53
54 MOVL $_rt0_386_lib_go(SB), BX
55 MOVL BX, 0(SP) // first argument: _rt0_386_lib_go, the function to run
56 MOVL $0, 4(SP) // second argument: 0
57
58 CALL AX // call through the pointer loaded from _cgo_sys_thread_create
59
60 MOVL BP, SP // undo the alignment
61
62 JMP restore
63
64 nocgo:
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB
66 MOVL $_rt0_386_lib_go(SB), AX
67 MOVL AX, 4(SP) // fn
68 CALL runtime·newosproc0(SB)
69
70 restore:
71 ADDL $8, SP // drop the argument slots reserved by the SUBL $8 above
72 POPL DI // restore the registers pushed on entry, in reverse order
73 POPL SI
74 POPL BX
75 POPL BP
76 RET
77
78 // _rt0_386_lib_go initializes the Go runtime.
79 // This is started in a separate thread by _rt0_386_lib.
80 TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
81 MOVL _rt0_386_lib_argc<>(SB), AX // argc saved earlier by _rt0_386_lib
82 MOVL AX, 0(SP) // rt0_go reads argc from 0(SP)
83 MOVL _rt0_386_lib_argv<>(SB), AX // argv saved earlier by _rt0_386_lib
84 MOVL AX, 4(SP) // rt0_go reads argv from 4(SP)
85 JMP runtime·rt0_go(SB) // jump, not call, as rt0_go expects
86
87 DATA _rt0_386_lib_argc<>(SB)/4, $0 // argc handed from _rt0_386_lib to _rt0_386_lib_go; initialized to 0
88 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
89 DATA _rt0_386_lib_argv<>(SB)/4, $0 // argv handed from _rt0_386_lib to _rt0_386_lib_go; initialized to 0
90 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
92 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0 // shared bootstrap reached by JMP from the _rt0_386* entry points; expects argc at 0(SP) and argv at 4(SP)
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it, they don't call it.
95 MOVL 0(SP), AX // argc
96 MOVL 4(SP), BX // argv
97 SUBL $128, SP // plenty of scratch
98 ANDL $~15, SP // round SP down to a multiple of 16
99 MOVL AX, 120(SP) // save argc, argv away
100 MOVL BX, 124(SP)
101
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP // BP = &g0; still relied on for "arg 1: g0" further down
105 LEAL (-64*1024+104)(SP), BX // provisional limit: 64KB minus 104 bytes below the current SP
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP)
110
111 // find out information about the processor we're on
112 // first see if CPUID instruction is supported.
113 PUSHFL // keep an unmodified copy of EFLAGS on the stack
114 PUSHFL // second copy, modified in place by the XORL below
115 XORL $(1<<21), 0(SP) // flip ID bit
116 POPFL // try to load the modified flags
117 PUSHFL
118 POPL AX // AX = EFLAGS as the CPU actually accepted them
119 XORL 0(SP), AX // AX = bits that differ from the original copy
120 POPFL // restore EFLAGS
121 TESTL $(1<<21), AX
122 JNE has_cpuid // the ID bit could be toggled, so CPUID is usable
123
124 bad_proc: // show that the program requires MMX.
125 MOVL $2, 0(SP) // first argument to runtime·write: 2 (presumably the stderr fd - confirm against runtime·write)
126 MOVL $bad_proc_msg<>(SB), 4(SP) // second argument: address of the message
127 MOVL $0x3d, 8(SP) // third argument: 0x3d = 61, the size declared for bad_proc_msg
128 CALL runtime·write(SB)
129 MOVL $1, 0(SP) // argument to runtime·exit: 1
130 CALL runtime·exit(SB)
131 CALL runtime·abort(SB) // only reached if exit returns
132
133 has_cpuid:
134 MOVL $0, AX // CPUID leaf 0
135 CPUID
136 MOVL AX, SI // keep the leaf-0 AX result in SI
137 CMPL AX, $0
138 JE nocpuinfo // leaf 0 returned AX == 0: skip the vendor and feature checks
139
140 CMPL BX, $0x756E6547 // "Genu"
141 JNE notintel
142 CMPL DX, $0x49656E69 // "ineI"
143 JNE notintel
144 CMPL CX, $0x6C65746E // "ntel"
145 JNE notintel
146 MOVB $1, runtime·isIntel(SB) // all three vendor words matched
147 notintel:
148
149 // Load EAX=1 cpuid flags
150 MOVL $1, AX
151 CPUID
152 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
153 MOVL AX, runtime·processorVersionInfo(SB)
154
155 // Check for MMX support
156 TESTL $(1<<23), DX // MMX
157 JZ bad_proc // bit clear: print the message and exit
158
159 nocpuinfo:
160 // if there is an _cgo_init, call it to let it
161 // initialize and to set up GS. if not,
162 // we set up GS ourselves.
163 MOVL _cgo_init(SB), AX
164 TESTL AX, AX
165 JZ needtls
166 #ifdef GOOS_android
167 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
168 // Compensate for tls_g (+8).
169 MOVL -8(TLS), BX
170 MOVL BX, 12(SP)
171 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
172 #else
173 MOVL $0, BX
174 MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
175 #ifdef GOOS_windows
176 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
177 #else
178 MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
179 #endif
180 #endif
181 MOVL $setg_gcc<>(SB), BX
182 MOVL BX, 4(SP) // arg 2: setg_gcc
183 MOVL BP, 0(SP) // arg 1: g0 (BP still holds &runtime·g0 from the stack-bounds setup)
184 CALL AX // call through the pointer loaded from _cgo_init
185
186 // update stackguard after _cgo_init
187 MOVL $runtime·g0(SB), CX
188 MOVL (g_stack+stack_lo)(CX), AX // stack.lo as it stands after the _cgo_init call
189 ADDL $const_stackGuard, AX
190 MOVL AX, g_stackguard0(CX)
191 MOVL AX, g_stackguard1(CX)
192
193 #ifndef GOOS_windows
194 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
195 JMP ok
196 #endif
197 needtls:
198 #ifdef GOOS_openbsd
199 // skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
200 JMP ok
201 #endif
202 #ifdef GOOS_plan9
203 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
204 JMP ok
205 #endif
206
207 // set up %gs
208 CALL ldt0setup<>(SB)
209
210 // store through it, to make sure it works
211 get_tls(BX)
212 MOVL $0x123, g(BX) // write a marker through the TLS g slot
213 MOVL runtime·m0+m_tls(SB), AX // read m0.tls directly; it should now hold the marker
214 CMPL AX, $0x123
215 JEQ ok
216 MOVL AX, 0 // abort: a store to absolute address 0, which faults on purpose
217 ok:
218 // set up m and g "registers"
219 get_tls(BX)
220 LEAL runtime·g0(SB), DX
221 MOVL DX, g(BX) // current g = &g0
222 LEAL runtime·m0(SB), AX
223
224 // save m->g0 = g0
225 MOVL DX, m_g0(AX)
226 // save g0->m = m0
227 MOVL AX, g_m(DX)
228
229 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
230
231 // convention is D is always cleared
232 CLD
233
234 CALL runtime·check(SB)
235
236 // saved argc, argv
237 MOVL 120(SP), AX // argc, stashed at the top of this function
238 MOVL AX, 0(SP)
239 MOVL 124(SP), AX // argv, stashed at the top of this function
240 MOVL AX, 4(SP)
241 CALL runtime·args(SB)
242 CALL runtime·osinit(SB)
243 CALL runtime·schedinit(SB)
244
245 // create a new goroutine to start program
246 PUSHL $runtime·mainPC(SB) // entry
247 CALL runtime·newproc(SB)
248 POPL AX // discard the pushed argument
249
250 // start this M
251 CALL runtime·mstart(SB)
252
253 CALL runtime·abort(SB) // only reached if mstart returns
254 RET
255
256 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
257 GLOBL bad_proc_msg<>(SB), RODATA, $61 // 61 bytes, matching the length 0x3d that rt0_go passes to runtime·write
258
259 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB) // one 4-byte word holding the address of runtime·main
260 GLOBL runtime·mainPC(SB),RODATA,$4 // rt0_go pushes &mainPC as the entry argument to runtime·newproc
261
262 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 // raise a breakpoint trap, then return to the caller
263 INT $3 // x86 breakpoint interrupt
264 RET
265
266 TEXT runtime·asminit(SB),NOSPLIT,$0-0 // no arguments, no results; only reprograms the x87 control word
267 // Linux and MinGW start the FPU in extended double precision.
268 // Other operating systems use double precision.
269 // Change to double precision to match them,
270 // and to match other hardware that only has double.
271 FLDCW runtime·controlWord64(SB) // load the control word stored in runtime·controlWord64
272 RET
273
274 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0 // thin assembly shim: the body is a single call to runtime·mstart0
275 CALL runtime·mstart0(SB) // all of the work happens in mstart0
276 RET // not reached
277
278 /*
279 * go-routine
280 */
281
282 // void gogo(Gobuf*)
283 // restore state from Gobuf; longjmp. This entry point only loads and nil-checks the g, then jumps to gogo<>.
284 TEXT runtime·gogo(SB), NOSPLIT, $0-4
285 MOVL buf+0(FP), BX // gobuf
286 MOVL gobuf_g(BX), DX // DX = the g recorded in the gobuf
287 MOVL 0(DX), CX // make sure g != nil
288 JMP gogo<>(SB) // continues with BX = gobuf and DX = g
289
290 TEXT gogo<>(SB), NOSPLIT, $0 // second half of gogo; entered by JMP with BX = gobuf and DX = g
291 get_tls(CX)
292 MOVL DX, g(CX) // make it the current g
293 MOVL gobuf_sp(BX), SP // restore SP
294 MOVL gobuf_ctxt(BX), DX // DX = saved context value
295 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
296 MOVL $0, gobuf_ctxt(BX)
297 MOVL gobuf_pc(BX), BX // BX = saved PC; the gobuf pointer is no longer needed
298 JMP BX // resume at the saved PC
299
300 // func mcall(fn func(*g))
301 // Switch to m->g0's stack, call fn(g).
302 // Fn must never return. It should gogo(&g->sched)
303 // to keep running g. If fn does return, control goes to runtime·badmcall2.
304 TEXT runtime·mcall(SB), NOSPLIT, $0-4
305 MOVL fn+0(FP), DI // DI = fn; its first word holds the code address (see MOVL 0(DI) below)
306
307 get_tls(DX)
308 MOVL g(DX), AX // save state in g->sched
309 MOVL 0(SP), BX // caller's PC
310 MOVL BX, (g_sched+gobuf_pc)(AX)
311 LEAL fn+0(FP), BX // caller's SP
312 MOVL BX, (g_sched+gobuf_sp)(AX)
313
314 // switch to m->g0 & its stack, call fn
315 MOVL g(DX), BX
316 MOVL g_m(BX), BX // BX = g->m
317 MOVL m_g0(BX), SI // SI = m->g0
318 CMPL SI, AX // if g == m->g0 call badmcall
319 JNE 3(PC) // different: skip the two-instruction badmcall jump
320 MOVL $runtime·badmcall(SB), AX
321 JMP AX
322 MOVL SI, g(DX) // g = m->g0
323 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
324 PUSHL AX // argument to fn: the g that was just switched away from
325 MOVL DI, DX // DX = fn, available to the callee as its context
326 MOVL 0(DI), DI // code address stored in the first word of fn
327 CALL DI
328 POPL AX
329 MOVL $runtime·badmcall2(SB), AX // fn returned, which it must never do
330 JMP AX
331 RET
332
333 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
334 // of the G stack. We need to distinguish the routine that
335 // lives at the bottom of the G stack from the one that lives
336 // at the top of the system stack because the one at the top of
337 // the system stack terminates the stack walk (see topofstack()). Its address is what gosave_systemstack_switch<> stores as the saved PC.
338 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
339 RET // body is intentionally empty
340
341 // func systemstack(fn func()): runs fn on m->g0's stack, or tail-calls it directly when already on g0 or gsignal.
342 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
343 MOVL fn+0(FP), DI // DI = fn
344 get_tls(CX)
345 MOVL g(CX), AX // AX = g
346 MOVL g_m(AX), BX // BX = m
347
348 CMPL AX, m_gsignal(BX)
349 JEQ noswitch // running on the signal g: call fn in place
350
351 MOVL m_g0(BX), DX // DX = g0
352 CMPL AX, DX
353 JEQ noswitch // already on g0: call fn in place
354
355 CMPL AX, m_curg(BX)
356 JNE bad // g is none of gsignal, g0 or curg
357
358 // switch stacks
359 // save our state in g->sched. Pretend to
360 // be systemstack_switch if the G stack is scanned.
361 CALL gosave_systemstack_switch<>(SB)
362
363 // switch to g0
364 get_tls(CX)
365 MOVL DX, g(CX)
366 MOVL (g_sched+gobuf_sp)(DX), BX
367 MOVL BX, SP // SP = g0->sched.sp
368
369 // call target function
370 MOVL DI, DX // DX = fn, available to the callee as its context
371 MOVL 0(DI), DI // code address stored in the first word of fn
372 CALL DI
373
374 // switch back to g
375 get_tls(CX)
376 MOVL g(CX), AX
377 MOVL g_m(AX), BX
378 MOVL m_curg(BX), AX // AX = m->curg, the g to switch back to
379 MOVL AX, g(CX)
380 MOVL (g_sched+gobuf_sp)(AX), SP // back on the SP saved by gosave_systemstack_switch<>
381 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved SP now that it has been consumed
382 RET
383
384 noswitch:
385 // already on system stack; tail call the function
386 // Using a tail call here cleans up tracebacks since we won't stop
387 // at an intermediate systemstack.
388 MOVL DI, DX // DX = fn, available to the callee as its context
389 MOVL 0(DI), DI // code address stored in the first word of fn
390 JMP DI
391
392 bad:
393 // Bad: g is not gsignal, not g0, not curg. What is it?
394 // Hide call from linker nosplit analysis.
395 MOVL $runtime·badsystemstack(SB), AX
396 CALL AX
397 INT $3 // trap if badsystemstack returns
398
399 // func switchToCrashStack0(fn func()): makes runtime·gcrash the current g and the current m's g0, moves SP onto gcrash's stack, and calls fn. fn is not expected to return.
400 TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-4
401 MOVL fn+0(FP), AX // AX = fn, read before SP is moved
402
403 get_tls(CX)
404 MOVL g(CX), BX // BX = g
405 MOVL g_m(BX), DX // DX = curm
406
407 // set g to gcrash
408 LEAL runtime·gcrash(SB), BX // g = &gcrash
409 MOVL DX, g_m(BX) // g.m = curm
410 MOVL BX, m_g0(DX) // curm.g0 = g
411 get_tls(CX)
412 MOVL BX, g(CX)
413
414 // switch to crashstack
415 MOVL (g_stack+stack_hi)(BX), DX
416 SUBL $(4*8), DX // start 32 bytes below gcrash's stack.hi
417 MOVL DX, SP
418
419 // call target function
420 MOVL AX, DX // DX = fn, available to the callee as its context
421 MOVL 0(AX), AX // code address stored in the first word of fn
422 CALL AX
423
424 // should never return
425 CALL runtime·abort(SB)
426 UNDEF
427
428 /*
429 * support for morestack
430 */
431
432 // Called during function prolog when more stack is needed.
433 // On entry DX holds the context value that is saved into g->sched.ctxt (morestack_noctxt zeroes it first).
434 // The traceback routines see morestack on a g0 as being
435 // the top of a stack (for example, morestack calling newstack
436 // calling the scheduler calling newm calling gc), so we must
437 // record an argument size. For that purpose, it has no arguments.
438 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
439 // Cannot grow scheduler stack (m->g0).
440 get_tls(CX)
441 MOVL g(CX), DI // DI = current g
442 MOVL g_m(DI), BX // BX = g->m
443
444 // Set g->sched to context in f.
445 MOVL 0(SP), AX // f's PC
446 MOVL AX, (g_sched+gobuf_pc)(DI)
447 LEAL 4(SP), AX // f's SP
448 MOVL AX, (g_sched+gobuf_sp)(DI)
449 MOVL DX, (g_sched+gobuf_ctxt)(DI) // context value supplied by the caller in DX
450
451 MOVL m_g0(BX), SI
452 CMPL g(CX), SI
453 JNE 3(PC) // not on g0: skip the two calls below
454 CALL runtime·badmorestackg0(SB)
455 CALL runtime·abort(SB)
456
457 // Cannot grow signal stack.
458 MOVL m_gsignal(BX), SI
459 CMPL g(CX), SI
460 JNE 3(PC) // not on gsignal: skip the two calls below
461 CALL runtime·badmorestackgsignal(SB)
462 CALL runtime·abort(SB)
463
464 // Called from f.
465 // Set m->morebuf to f's caller.
466 NOP SP // tell vet SP changed - stop checking offsets
467 MOVL 4(SP), DI // f's caller's PC
468 MOVL DI, (m_morebuf+gobuf_pc)(BX)
469 LEAL 8(SP), CX // f's caller's SP
470 MOVL CX, (m_morebuf+gobuf_sp)(BX)
471 get_tls(CX)
472 MOVL g(CX), SI
473 MOVL SI, (m_morebuf+gobuf_g)(BX) // morebuf.g = the g whose stack needs to grow
474
475 // Call newstack on m->g0's stack.
476 MOVL m_g0(BX), BP
477 MOVL BP, g(CX) // current g = m->g0
478 MOVL (g_sched+gobuf_sp)(BP), AX
479 MOVL -4(AX), BX // fault if CALL would, before smashing SP
480 MOVL AX, SP // SP = g0->sched.sp
481 CALL runtime·newstack(SB)
482 CALL runtime·abort(SB) // crash if newstack returns
483 RET
484
485 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0 // morestack entry that supplies a zero context value
486 MOVL $0, DX // morestack stores DX into g->sched.ctxt, so this records a zero context
487 JMP runtime·morestack(SB)
488
489 // reflectcall: call a function with the given argument list
490 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
491 // we don't have variable-sized frames, so we use a small number
492 // of constant-sized-frame functions to encode a few bits of size in the pc.
493 // Caution: ugly multiline assembly macros in your future! DISPATCH expects the requested frame size in CX.
494
495 #define DISPATCH(NAME,MAXSIZE) \
496 CMPL CX, $MAXSIZE; \
497 JA 3(PC); \
498 MOVL $NAME(SB), AX; \
499 JMP AX
500 // Note: can't just "JMP NAME(SB)" - bad inlining results. Each expansion is four instructions; JA 3(PC) skips the MOVL/JMP pair when CX is above MAXSIZE (unsigned), falling into the next DISPATCH.
501
502 TEXT ·reflectcall(SB), NOSPLIT, $0-28 // jumps to the smallest call* function whose fixed frame is at least frameSize bytes
503 MOVL frameSize+20(FP), CX // CX = requested frame size, compared by every DISPATCH below
504 DISPATCH(runtime·call16, 16)
505 DISPATCH(runtime·call32, 32)
506 DISPATCH(runtime·call64, 64)
507 DISPATCH(runtime·call128, 128)
508 DISPATCH(runtime·call256, 256)
509 DISPATCH(runtime·call512, 512)
510 DISPATCH(runtime·call1024, 1024)
511 DISPATCH(runtime·call2048, 2048)
512 DISPATCH(runtime·call4096, 4096)
513 DISPATCH(runtime·call8192, 8192)
514 DISPATCH(runtime·call16384, 16384)
515 DISPATCH(runtime·call32768, 32768)
516 DISPATCH(runtime·call65536, 65536)
517 DISPATCH(runtime·call131072, 131072)
518 DISPATCH(runtime·call262144, 262144)
519 DISPATCH(runtime·call524288, 524288)
520 DISPATCH(runtime·call1048576, 1048576)
521 DISPATCH(runtime·call2097152, 2097152)
522 DISPATCH(runtime·call4194304, 4194304)
523 DISPATCH(runtime·call8388608, 8388608)
524 DISPATCH(runtime·call16777216, 16777216)
525 DISPATCH(runtime·call33554432, 33554432)
526 DISPATCH(runtime·call67108864, 67108864)
527 DISPATCH(runtime·call134217728, 134217728)
528 DISPATCH(runtime·call268435456, 268435456)
529 DISPATCH(runtime·call536870912, 536870912)
530 DISPATCH(runtime·call1073741824, 1073741824)
531 MOVL $runtime·badreflectcall(SB), AX // frameSize is above 1073741824: no size class fits
532 JMP AX
533 // CALLFN(NAME, MAXSIZE) defines one call* function with a fixed MAXSIZE-byte frame: it copies stackArgsSize bytes from stackArgs to SP, calls the code address in the first word of f with DX = f, then passes the bytes from stackRetOffset onward to callRet<> in DX/DI/SI/CX to be copied back.
534 #define CALLFN(NAME,MAXSIZE) \
535 TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
536 NO_LOCAL_POINTERS; \
537 /* copy arguments to stack */ \
538 MOVL stackArgs+8(FP), SI; \
539 MOVL stackArgsSize+12(FP), CX; \
540 MOVL SP, DI; \
541 REP;MOVSB; \
542 /* call function */ \
543 MOVL f+4(FP), DX; \
544 MOVL (DX), AX; \
545 PCDATA $PCDATA_StackMapIndex, $0; \
546 CALL AX; \
547 /* copy return values back */ \
548 MOVL stackArgsType+0(FP), DX; \
549 MOVL stackArgs+8(FP), DI; \
550 MOVL stackArgsSize+12(FP), CX; \
551 MOVL stackRetOffset+16(FP), BX; \
552 MOVL SP, SI; \
553 ADDL BX, DI; \
554 ADDL BX, SI; \
555 SUBL BX, CX; \
556 CALL callRet<>(SB); \
557 RET
558
559 // callRet copies return values back at the end of call*. This is a
560 // separate function so it can allocate stack space for the arguments
561 // to reflectcallmove. It does not follow the Go ABI; it expects its
562 // arguments in registers: DX, DI, SI and CX, as loaded by CALLFN just before the call.
563 TEXT callRet<>(SB), NOSPLIT, $20-0
564 MOVL DX, 0(SP) // CALLFN loads DX from stackArgsType
565 MOVL DI, 4(SP) // CALLFN sets DI = stackArgs + stackRetOffset
566 MOVL SI, 8(SP) // CALLFN sets SI = its own SP + stackRetOffset
567 MOVL CX, 12(SP) // CALLFN sets CX = stackArgsSize - stackRetOffset
568 MOVL $0, 16(SP) // fifth argument slot: always 0 here
569 CALL runtime·reflectcallmove(SB)
570 RET
571 // Instantiate one fixed-frame call* function per size class; the sizes mirror the DISPATCH list in reflectcall one for one.
572 CALLFN(·call16, 16)
573 CALLFN(·call32, 32)
574 CALLFN(·call64, 64)
575 CALLFN(·call128, 128)
576 CALLFN(·call256, 256)
577 CALLFN(·call512, 512)
578 CALLFN(·call1024, 1024)
579 CALLFN(·call2048, 2048)
580 CALLFN(·call4096, 4096)
581 CALLFN(·call8192, 8192)
582 CALLFN(·call16384, 16384)
583 CALLFN(·call32768, 32768)
584 CALLFN(·call65536, 65536)
585 CALLFN(·call131072, 131072)
586 CALLFN(·call262144, 262144)
587 CALLFN(·call524288, 524288)
588 CALLFN(·call1048576, 1048576)
589 CALLFN(·call2097152, 2097152)
590 CALLFN(·call4194304, 4194304)
591 CALLFN(·call8388608, 8388608)
592 CALLFN(·call16777216, 16777216)
593 CALLFN(·call33554432, 33554432)
594 CALLFN(·call67108864, 67108864)
595 CALLFN(·call134217728, 134217728)
596 CALLFN(·call268435456, 268435456)
597 CALLFN(·call536870912, 536870912)
598 CALLFN(·call1073741824, 1073741824)
599
600 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0 // executes PAUSE `cycles` times; returns at once when cycles is 0
601 MOVL cycles+0(FP), AX // AX = remaining iterations
602 TESTL AX, AX
603 JZ done // zero requested: skip the loop entirely rather than wrapping around
604 again:
605 PAUSE // spin-wait hint to the CPU
606 SUBL $1, AX
607 JNZ again // loop until the counter reaches zero
608 done:
609 RET
610
611 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0 // takes nothing, returns nothing, executes no instructions besides RET
612 // Stores are already ordered on x86, so this is just a
613 // compile barrier.
614 RET
615
616 // Save state of caller into g->sched,
617 // but using fake PC from systemstack_switch.
618 // Must only be called from functions with no locals ($0)
619 // or else unwinding from systemstack_switch is incorrect. AX and BX are preserved via push/pop.
620 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
621 PUSHL AX // AX and BX are used as scratch below and restored before returning
622 PUSHL BX
623 get_tls(BX)
624 MOVL g(BX), BX // BX = current g
625 LEAL arg+0(FP), AX // address just above this function's return address
626 MOVL AX, (g_sched+gobuf_sp)(BX)
627 MOVL $runtime·systemstack_switch(SB), AX // the fake PC
628 MOVL AX, (g_sched+gobuf_pc)(BX)
629 // Assert ctxt is zero. See func save.
630 MOVL (g_sched+gobuf_ctxt)(BX), AX
631 TESTL AX, AX
632 JZ 2(PC) // ctxt is zero: skip the abort
633 CALL runtime·abort(SB)
634 POPL BX
635 POPL AX
636 RET
637
638 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
639 // Call fn(arg) aligned appropriately for the gcc ABI.
640 // Called on a system stack, and there may be no g yet (during needm). The original SP is kept in the new frame and restored after the call.
641 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
642 MOVL fn+0(FP), AX // AX = function to call
643 MOVL arg+4(FP), BX // BX = its single argument
644 MOVL SP, DX // DX = SP on entry
645 SUBL $32, SP
646 ANDL $~15, SP // alignment, perhaps unnecessary
647 MOVL DX, 8(SP) // save old SP
648 MOVL BX, 0(SP) // first argument in x86-32 ABI
649 CALL AX
650 MOVL 8(SP), DX // reload the saved SP from the slot written before the call
651 MOVL DX, SP
652 RET
653
654 // func asmcgocall(fn, arg unsafe.Pointer) int32
655 // Call fn(arg) on the scheduler stack,
656 // aligned appropriately for the gcc ABI.
657 // See cgocall.go for more details. The value fn leaves in AX is stored as the result.
658 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
659 MOVL fn+0(FP), AX // AX = function to call
660 MOVL arg+4(FP), BX // BX = its single argument
661
662 MOVL SP, DX // DX = SP on entry, used below to compute the stack depth or to restore SP
663
664 // Figure out if we need to switch to m->g0 stack.
665 // We get called to create new OS threads too, and those
666 // come in on the m->g0 stack already. Or we might already
667 // be on the m->gsignal stack.
668 get_tls(CX)
669 MOVL g(CX), DI // DI = current g
670 CMPL DI, $0
671 JEQ nosave // Don't even have a G yet.
672 MOVL g_m(DI), BP // BP = g->m
673 CMPL DI, m_gsignal(BP)
674 JEQ noswitch // on gsignal: stay on this stack
675 MOVL m_g0(BP), SI // SI = m->g0
676 CMPL DI, SI
677 JEQ noswitch // on g0: stay on this stack
678 CALL gosave_systemstack_switch<>(SB)
679 get_tls(CX)
680 MOVL SI, g(CX) // current g = m->g0
681 MOVL (g_sched+gobuf_sp)(SI), SP // SP = g0->sched.sp
682
683 noswitch:
684 // Now on a scheduling stack (a pthread-created stack).
685 SUBL $32, SP
686 ANDL $~15, SP // alignment, perhaps unnecessary
687 MOVL DI, 8(SP) // save g
688 MOVL (g_stack+stack_hi)(DI), DI
689 SUBL DX, DI // DI = g->stack.hi - entry SP
690 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
691 MOVL BX, 0(SP) // first argument in x86-32 ABI
692 CALL AX
693
694 // Restore registers, g, stack pointer.
695 get_tls(CX)
696 MOVL 8(SP), DI // DI = the g saved before the call
697 MOVL (g_stack+stack_hi)(DI), SI
698 SUBL 4(SP), SI // SI = g->stack.hi - saved depth, i.e. the entry SP on the possibly moved stack
699 MOVL DI, g(CX)
700 MOVL SI, SP
701
702 MOVL AX, ret+8(FP) // result: whatever fn left in AX
703 RET
704 nosave:
705 // Now on a scheduling stack (a pthread-created stack).
706 SUBL $32, SP
707 ANDL $~15, SP // alignment, perhaps unnecessary
708 MOVL DX, 4(SP) // save original stack pointer
709 MOVL BX, 0(SP) // first argument in x86-32 ABI
710 CALL AX
711
712 MOVL 4(SP), CX // restore original stack pointer
713 MOVL CX, SP
714 MOVL AX, ret+8(FP) // result: whatever fn left in AX
715 RET
716
717 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
718 // See cgocall.go for more details. Runs runtime·cgocallbackg(fn, frame, ctxt) on m->curg's stack, borrowing an m first when the thread has no g.
719 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
720 NO_LOCAL_POINTERS
721
722 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
723 // It is used to dropm while thread is exiting.
724 MOVL fn+0(FP), AX
725 CMPL AX, $0
726 JNE loadg
727 // Restore the g from frame.
728 get_tls(CX)
729 MOVL frame+4(FP), BX
730 MOVL BX, g(CX)
731 JMP dropm
732
733 loadg:
734 // If g is nil, either Go did not create the current thread,
735 // or this thread has never called into Go on pthread platforms.
736 // Call needm to obtain one for temporary use.
737 // In this case, we're running on the thread stack, so there's
738 // lots of space, but the linker doesn't know. Hide the call from
739 // the linker analysis by using an indirect call through AX.
740 get_tls(CX)
741 #ifdef GOOS_windows
742 MOVL $0, BP
743 CMPL CX, $0
744 JEQ 2(PC) // TODO
745 #endif
746 MOVL g(CX), BP
747 CMPL BP, $0
748 JEQ needm
749 MOVL g_m(BP), BP // BP = g->m
750 MOVL BP, savedm-4(SP) // saved copy of oldm
751 JMP havem
752 needm:
753 MOVL $runtime·needAndBindM(SB), AX
754 CALL AX
755 MOVL $0, savedm-4(SP) // record that there was no m on entry
756 get_tls(CX)
757 MOVL g(CX), BP
758 MOVL g_m(BP), BP // BP = the m just obtained
759
760 // Set m->sched.sp = SP, so that if a panic happens
761 // during the function we are about to execute, it will
762 // have a valid SP to run on the g0 stack.
763 // The next few lines (after the havem label)
764 // will save this SP onto the stack and then write
765 // the same SP back to m->sched.sp. That seems redundant,
766 // but if an unrecovered panic happens, unwindm will
767 // restore the g->sched.sp from the stack location
768 // and then systemstack will try to use it. If we don't set it here,
769 // that restored SP will be uninitialized (typically 0) and
770 // will not be usable.
771 MOVL m_g0(BP), SI
772 MOVL SP, (g_sched+gobuf_sp)(SI)
773
774 havem:
775 // Now there's a valid m, and we're running on its m->g0.
776 // Save current m->g0->sched.sp on stack and then set it to SP.
777 // Save current sp in m->g0->sched.sp in preparation for
778 // switch back to m->curg stack.
779 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
780 MOVL m_g0(BP), SI
781 MOVL (g_sched+gobuf_sp)(SI), AX
782 MOVL AX, 0(SP) // previous g0->sched.sp, restored after the callback returns
783 MOVL SP, (g_sched+gobuf_sp)(SI)
784
785 // Switch to m->curg stack and call runtime.cgocallbackg.
786 // Because we are taking over the execution of m->curg
787 // but *not* resuming what had been running, we need to
788 // save that information (m->curg->sched) so we can restore it.
789 // We can restore m->curg->sched.sp easily, because calling
790 // runtime.cgocallbackg leaves SP unchanged upon return.
791 // To save m->curg->sched.pc, we push it onto the curg stack and
792 // open a frame the same size as cgocallback's g0 frame.
793 // Once we switch to the curg stack, the pushed PC will appear
794 // to be the return PC of cgocallback, so that the traceback
795 // will seamlessly trace back into the earlier calls.
796 MOVL m_curg(BP), SI
797 MOVL SI, g(CX) // current g = m->curg
798 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
799 MOVL (g_sched+gobuf_pc)(SI), BP
800 MOVL BP, -4(DI) // "push" return PC on the g stack
801 // Gather our arguments into registers.
802 MOVL fn+0(FP), AX
803 MOVL frame+4(FP), BX
804 MOVL ctxt+8(FP), CX
805 LEAL -(4+12)(DI), SP // Must match declared frame size
806 MOVL AX, 0(SP) // fn
807 MOVL BX, 4(SP) // frame
808 MOVL CX, 8(SP) // ctxt
809 CALL runtime·cgocallbackg(SB)
810
811 // Restore g->sched (== m->curg->sched) from saved values.
812 get_tls(CX)
813 MOVL g(CX), SI
814 MOVL 12(SP), BP // Must match declared frame size
815 MOVL BP, (g_sched+gobuf_pc)(SI)
816 LEAL (12+4)(SP), DI // Must match declared frame size
817 MOVL DI, (g_sched+gobuf_sp)(SI)
818
819 // Switch back to m->g0's stack and restore m->g0->sched.sp.
820 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
821 // so we do not have to restore it.)
822 MOVL g(CX), BP
823 MOVL g_m(BP), BP
824 MOVL m_g0(BP), SI
825 MOVL SI, g(CX) // current g = m->g0
826 MOVL (g_sched+gobuf_sp)(SI), SP
827 MOVL 0(SP), AX // the g0->sched.sp value stashed at havem
828 MOVL AX, (g_sched+gobuf_sp)(SI)
829
830 // If the m on entry was nil, we called needm above to borrow an m,
831 // 1. for the duration of the call on non-pthread platforms,
832 // 2. or for as long as the C thread is alive on pthread platforms.
833 // If the m on entry wasn't nil,
834 // 1. the thread might be a Go thread,
835 // 2. or it wasn't the first call from a C thread on pthread platforms,
836 // since then we skip dropm to reuse the m in the first call.
837 MOVL savedm-4(SP), DX
838 CMPL DX, $0
839 JNE droppedm // there was an m on entry: nothing to drop
840
841 // Skip dropm to reuse it in the next call, when a pthread key has been created.
842 MOVL _cgo_pthread_key_created(SB), DX
843 // A nil _cgo_pthread_key_created pointer means cgo is disabled, so dropm is needed.
844 CMPL DX, $0
845 JEQ dropm
846 CMPL (DX), $0
847 JNE droppedm // the pointed-to flag is non-zero: keep the m for the next call
848
849 dropm:
850 MOVL $runtime·dropm(SB), AX
851 CALL AX // indirect call through AX, like the needAndBindM call above
852 droppedm:
853
854 // Done!
855 RET
856
857 // void setg(G*); set g. for use by needm. On Windows it also updates the FS-relative slot at offset tls_g.
858 TEXT runtime·setg(SB), NOSPLIT, $0-4
859 MOVL gg+0(FP), BX // BX = new g (may be nil)
860 #ifdef GOOS_windows
861 MOVL runtime·tls_g(SB), CX
862 CMPL BX, $0
863 JNE settls
864 MOVL $0, 0(CX)(FS) // nil g: clear the slot and return without touching g(CX)
865 RET
866 settls:
867 MOVL g_m(BX), AX
868 LEAL m_tls(AX), AX // AX = &g->m->tls
869 MOVL AX, 0(CX)(FS)
870 #endif
871 get_tls(CX)
872 MOVL BX, g(CX) // current g = gg
873 RET
874
875 // void setg_gcc(G*); set g. for use by gcc. rt0_go passes its address to _cgo_init as arg 2.
876 TEXT setg_gcc<>(SB), NOSPLIT, $0
877 get_tls(AX)
878 MOVL gg+0(FP), DX // DX = the g passed as the single stack argument
879 MOVL DX, g(AX) // current g = gg
880 RET
881
882 TEXT runtime·abort(SB),NOSPLIT,$0-0 // never returns: traps, then spins if execution continues past the trap
883 INT $3 // x86 breakpoint interrupt
884 loop:
885 JMP loop // infinite loop; there is no RET
886
887 // check that SP is in range [g->stack.lo, g->stack.hi). NOTE(review): as coded, SP == stack.lo also aborts, because the second test requires SP to be strictly above stack.lo.
888 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
889 get_tls(CX)
890 MOVL g(CX), AX // AX = current g
891 CMPL (g_stack+stack_hi)(AX), SP
892 JHI 2(PC) // stack.hi > SP (unsigned): skip the abort
893 CALL runtime·abort(SB)
894 CMPL SP, (g_stack+stack_lo)(AX)
895 JHI 2(PC) // SP > stack.lo (unsigned): skip the abort
896 CALL runtime·abort(SB)
897 RET
898
899 // func cputicks() int64: returns the timestamp counter, low half from AX and high half from DX.
900 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
901 // LFENCE/MFENCE instruction support is dependent on SSE2.
902 // When no SSE2 support is present do not enforce any serialization
903 // since using CPUID to serialize the instruction stream is
904 // very costly.
905 #ifdef GO386_softfloat
906 JMP rdtsc // no fence instructions available
907 #endif
908 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
909 JNE fences // RDTSCP not flagged as available: fall back to fenced RDTSC
910 // Instruction stream serializing RDTSCP is supported.
911 // RDTSCP is supported by Intel Nehalem (2008) and
912 // AMD K8 Rev. F (2006) and newer.
913 RDTSCP
914 done:
915 MOVL AX, ret_lo+0(FP) // low 32 bits of the result
916 MOVL DX, ret_hi+4(FP) // high 32 bits of the result
917 RET
918 fences:
919 // MFENCE is instruction stream serializing and flushes the
920 // store buffers on AMD. The serialization semantics of LFENCE on AMD
921 // are dependent on MSR C001_1029 and CPU generation.
922 // LFENCE on Intel does wait for all previous instructions to have executed.
923 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
924 // previous instructions executed and all previous loads and stores to be globally visible.
925 // Using MFENCE;LFENCE here aligns the serializing properties without
926 // runtime detection of CPU manufacturer.
927 MFENCE
928 LFENCE
929 rdtsc:
930 RDTSC
931 JMP done // share the result-storing code with the RDTSCP path
932
933 TEXT ldt0setup<>(SB),NOSPLIT,$16-0 // called from rt0_go ("set up %gs"); calls runtime·setldt(7, &m0.tls, 32)
934 #ifdef GOOS_windows
935 CALL runtime·wintls(SB)
936 #endif
937 // set up ldt 7 to point at m0.tls
938 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
939 // the entry number is just a hint. setldt will set up GS with what it used.
940 MOVL $7, 0(SP) // first argument: requested entry number
941 LEAL runtime·m0+m_tls(SB), AX
942 MOVL AX, 4(SP) // second argument: address of m0.tls
943 MOVL $32, 8(SP) // sizeof(tls array)
944 CALL runtime·setldt(SB)
945 RET
946
947 TEXT runtime·emptyfunc(SB),0,$0-0 // flags are 0, i.e. not NOSPLIT; rt0_go calls this to "fault if stack check is wrong"
948 RET
949
950 // hash function using AES hardware instructions; jumps to runtime·memhashFallback when runtime·useAeshash is 0.
951 TEXT runtime·memhash(SB),NOSPLIT,$0-16
952 CMPB runtime·useAeshash(SB), $0
953 JEQ noaes
954 MOVL p+0(FP), AX // ptr to data
955 MOVL s+8(FP), BX // size
956 LEAL ret+12(FP), DX // DX = address of the result slot, written by aeshashbody
957 JMP aeshashbody<>(SB)
958 noaes:
959 JMP runtime·memhashFallback(SB)
960
961 TEXT runtime·strhash(SB),NOSPLIT,$0-12 // string counterpart of memhash; jumps to runtime·strhashFallback when runtime·useAeshash is 0
962 CMPB runtime·useAeshash(SB), $0
963 JEQ noaes
964 MOVL p+0(FP), AX // ptr to string object
965 MOVL 4(AX), BX // length of string
966 MOVL (AX), AX // string data
967 LEAL ret+8(FP), DX // DX = address of the result slot, written by aeshashbody
968 JMP aeshashbody<>(SB)
969 noaes:
970 JMP runtime·strhashFallback(SB)
971
972 // AX: data
973 // BX: length
974 // DX: address to put return value. Entered by JMP from memhash/strhash, so h+4(FP) below is the seed argument of whichever function jumped here.
975 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
976 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
977 PINSRW $4, BX, X0 // 16 bits of length
978 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
979 MOVO X0, X1 // save unscrambled seed
980 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
981 AESENC X0, X0 // scramble seed
982
983 CMPL BX, $16
984 JB aes0to15 // length below 16
985 JE aes16 // length exactly 16
986 CMPL BX, $32
987 JBE aes17to32
988 CMPL BX, $64
989 JBE aes33to64
990 JMP aes65plus
991
992 aes0to15:
993 TESTL BX, BX
994 JE aes0 // empty input
995
996 ADDL $16, AX // AX = data + 16
997 TESTW $0xff0, AX
998 JE endofpage // bits 4-11 of data+16 are all zero
999
1000 // 16 bytes loaded at this address won't cross
1001 // a page boundary, so we can load it directly.
1002 MOVOU -16(AX), X1 // 16 bytes starting at the original data pointer
1003 ADDL BX, BX // BX = 2*len, so BX*8 below is 16*len: the offset of the len-th 16-byte table entry
1004 PAND masks<>(SB)(BX*8), X1 // keep only the first len bytes
1005
1006 final1:
1007 PXOR X0, X1 // xor data with seed
1008 AESENC X1, X1 // scramble combo 3 times
1009 AESENC X1, X1
1010 AESENC X1, X1
1011 MOVL X1, (DX) // store the low 32 bits as the hash
1012 RET
1013
1014 endofpage:
1015 // the low 12 bits of the data address are 0xff0-0xfff. Might be up against
1016 // a page boundary, so load ending at last byte.
1017 // Then shift bytes down using pshufb.
1018 MOVOU -32(AX)(BX*1), X1 // AX is data+16 here, so this reads the 16 bytes ending at data+len
1019 ADDL BX, BX // BX = 2*len, so BX*8 below is 16*len, as in the masks lookup
1020 PSHUFB shifts<>(SB)(BX*8), X1
1021 JMP final1
1022
1023 aes0:
1024 // Return scrambled input seed
1025 AESENC X0, X0
1026 MOVL X0, (DX)
1027 RET
1028
1029 aes16:
1030 MOVOU (AX), X1 // the whole 16-byte input
1031 JMP final1
1032
1033 aes17to32:
1034 // make second starting seed
1035 PXOR runtime·aeskeysched+16(SB), X1
1036 AESENC X1, X1
1037
1038 // load data to be hashed
1039 MOVOU (AX), X2 // first 16 bytes
1040 MOVOU -16(AX)(BX*1), X3 // last 16 bytes (overlaps the first block when len < 32)
1041
1042 // xor with seed
1043 PXOR X0, X2
1044 PXOR X1, X3
1045
1046 // scramble 3 times
1047 AESENC X2, X2
1048 AESENC X3, X3
1049 AESENC X2, X2
1050 AESENC X3, X3
1051 AESENC X2, X2
1052 AESENC X3, X3
1053
1054 // combine results
1055 PXOR X3, X2
1056 MOVL X2, (DX)
1057 RET
1058
1059 aes33to64:
1060 // make 3 more starting seeds
1061 MOVO X1, X2
1062 MOVO X1, X3
1063 PXOR runtime·aeskeysched+16(SB), X1
1064 PXOR runtime·aeskeysched+32(SB), X2
1065 PXOR runtime·aeskeysched+48(SB), X3
1066 AESENC X1, X1
1067 AESENC X2, X2
1068 AESENC X3, X3
1069
1070 MOVOU (AX), X4 // first 32 bytes in X4, X5
1071 MOVOU 16(AX), X5
1072 MOVOU -32(AX)(BX*1), X6 // last 32 bytes in X6, X7 (overlaps the first half when len < 64)
1073 MOVOU -16(AX)(BX*1), X7
1074
1075 PXOR X0, X4 // xor each block with its own seed
1076 PXOR X1, X5
1077 PXOR X2, X6
1078 PXOR X3, X7
1079
1080 AESENC X4, X4 // three scramble rounds over all four lanes
1081 AESENC X5, X5
1082 AESENC X6, X6
1083 AESENC X7, X7
1084
1085 AESENC X4, X4
1086 AESENC X5, X5
1087 AESENC X6, X6
1088 AESENC X7, X7
1089
1090 AESENC X4, X4
1091 AESENC X5, X5
1092 AESENC X6, X6
1093 AESENC X7, X7
1094
1095 PXOR X6, X4 // fold the four lanes into X4
1096 PXOR X7, X5
1097 PXOR X5, X4
1098 MOVL X4, (DX)
1099 RET
1100
1101 aes65plus:
1102 // make 3 more starting seeds
1103 MOVO X1, X2
1104 MOVO X1, X3
1105 PXOR runtime·aeskeysched+16(SB), X1
1106 PXOR runtime·aeskeysched+32(SB), X2
1107 PXOR runtime·aeskeysched+48(SB), X3
1108 AESENC X1, X1
1109 AESENC X2, X2
1110 AESENC X3, X3
1111
1112 // start with last (possibly overlapping) block
1113 MOVOU -64(AX)(BX*1), X4
1114 MOVOU -48(AX)(BX*1), X5
1115 MOVOU -32(AX)(BX*1), X6
1116 MOVOU -16(AX)(BX*1), X7
1117
1118 // scramble state once
1119 AESENC X0, X4
1120 AESENC X1, X5
1121 AESENC X2, X6
1122 AESENC X3, X7
1123
1124 // compute number of remaining 64-byte blocks
1125 DECL BX
1126 SHRL $6, BX // BX = (len-1)/64, at least 1 because len > 64 on this path
1127
1128 aesloop:
1129 // scramble state, xor in a block
1130 MOVOU (AX), X0 // X0-X3 are reused for data from here on; the seeds are no longer needed
1131 MOVOU 16(AX), X1
1132 MOVOU 32(AX), X2
1133 MOVOU 48(AX), X3
1134 AESENC X0, X4
1135 AESENC X1, X5
1136 AESENC X2, X6
1137 AESENC X3, X7
1138
1139 // scramble state
1140 AESENC X4, X4
1141 AESENC X5, X5
1142 AESENC X6, X6
1143 AESENC X7, X7
1144
1145 ADDL $64, AX // advance to the next 64-byte block
1146 DECL BX
1147 JNE aesloop
1148
1149 // 3 more scrambles to finish
1150 AESENC X4, X4
1151 AESENC X5, X5
1152 AESENC X6, X6
1153 AESENC X7, X7
1154
1155 AESENC X4, X4
1156 AESENC X5, X5
1157 AESENC X6, X6
1158 AESENC X7, X7
1159
1160 AESENC X4, X4
1161 AESENC X5, X5
1162 AESENC X6, X6
1163 AESENC X7, X7
1164
1165 PXOR X6, X4 // fold the four lanes into X4
1166 PXOR X7, X5
1167 PXOR X5, X4
1168 MOVL X4, (DX) // store the low 32 bits as the hash
1169 RET
1170
// memhash32 hashes the 4 bytes at p using seed h and returns a 32-bit result.
// Frame is $0-12: p at +0(FP), h at +4(FP), ret at +8(FP).
// When runtime·useAeshash is zero it tail-jumps to runtime·memhash32Fallback,
// which then sees the same argument frame.
1171 TEXT runtime·memhash32(SB),NOSPLIT,$0-12
1172 CMPB runtime·useAeshash(SB), $0
1173 JEQ noaes // AES hashing disabled: use the fallback
1174 MOVL p+0(FP), AX // ptr to data
1175 MOVL h+4(FP), X0 // seed goes in dword 0 of X0
1176 PINSRD $1, (AX), X0 // the 4 data bytes go in dword 1 of X0
1177 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, each keyed with the next
1178 AESENC runtime·aeskeysched+16(SB), X0 // 16 bytes of runtime·aeskeysched
1179 AESENC runtime·aeskeysched+32(SB), X0
1180 MOVL X0, ret+8(FP) // result is the low 32 bits of the state
1181 RET
1182 noaes:
1183 JMP runtime·memhash32Fallback(SB)
1184
// memhash64 hashes the 8 bytes at p using seed h and returns a 32-bit result.
// Frame is $0-12: p at +0(FP), h at +4(FP), ret at +8(FP).
// When runtime·useAeshash is zero it tail-jumps to runtime·memhash64Fallback,
// which then sees the same argument frame.
1185 TEXT runtime·memhash64(SB),NOSPLIT,$0-12
1186 CMPB runtime·useAeshash(SB), $0
1187 JEQ noaes // AES hashing disabled: use the fallback
1188 MOVL p+0(FP), AX // ptr to data
1189 MOVQ (AX), X0 // the 8 data bytes go in the low quadword of X0
1190 PINSRD $2, h+4(FP), X0 // seed goes in dword 2 of X0
1191 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds, each keyed with the next
1192 AESENC runtime·aeskeysched+16(SB), X0 // 16 bytes of runtime·aeskeysched
1193 AESENC runtime·aeskeysched+32(SB), X0
1194 MOVL X0, ret+8(FP) // result is the low 32 bits of the state
1195 RET
1196 noaes:
1197 JMP runtime·memhash64Fallback(SB)
1198
1199 // Simple masks to get rid of data in the high part of the register.
// The table holds 16 entries of 16 bytes each (256 bytes, read-only).
// Entry n, at byte offset 16*n, has its low n bytes set to 0xff and the
// remaining bytes zero, for n = 0..15.
1200 DATA masks<>+0x00(SB)/4, $0x00000000 // n=0: keep nothing
1201 DATA masks<>+0x04(SB)/4, $0x00000000
1202 DATA masks<>+0x08(SB)/4, $0x00000000
1203 DATA masks<>+0x0c(SB)/4, $0x00000000
1204
1205 DATA masks<>+0x10(SB)/4, $0x000000ff // n=1: keep low 1 byte
1206 DATA masks<>+0x14(SB)/4, $0x00000000
1207 DATA masks<>+0x18(SB)/4, $0x00000000
1208 DATA masks<>+0x1c(SB)/4, $0x00000000
1209
1210 DATA masks<>+0x20(SB)/4, $0x0000ffff // n=2: keep low 2 bytes
1211 DATA masks<>+0x24(SB)/4, $0x00000000
1212 DATA masks<>+0x28(SB)/4, $0x00000000
1213 DATA masks<>+0x2c(SB)/4, $0x00000000
1214
1215 DATA masks<>+0x30(SB)/4, $0x00ffffff // n=3: keep low 3 bytes
1216 DATA masks<>+0x34(SB)/4, $0x00000000
1217 DATA masks<>+0x38(SB)/4, $0x00000000
1218 DATA masks<>+0x3c(SB)/4, $0x00000000
1219
1220 DATA masks<>+0x40(SB)/4, $0xffffffff // n=4: keep low 4 bytes
1221 DATA masks<>+0x44(SB)/4, $0x00000000
1222 DATA masks<>+0x48(SB)/4, $0x00000000
1223 DATA masks<>+0x4c(SB)/4, $0x00000000
1224
1225 DATA masks<>+0x50(SB)/4, $0xffffffff // n=5: keep low 5 bytes
1226 DATA masks<>+0x54(SB)/4, $0x000000ff
1227 DATA masks<>+0x58(SB)/4, $0x00000000
1228 DATA masks<>+0x5c(SB)/4, $0x00000000
1229
1230 DATA masks<>+0x60(SB)/4, $0xffffffff // n=6: keep low 6 bytes
1231 DATA masks<>+0x64(SB)/4, $0x0000ffff
1232 DATA masks<>+0x68(SB)/4, $0x00000000
1233 DATA masks<>+0x6c(SB)/4, $0x00000000
1234
1235 DATA masks<>+0x70(SB)/4, $0xffffffff // n=7: keep low 7 bytes
1236 DATA masks<>+0x74(SB)/4, $0x00ffffff
1237 DATA masks<>+0x78(SB)/4, $0x00000000
1238 DATA masks<>+0x7c(SB)/4, $0x00000000
1239
1240 DATA masks<>+0x80(SB)/4, $0xffffffff // n=8: keep low 8 bytes
1241 DATA masks<>+0x84(SB)/4, $0xffffffff
1242 DATA masks<>+0x88(SB)/4, $0x00000000
1243 DATA masks<>+0x8c(SB)/4, $0x00000000
1244
1245 DATA masks<>+0x90(SB)/4, $0xffffffff // n=9: keep low 9 bytes
1246 DATA masks<>+0x94(SB)/4, $0xffffffff
1247 DATA masks<>+0x98(SB)/4, $0x000000ff
1248 DATA masks<>+0x9c(SB)/4, $0x00000000
1249
1250 DATA masks<>+0xa0(SB)/4, $0xffffffff // n=10: keep low 10 bytes
1251 DATA masks<>+0xa4(SB)/4, $0xffffffff
1252 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1253 DATA masks<>+0xac(SB)/4, $0x00000000
1254
1255 DATA masks<>+0xb0(SB)/4, $0xffffffff // n=11: keep low 11 bytes
1256 DATA masks<>+0xb4(SB)/4, $0xffffffff
1257 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1258 DATA masks<>+0xbc(SB)/4, $0x00000000
1259
1260 DATA masks<>+0xc0(SB)/4, $0xffffffff // n=12: keep low 12 bytes
1261 DATA masks<>+0xc4(SB)/4, $0xffffffff
1262 DATA masks<>+0xc8(SB)/4, $0xffffffff
1263 DATA masks<>+0xcc(SB)/4, $0x00000000
1264
1265 DATA masks<>+0xd0(SB)/4, $0xffffffff // n=13: keep low 13 bytes
1266 DATA masks<>+0xd4(SB)/4, $0xffffffff
1267 DATA masks<>+0xd8(SB)/4, $0xffffffff
1268 DATA masks<>+0xdc(SB)/4, $0x000000ff
1269
1270 DATA masks<>+0xe0(SB)/4, $0xffffffff // n=14: keep low 14 bytes
1271 DATA masks<>+0xe4(SB)/4, $0xffffffff
1272 DATA masks<>+0xe8(SB)/4, $0xffffffff
1273 DATA masks<>+0xec(SB)/4, $0x0000ffff
1274
1275 DATA masks<>+0xf0(SB)/4, $0xffffffff // n=15: keep low 15 bytes
1276 DATA masks<>+0xf4(SB)/4, $0xffffffff
1277 DATA masks<>+0xf8(SB)/4, $0xffffffff
1278 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1279
1280 GLOBL masks<>(SB),RODATA,$256
1281
1282 // These are arguments to pshufb. They move data down from
1283 // the high bytes of the register to the low bytes of the register.
1284 // The index is how many bytes to move.
// The table holds 16 entries of 16 bytes each (256 bytes, read-only).
// Entry n, at byte offset 16*n, selects source bytes 16-n..15 into
// destination bytes 0..n-1. The remaining control bytes are 0xff; their
// high bit is set, which makes PSHUFB write zero to that destination byte.
1285 DATA shifts<>+0x00(SB)/4, $0x00000000 // n=0: all-zero control bytes
1286 DATA shifts<>+0x04(SB)/4, $0x00000000
1287 DATA shifts<>+0x08(SB)/4, $0x00000000
1288 DATA shifts<>+0x0c(SB)/4, $0x00000000
1289
1290 DATA shifts<>+0x10(SB)/4, $0xffffff0f // n=1: byte 15 -> byte 0
1291 DATA shifts<>+0x14(SB)/4, $0xffffffff
1292 DATA shifts<>+0x18(SB)/4, $0xffffffff
1293 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1294
1295 DATA shifts<>+0x20(SB)/4, $0xffff0f0e // n=2: bytes 14-15 -> bytes 0-1
1296 DATA shifts<>+0x24(SB)/4, $0xffffffff
1297 DATA shifts<>+0x28(SB)/4, $0xffffffff
1298 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1299
1300 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d // n=3: bytes 13-15 -> bytes 0-2
1301 DATA shifts<>+0x34(SB)/4, $0xffffffff
1302 DATA shifts<>+0x38(SB)/4, $0xffffffff
1303 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1304
1305 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c // n=4: bytes 12-15 -> bytes 0-3
1306 DATA shifts<>+0x44(SB)/4, $0xffffffff
1307 DATA shifts<>+0x48(SB)/4, $0xffffffff
1308 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1309
1310 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b // n=5: bytes 11-15 -> bytes 0-4
1311 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1312 DATA shifts<>+0x58(SB)/4, $0xffffffff
1313 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1314
1315 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a // n=6: bytes 10-15 -> bytes 0-5
1316 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1317 DATA shifts<>+0x68(SB)/4, $0xffffffff
1318 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1319
1320 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09 // n=7: bytes 9-15 -> bytes 0-6
1321 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1322 DATA shifts<>+0x78(SB)/4, $0xffffffff
1323 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1324
1325 DATA shifts<>+0x80(SB)/4, $0x0b0a0908 // n=8: bytes 8-15 -> bytes 0-7
1326 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1327 DATA shifts<>+0x88(SB)/4, $0xffffffff
1328 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1329
1330 DATA shifts<>+0x90(SB)/4, $0x0a090807 // n=9: bytes 7-15 -> bytes 0-8
1331 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1332 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1333 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1334
1335 DATA shifts<>+0xa0(SB)/4, $0x09080706 // n=10: bytes 6-15 -> bytes 0-9
1336 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1337 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1338 DATA shifts<>+0xac(SB)/4, $0xffffffff
1339
1340 DATA shifts<>+0xb0(SB)/4, $0x08070605 // n=11: bytes 5-15 -> bytes 0-10
1341 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1342 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1343 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1344
1345 DATA shifts<>+0xc0(SB)/4, $0x07060504 // n=12: bytes 4-15 -> bytes 0-11
1346 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1347 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1348 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1349
1350 DATA shifts<>+0xd0(SB)/4, $0x06050403 // n=13: bytes 3-15 -> bytes 0-12
1351 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1352 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1353 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1354
1355 DATA shifts<>+0xe0(SB)/4, $0x05040302 // n=14: bytes 2-15 -> bytes 0-13
1356 DATA shifts<>+0xe4(SB)/4, $0x09080706
1357 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1358 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1359
1360 DATA shifts<>+0xf0(SB)/4, $0x04030201 // n=15: bytes 1-15 -> bytes 0-14
1361 DATA shifts<>+0xf4(SB)/4, $0x08070605
1362 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1363 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1364
1365 GLOBL shifts<>(SB),RODATA,$256
1366
// checkASM reports, in its one-byte result at ret+0(FP), whether the
// masks<> and shifts<> tables both start on a 16-byte boundary.
1367 TEXT ·checkASM(SB),NOSPLIT,$0-1
1368 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1369 MOVL $masks<>(SB), AX
1370 MOVL $shifts<>(SB), BX
1371 ORL BX, AX // OR the two addresses so a single test covers both
1372 TESTL $15, AX // any of the low 4 bits set means one table is misaligned
1373 SETEQ ret+0(FP) // true when the low 4 bits of both addresses are zero
1374 RET
1375
1376 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1377 // Must obey the gcc calling convention.
// The result is left in AX. CX is used as scratch for the TLS lookup.
1378 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1379 get_tls(CX) // macro, presumably from go_tls.h; sets CX up for the g(CX) access below
1380 MOVL g(CX), AX // AX = g
1381 MOVL g_m(AX), AX // AX = g.m
1382 MOVL m_curg(AX), AX // AX = g.m.curg
1383 MOVL (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1384 RET
1385
1386 // The top-most function running on a goroutine
1387 // returns to goexit+PCQuantum.
// There is no RET: the function consists of a NOP, a call that does not
// return, and a trailing NOP.
1388 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
1389 BYTE $0x90 // NOP; NOTE(review): presumably pads the entry so goexit+PCQuantum lands on the CALL below — confirm PCQuantum is 1 on 386
1390 CALL runtime·goexit1(SB) // does not return
1391 // traceback from goexit1 must hit code range of goexit
1392 BYTE $0x90 // NOP; keeps the CALL's return address inside goexit's code range
1393
1394 // Add a module's moduledata to the linked list of moduledata objects. This
1395 // is called from .init_array by a function generated in the linker and so
1396 // follows the platform ABI wrt register preservation -- it only touches AX,
1397 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1398 // instead the pointer to the moduledata is passed in AX.
// The new moduledata is appended after the current tail and then becomes
// the tail itself (runtime·lastmoduledatap).
1399 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1400 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1401 MOVL AX, moduledata_next(DX) // tail.next = new moduledata
1402 MOVL AX, runtime·lastmoduledatap(SB) // new moduledata is now the tail
1403 RET
1404
// uint32tofloat64 converts the uint32 argument a (+0(FP)) to a float64
// result (ret at +4(FP), 8 bytes) using the x87 FPU.
// The value is widened to 64 bits with a zero high word in the 8-byte local
// frame and loaded from there, so a value with bit 31 set is not treated as
// negative.
1405 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
1406 MOVL a+0(FP), AX
1407 MOVL AX, 0(SP) // low 32 bits of the 64-bit temporary
1408 MOVL $0, 4(SP) // high 32 bits are zero
1409 FMOVV 0(SP), F0 // load the 64-bit integer temporary onto the x87 stack
1410 FMOVDP F0, ret+4(FP) // store as float64 and pop the x87 stack
1411 RET
1412
// float64touint32 converts the float64 argument a (+0(FP), 8 bytes) to a
// uint32 result (ret at +8(FP)) using the x87 FPU.
// Local frame ($12): 0(SP) holds the saved FPU control word, 4(SP)..11(SP)
// holds the 64-bit integer produced by the conversion. Only the low 32 bits
// of that integer are returned.
1413 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
1414 FMOVD a+0(FP), F0 // push the float64 argument onto the x87 stack
1415 FSTCW 0(SP) // save the caller's FPU control word
1416 FLDCW runtime·controlWord64trunc(SB) // NOTE(review): by its name, selects truncating rounding — confirm against its definition
1417 FMOVVP F0, 4(SP) // store as a 64-bit integer and pop the x87 stack
1418 FLDCW 0(SP) // restore the caller's FPU control word
1419 MOVL 4(SP), AX // low 32 bits of the converted value
1420 MOVL AX, ret+8(FP)
1421 RET
1422
1423 // gcWriteBarrier informs the GC about heap pointer writes.
1424 //
1425 // gcWriteBarrier returns space in a write barrier buffer which
1426 // should be filled in by the caller.
1427 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1428 // number of bytes of buffer needed in DI, and returns a pointer
1429 // to the buffer space in DI.
1430 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1431 // but may clobber others (e.g., SSE registers).
1432 // Typical use would be, when doing *(CX+88) = AX
1433 // CMPL $0, runtime.writeBarrier(SB)
1434 // JEQ dowrite
1435 // CALL runtime.gcWriteBarrier2(SB)
1436 // MOVL AX, (DI)
1437 // MOVL 88(CX), DX
1438 // MOVL DX, 4(DI)
1439 // dowrite:
1440 // MOVL AX, 88(CX)
//
// Local frame layout ($28):
//   0(SP) DI, 4(SP) AX, 8(SP) DX, 12(SP) BP, 16(SP) SI  (slow path only)
//   20(SP) CX, 24(SP) BX                                (always saved)
1441 TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
1442 // Save the registers clobbered by the fast path. This is slightly
1443 // faster than having the caller spill these.
1444 MOVL CX, 20(SP)
1445 MOVL BX, 24(SP)
1446 retry:
1447 // TODO: Consider passing g.m.p in as an argument so they can be shared
1448 // across a sequence of write barriers.
1449 get_tls(BX)
1450 MOVL g(BX), BX // BX = g
1451 MOVL g_m(BX), BX // BX = g.m
1452 MOVL m_p(BX), BX // BX = g.m.p, which holds the wbBuf used below
1453 // Get current buffer write position.
1454 MOVL (p_wbBuf+wbBuf_next)(BX), CX // original next position
1455 ADDL DI, CX // new next position
1456 // Is the buffer full?
1457 CMPL CX, (p_wbBuf+wbBuf_end)(BX)
1458 JA flush // new position would pass the end: flush, then retry
1459 // Commit to the larger buffer.
1460 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1461 // Make return value (the original next position)
1462 SUBL DI, CX
1463 MOVL CX, DI
1464 // Restore registers.
1465 MOVL 20(SP), CX
1466 MOVL 24(SP), BX
1467 RET
1468
1469 flush:
1470 // Save all general purpose registers since these could be
1471 // clobbered by wbBufFlush and were not saved by the caller.
1472 MOVL DI, 0(SP) // requested byte count, needed again after the flush
1473 MOVL AX, 4(SP)
1474 // BX already saved
1475 // CX already saved
1476 MOVL DX, 8(SP)
1477 MOVL BP, 12(SP)
1478 MOVL SI, 16(SP)
1479 // DI already saved
1480
1481 CALL runtime·wbBufFlush(SB)
1482
1483 MOVL 0(SP), DI
1484 MOVL 4(SP), AX
1485 MOVL 8(SP), DX
1486 MOVL 12(SP), BP
1487 MOVL 16(SP), SI
1488 JMP retry // reload p and the buffer position, then try the reservation again
1489
// gcWriteBarrier1 through gcWriteBarrier8 are the entry points for
// gcWriteBarrier<>. Entry point N loads 4*N into DI, the number of buffer
// bytes to reserve, and tail-jumps to gcWriteBarrier<>, which returns the
// reserved space in DI.
1490 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1491 MOVL $4, DI // 1 slot of 4 bytes
1492 JMP gcWriteBarrier<>(SB)
1493 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1494 MOVL $8, DI // 2 slots
1495 JMP gcWriteBarrier<>(SB)
1496 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1497 MOVL $12, DI // 3 slots
1498 JMP gcWriteBarrier<>(SB)
1499 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1500 MOVL $16, DI // 4 slots
1501 JMP gcWriteBarrier<>(SB)
1502 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1503 MOVL $20, DI // 5 slots
1504 JMP gcWriteBarrier<>(SB)
1505 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1506 MOVL $24, DI // 6 slots
1507 JMP gcWriteBarrier<>(SB)
1508 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1509 MOVL $28, DI // 7 slots
1510 JMP gcWriteBarrier<>(SB)
1511 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1512 MOVL $32, DI // 8 slots
1513 JMP gcWriteBarrier<>(SB)
1514
// panicBounds spills the integer registers into its frame and calls
// runtime·panicBounds32 with two stack arguments:
//   0(SP): the return PC of the call to panicBounds
//   4(SP): a pointer to the saved-register array at 8(SP)
// The array holds AX, CX, DX, BX, an unused slot where SP would go, BP, SI
// and DI, 4 bytes each.
1515 TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$40-0
1516 NO_LOCAL_POINTERS
1517 // Save all int registers that could have an index in them.
1518 // They may be pointers, but if they are they are dead.
1519 MOVL AX, 8(SP)
1520 MOVL CX, 12(SP)
1521 MOVL DX, 16(SP)
1522 MOVL BX, 20(SP)
1523 // skip SP @ 24(SP)
1524 MOVL BP, 28(SP)
1525 MOVL SI, 32(SP)
1526 MOVL DI, 36(SP)
1527
1528 MOVL SP, AX // hide SP read from vet
1529 MOVL 40(AX), AX // PC immediately after call to panicBounds
1530 MOVL AX, 0(SP) // first argument: that PC
1531 LEAL 8(SP), AX
1532 MOVL AX, 4(SP) // second argument: address of the saved registers
1533 CALL runtime·panicBounds32<ABIInternal>(SB)
1534 RET // NOTE(review): presumably not reached, since the callee is expected to panic — confirm
1535
// panicExtend spills the integer registers into its frame and calls
// runtime·panicBounds32X with two stack arguments:
//   0(SP): the return PC of the call to panicExtend
//   4(SP): a pointer to the saved-register array at 8(SP)
// The array holds AX, CX, DX, BX, an unused slot where SP would go, BP, SI
// and DI, 4 bytes each.
1536 TEXT runtime·panicExtend<ABIInternal>(SB),NOSPLIT,$40-0
1537 NO_LOCAL_POINTERS
1538 // Save all int registers that could have an index in them.
1539 // They may be pointers, but if they are they are dead.
1540 MOVL AX, 8(SP)
1541 MOVL CX, 12(SP)
1542 MOVL DX, 16(SP)
1543 MOVL BX, 20(SP)
1544 // skip SP @ 24(SP)
1545 MOVL BP, 28(SP)
1546 MOVL SI, 32(SP)
1547 MOVL DI, 36(SP)
1548
1549 MOVL SP, AX // hide SP read from vet
1550 MOVL 40(AX), AX // PC immediately after call to panicExtend
1551 MOVL AX, 0(SP) // first argument: that PC
1552 LEAL 8(SP), AX
1553 MOVL AX, 4(SP) // second argument: address of the saved registers
1554 CALL runtime·panicBounds32X<ABIInternal>(SB)
1555 RET // NOTE(review): presumably not reached, since the callee is expected to panic — confirm
1556
// runtime·tls_g is a 4-byte, pointer-free variable declared here only for
// Android and Windows builds.
1557 #ifdef GOOS_android
1558 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1559 // Earlier androids are set up in gcc_android.c.
1560 DATA runtime·tls_g+0(SB)/4, $8 // NOTE(review): presumably the byte offset of slot #2 with 4-byte slots — confirm
1561 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1562 #endif
1563 #ifdef GOOS_windows
// No DATA initializer here, unlike the Android case above.
1564 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1565 #endif
1566
View as plain text