Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/strconv"
12 "internal/runtime/syscall/linux"
13 "unsafe"
14 )
15
16
17
18
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// sends (via signalM) to every other thread after setting that thread's
// needPerThreadSyscall flag. It is the signal number immediately after
// _SIGRTMIN.
const sigPerThreadSyscall = _SIGRTMIN + 1
20
// mOS holds the Linux-specific fields of an M. The code in this file reaches
// these fields through *m values (for example mp.profileTimer).
type mOS struct {
	// profileTimer is the timer ID produced by timer_create for this
	// thread's CPU profiling timer. setThreadCPUProfiler writes it and
	// resets it to 0 when the timer is deleted.
	profileTimer int32
	// profileTimerValid is true while profileTimer names a timer that
	// setThreadCPUProfiler has created and armed. validSIGPROF reads it to
	// decide which SIGPROF source to accept.
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall to ask this thread to run the
	// pending per-thread system call, and cleared by runPerThreadSyscall
	// once the call has completed.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is not referenced in this part of the file.
	// NOTE(review): presumably per-thread state for the vgetrandom support
	// initialized by vgetrandomInit — confirm against its users.
	vgetrandomState uintptr

	// waitsema is not referenced in this part of the file.
	// NOTE(review): presumably a word used to park/wake the thread —
	// confirm against its users.
	waitsema uint32
}
42
43
// futex is declared without a Go body; its implementation lives outside this
// file (presumably an assembly stub for the futex system call).
//
// In this file it is called with op set to _FUTEX_WAIT_PRIVATE or
// _FUTEX_WAKE_PRIVATE, val as the expected value or wake count, ts as an
// optional *timespec timeout, and addr2/val3 unused (nil/0). futexwakeup
// treats a negative result as an error.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
45
46
47
48
49
50
51
52
53
54
// Futex operation codes passed as the op argument of futex. Each code is the
// base operation number combined with the process-private flag.
const (
	_FUTEX_PRIVATE_FLAG = 0x80
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0 // base operation 0 (wait)
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1 // base operation 1 (wake)
)
60
61
62
63
64
65
66
67
68
69 func futexsleep(addr *uint32, val uint32, ns int64) {
70
71
72
73
74
75 if ns < 0 {
76 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
77 return
78 }
79
80 var ts timespec
81 ts.setNsec(ns)
82 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
83 }
84
85
86
87
88 func futexwakeup(addr *uint32, cnt uint32) {
89 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
90 if ret >= 0 {
91 return
92 }
93
94
95
96
97 systemstack(func() {
98 print("futexwakeup addr=", addr, " returned ", ret, "\n")
99 })
100
101 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
102 }
103
104 func getCPUCount() int32 {
105
106
107
108
109
110
111
112 const maxCPUs = 64 * 1024
113 var buf [maxCPUs / 8]byte
114 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
115 if r < 0 {
116 return 1
117 }
118 n := int32(0)
119 for _, v := range buf[:r] {
120 for v != 0 {
121 n += int32(v & 1)
122 v >>= 1
123 }
124 }
125 if n == 0 {
126 n = 1
127 }
128 return n
129 }
130
131
// Flag bits for the flags argument of clone, written as bit positions.
const (
	_CLONE_VM             = 1 << 8
	_CLONE_FS             = 1 << 9
	_CLONE_FILES          = 1 << 10
	_CLONE_SIGHAND        = 1 << 11
	_CLONE_PTRACE         = 1 << 13
	_CLONE_VFORK          = 1 << 14
	_CLONE_PARENT         = 1 << 15
	_CLONE_THREAD         = 1 << 16
	_CLONE_NEWNS          = 1 << 17
	_CLONE_SYSVSEM        = 1 << 18
	_CLONE_SETTLS         = 1 << 19
	_CLONE_PARENT_SETTID  = 1 << 20
	_CLONE_CHILD_CLEARTID = 1 << 21
	_CLONE_UNTRACED       = 1 << 23
	_CLONE_CHILD_SETTID   = 1 << 24
	_CLONE_STOPPED        = 1 << 25
	_CLONE_NEWUTS         = 1 << 26
	_CLONE_NEWIPC         = 1 << 27

	// cloneFlags is the flag set newosproc and newosproc0 pass to clone
	// when creating a thread: the new thread shares memory, filesystem
	// information, file descriptors, signal handlers and System V
	// semaphore state with its creator and joins its thread group.
	cloneFlags = _CLONE_THREAD |
		_CLONE_SYSVSEM |
		_CLONE_SIGHAND |
		_CLONE_FILES |
		_CLONE_FS |
		_CLONE_VM
)
166
167
// clone is declared without a Go body; its implementation lives outside this
// file (presumably an assembly stub for the clone system call).
//
// newosproc passes cloneFlags, the top of the new stack, the new m, its g0
// and the entry function. Callers in this file treat a negative result as a
// negated errno and any non-negative result as success.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
169
170
171
172
173 func newosproc(mp *m) {
174 stk := unsafe.Pointer(mp.g0.stack.hi)
175
178 if false {
179 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
180 }
181
182
183
184 var oset sigset
185 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
186 ret := retryOnEAGAIN(func() int32 {
187 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
188
189
190 if r >= 0 {
191 return 0
192 }
193 return -r
194 })
195 sigprocmask(_SIG_SETMASK, &oset, nil)
196
197 if ret != 0 {
198 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
199 if ret == _EAGAIN {
200 println("runtime: may need to increase max user processes (ulimit -u)")
201 }
202 throw("newosproc")
203 }
204 }
205
206
207
208
209 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
210 stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
211 if stack == nil {
212 writeErrStr(failallocatestack)
213 exit(1)
214 }
215 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
216 if ret < 0 {
217 writeErrStr(failthreadcreate)
218 exit(1)
219 }
220 }
221
// Tags of the auxiliary vector entries examined by the runtime. sysauxv walks
// (tag, value) pairs and also forwards every pair to archauxv and vdsoauxv.
const (
	_AT_NULL     = 0  // terminates the vector; sysauxv stops at this tag
	_AT_PAGESZ   = 6  // value is stored in physPageSize
	_AT_PLATFORM = 15 // not consumed in this part of the file
	_AT_HWCAP    = 16 // not consumed in this part of the file
	_AT_SECURE   = 23 // secureMode is set when the value is 1
	_AT_RANDOM   = 25 // value is the address of 16 bytes used as startupRand
	_AT_HWCAP2   = 26 // not consumed in this part of the file
)
231
// procAuxv is the NUL-terminated path sysargs opens when the auxiliary vector
// found after argv yields no entries.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer sysargs hands to mincore while
// probing for the page size.
var addrspace_vec [1]byte

// mincore is declared without a Go body; its implementation lives outside
// this file (presumably an assembly stub for the mincore system call).
// sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs. When that
// path is taken, the package-level auxv slice ends up aliasing this array.
var auxvreadbuf [128]uintptr
239
// sysargs locates the auxiliary vector at process start and feeds it to
// sysauxv, recording the parsed portion in the package-level auxv slice.
//
// It tries three sources in order:
//  1. the memory that follows argv and the environment pointers;
//  2. the file /proc/self/auxv;
//  3. if that file cannot be opened, only physPageSize is determined, by
//     probing with mincore.
func sysargs(argc int32, argv **byte) {
	// Start just past argv's terminating nil entry.
	n := argc + 1

	// Skip the following non-nil entries (the environment pointers).
	for argv_index(argv, n) != nil {
		n++
	}

	// Skip the nil entry that ended the loop above.
	n++

	// argv+n is now treated as the start of the auxiliary vector.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		// Keep only the parsed pairs, with capacity clamped to match.
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// The in-memory vector was empty; fall back to reading it from /proc.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// /proc/self/auxv is unavailable. Determine only the physical page
		// size: map a scratch region and call mincore at increasing
		// power-of-two offsets, taking the first offset it accepts.
		// NOTE(review): this relies on mincore rejecting addresses that
		// are not page-aligned — confirm against mincore(2).
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		if physPageSize == 0 {
			// No offset below size was accepted; use size itself.
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminator into the last pair slot so sysauxv always stops
	// inside the buffer, even if the file filled it completely.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
297
298
// secureMode is set by sysauxv: it is true when the auxiliary vector carries
// an _AT_SECURE entry whose value is 1.
var secureMode bool
300
301 func sysauxv(auxv []uintptr) (pairs int) {
302
303
304 var i int
305 for ; auxv[i] != _AT_NULL; i += 2 {
306 tag, val := auxv[i], auxv[i+1]
307 switch tag {
308 case _AT_RANDOM:
309
310
311
312
313
314
315 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
316
317 case _AT_PAGESZ:
318 physPageSize = val
319
320 case _AT_SECURE:
321 secureMode = val == 1
322 }
323
324 archauxv(tag, val)
325 vdsoauxv(tag, val)
326 }
327 return i / 2
328 }
329
// sysTHPSizePath is the NUL-terminated path of the file getHugePageSize reads
// to obtain the huge page size.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
331
332 func getHugePageSize() uintptr {
333 var numbuf [20]byte
334 fd := open(&sysTHPSizePath[0], 0 , 0)
335 if fd < 0 {
336 return 0
337 }
338 ptr := noescape(unsafe.Pointer(&numbuf[0]))
339 n := read(fd, ptr, int32(len(numbuf)))
340 closefd(fd)
341 if n <= 0 {
342 return 0
343 }
344 n--
345 v, ok := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
346 if !ok || v < 0 {
347 v = 0
348 }
349 if v&(v-1) != 0 {
350
351 return 0
352 }
353 return uintptr(v)
354 }
355
// osinit performs Linux-specific runtime initialization: it records the CPU
// count in numCPUStartup and the huge page size in physHugePageSize, then
// calls vgetrandomInit.
func osinit() {
	numCPUStartup = getCPUCount()
	physHugePageSize = getHugePageSize()
	vgetrandomInit()
}
361
// urandom_dev is the NUL-terminated path of the device readRandom reads from.
var urandom_dev = []byte("/dev/urandom\x00")
363
364 func readRandom(r []byte) int {
365
366
367 fd := open(&urandom_dev[0], 0 , 0)
368 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
369 closefd(fd)
370 return int(n)
371 }
372
// goenvs delegates to goenvs_unix; Linux needs no additional handling here.
func goenvs() {
	goenvs_unix()
}
376
377
378
379
380
381
382
// libpreinit calls initsig with its argument set to true.
// NOTE(review): the name suggests this runs during early initialization when
// the runtime is loaded as a library, and that true selects a "pre-init"
// mode of initsig — confirm against initsig and the callers.
func libpreinit() {
	initsig(true)
}
386
387
388
// mpreinit allocates the signal-handling goroutine for mp: it creates
// mp.gsignal with a 32 KiB stack via malg and points it back at mp.
func mpreinit(mp *m) {
	mp.gsignal = malg(32 * 1024)
	mp.gsignal.m = mp
}
393
// gettid is declared without a Go body; its implementation lives outside
// this file. minit stores its result as the current thread's procid.
func gettid() uint32
395
396
397
398 func minit() {
399 minitSignals()
400
401
402
403
404 getg().m.procid = uint64(gettid())
405 }
406
407
408
409
410 func unminit() {
411 unminitSignals()
412 getg().m.procid = 0
413 }
414
415
416
417
418
419
420
// mdestroy is a no-op on Linux: there are no OS resources to release for mp
// here.
func mdestroy(mp *m) {
}
423
424
425
426
427
// The following functions are declared without Go bodies; their
// implementations live outside this file. setsig only takes their addresses
// (via abi.FuncPCABI0): sigreturn__sigaction is installed as sa_restorer on
// 386 and amd64, and sigtramp or cgoSigtramp (when iscgo is set) is installed
// as the handler in place of sighandler.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()
431
432
// The functions below are declared without Go bodies; their implementations
// live outside this file (presumably thin system call stubs).

// sigaltstack takes a new alternate signal stack description and a place to
// store the old one. It is not called in this part of the file.
func sigaltstack(new, old *stackt)

// setitimer takes a timer mode plus new and old itimerval values. It is not
// called in this part of the file.
func setitimer(mode int32, new, old *itimerval)

// timer_create creates a timer on clockid that notifies as described by sevp
// and stores the new timer's ID in *timerid. setThreadCPUProfiler treats a
// non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime arms timerid with the intervals in new. setThreadCPUProfiler
// treats a non-zero result as failure and reports its negation as errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete deletes timerid. setThreadCPUProfiler treats a non-zero result
// as failure and reports its negation as errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is the primitive behind sigprocmask; size is the size in
// bytes of the signal set type.
func rtsigprocmask(how int32, new, old *sigset, size int32)
449
450
451
// sigprocmask calls rtsigprocmask, supplying the size of the sigset type as
// the final argument. new may be nil: unsafe.Sizeof does not evaluate *new.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
455
// The functions below are declared without Go bodies; their implementations
// live outside this file.

// raise and raiseproc each take a signal number. Neither is called in this
// part of the file.
// NOTE(review): by name, raise targets the current thread and raiseproc the
// whole process — confirm against the implementations.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity fills buf (of len bytes) with the CPU affinity mask for
// pid. getCPUCount treats a negative result as failure and otherwise uses the
// result as the number of bytes of buf that are valid.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is called in the busy-wait loops of
// syscall_runtime_doAllThreadsSyscall between polls.
func osyield()
462
463
// osyield_no_g simply calls osyield.
// NOTE(review): by name, this is the entry point for callers that run
// without a g — confirm against the callers.
func osyield_no_g() {
	osyield()
}
467
// pipe2 is declared without a Go body; its implementation lives outside this
// file. It returns the read and write descriptors of a new pipe created with
// flags, plus an errno. It is not called in this part of the file.
func pipe2(flags int32) (r, w int32, errno int32)
469
470
471 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
472 r, _, err := linux.Syscall6(linux.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
473 return int32(r), int32(err)
474 }
475
// Size limits for signal-related structures. Neither constant is referenced
// in this part of the file.
// NOTE(review): presumably the byte sizes the kernel reserves for siginfo
// and sigevent, used to pad the Go definitions — confirm against the
// arch-specific definitions.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
480
481
482
483 func setsig(i uint32, fn uintptr) {
484 var sa sigactiont
485 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
486 sigfillset(&sa.sa_mask)
487
488
489
490
491 if GOARCH == "386" || GOARCH == "amd64" {
492 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
493 }
494 if fn == abi.FuncPCABIInternal(sighandler) {
495 if iscgo {
496 fn = abi.FuncPCABI0(cgoSigtramp)
497 } else {
498 fn = abi.FuncPCABI0(sigtramp)
499 }
500 }
501 sa.sa_handler = fn
502 sigaction(i, &sa, nil)
503 }
504
505
506
507 func setsigstack(i uint32) {
508 var sa sigactiont
509 sigaction(i, nil, &sa)
510 if sa.sa_flags&_SA_ONSTACK != 0 {
511 return
512 }
513 sa.sa_flags |= _SA_ONSTACK
514 sigaction(i, &sa, nil)
515 }
516
517
518
519 func getsig(i uint32) uintptr {
520 var sa sigactiont
521 sigaction(i, nil, &sa)
522 return sa.sa_handler
523 }
524
525
526
527
// setSignalstackSP stores sp into s.ss_sp. The store goes through an unsafe
// cast so the field is written as a plain uintptr regardless of its declared
// type.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
531
532
// fixsigcode is a no-op on Linux: the signal code in c needs no adjustment
// for sig.
func (c *sigctxt) fixsigcode(sig uint32) {
}
535
536
537
538
539 func sysSigaction(sig uint32, new, old *sigactiont) {
540 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
541
542
543
544
545
546
547
548
549
550
551
552 if sig != 32 && sig != 33 && sig != 64 {
553
554 systemstack(func() {
555 throw("sigaction failed")
556 })
557 }
558 }
559 }
560
561
562
563
// rt_sigaction is declared without a Go body; its implementation lives
// outside this file. size is the size in bytes of the signal mask.
// sysSigaction treats a non-zero result as failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
565
566
567
568
569
570
571
572
573
574 func fixSigactionForCgo(new *sigactiont) {
575 if GOARCH == "386" && new != nil {
576 new.sa_flags &^= _SA_RESTORER
577 new.sa_restorer = 0
578 }
579 }
580
// getpid and tgkill are declared without Go bodies; their implementations
// live outside this file. signalM uses them together: tgkill sends sig to
// thread tid within thread group tgid, and getpid supplies the tgid.
func getpid() int
func tgkill(tgid, tid, sig int)
583
584
// signalM sends sig to the OS thread backing mp, identified by mp.procid
// within the current process.
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
588
589
590
591
592
593
594
595
596 func validSIGPROF(mp *m, c *sigctxt) bool {
597 code := int32(c.sigcode())
598 setitimer := code == _SI_KERNEL
599 timer_create := code == _SI_TIMER
600
601 if !(setitimer || timer_create) {
602
603
604
605 return true
606 }
607
608 if mp == nil {
609
610
611
612
613
614
615
616
617
618
619
620
621 return setitimer
622 }
623
624
625
626 if mp.profileTimerValid.Load() {
627
628
629
630
631
632 return timer_create
633 }
634
635
636 return setitimer
637 }
638
// setProcessCPUProfiler sets the process-wide CPU profiling rate to hz by
// delegating to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
642
643 func setThreadCPUProfiler(hz int32) {
644 mp := getg().m
645 mp.profilehz = hz
646
647
648 if mp.profileTimerValid.Load() {
649 timerid := mp.profileTimer
650 mp.profileTimerValid.Store(false)
651 mp.profileTimer = 0
652
653 ret := timer_delete(timerid)
654 if ret != 0 {
655 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
656 throw("timer_delete")
657 }
658 }
659
660 if hz == 0 {
661
662 return
663 }
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684 spec := new(itimerspec)
685 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
686 spec.it_interval.setNsec(1e9 / int64(hz))
687
688 var timerid int32
689 var sevp sigevent
690 sevp.notify = _SIGEV_THREAD_ID
691 sevp.signo = _SIGPROF
692 sevp.sigev_notify_thread_id = int32(mp.procid)
693 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
694 if ret != 0 {
695
696
697 return
698 }
699
700 ret = timer_settime(timerid, 0, spec, nil)
701 if ret != 0 {
702 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
703 ", 0, {interval: {",
704 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
705 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
706 throw("timer_settime")
707 }
708
709 mp.profileTimer = timerid
710 mp.profileTimerValid.Store(true)
711 }
712
713
714
// perThreadSyscallArgs describes a system call that every thread must
// perform: the trap number, its six arguments, and the results (r1, r2) that
// each thread is expected to obtain.
type perThreadSyscallArgs struct {
	trap                   uintptr
	a1, a2, a3, a4, a5, a6 uintptr
	r1, r2                 uintptr
}
726
727
728
729
730
731
// perThreadSyscall holds the system call currently being replicated across
// threads. syscall_runtime_doAllThreadsSyscall fills it in before signalling
// the other threads and resets it to the zero value once they have all
// finished; runPerThreadSyscall copies it and executes the call.
var perThreadSyscall perThreadSyscallArgs
733
734
735
736
737
738
739
740
741
// syscall_runtime_doAllThreadsSyscall executes the system call trap with
// arguments a1..a6 on the calling thread and then on every other thread in
// allm.
//
// It returns the results of the call made on the calling thread. If that
// first call fails (non-zero errno), the other threads are not asked to run
// it and the failure is returned directly. The other threads check their own
// results against these in runPerThreadSyscall.
//
// It panics when iscgo is set.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Not supported when cgo is in use.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the whole operation.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// NOTE(review): presumably this keeps new Ms from being added to allm
	// while it is walked below — confirm against allocmLock's other users.
	allocmLock.lock()

	// NOTE(review): presumably this keeps the goroutine on the current M
	// for the rest of the function — confirm against acquirem.
	acquirem()

	// Run the call on this thread first. If it fails here, undo the setup
	// in reverse order and report the failure without involving the other
	// threads.
	r1, r2, errno := linux.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is forced to zero on ppc64 so comparisons are meaningful.
		r2 = 0
	}
	if errno != 0 {
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the call and its expected results for the other threads.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every M has a non-zero procid (minit sets it), so that
	// each one can be signalled.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// This thread has not recorded its ID yet.
			osyield()
		}
	}

	// Flag and signal every thread except this one.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// The call has already been made on this thread.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for each signalled thread to clear its flag, which
	// runPerThreadSyscall does after completing the call.
	// NOTE(review): procid is read without atomic.Load64 here, unlike in
	// the loops above.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// No call is pending any more.
	perThreadSyscall = perThreadSyscallArgs{}

	// Release everything in the reverse order of acquisition.
	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
888
889
890
891
892
893
894
895 func runPerThreadSyscall() {
896 gp := getg()
897 if gp.m.needPerThreadSyscall.Load() == 0 {
898 return
899 }
900
901 args := perThreadSyscall
902 r1, r2, errno := linux.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
903 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
904
905 r2 = 0
906 }
907 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
908 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
909 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
910 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
911 }
912
913 gp.m.needPerThreadSyscall.Store(0)
914 }
915
// Signal code values compared against sigctxt.sigcode in this file.
const (
	_SI_USER     = 0  // accepted by sigFromUser
	_SI_TKILL    = -6 // accepted by sigFromUser
	_SYS_SECCOMP = 1  // accepted by sigFromSeccomp
)
921
922
923
924
925
926 func (c *sigctxt) sigFromUser() bool {
927 code := int32(c.sigcode())
928 return code == _SI_USER || code == _SI_TKILL
929 }
930
931
932
933
934 func (c *sigctxt) sigFromSeccomp() bool {
935 code := int32(c.sigcode())
936 return code == _SYS_SECCOMP
937 }
938
939
940 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
941 r, _, err := linux.Syscall6(linux.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
942 return int32(r), int32(err)
943 }
944
View as plain text