Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal that
// syscall_runtime_doAllThreadsSyscall sends (via signalM) to every other
// thread to make it run the pending per-thread system call. It is the
// signal number one above _SIGRTMIN.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific state attached to each m (OS thread).
type mOS struct {
	// profileTimer is the timer id returned by timer_create in
	// setThreadCPUProfiler. It is only meaningful while profileTimerValid
	// is true; setThreadCPUProfiler clears the flag before deleting the
	// timer and sets it only after the timer has been armed.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall to ask this thread to run the
	// syscall described by perThreadSyscall, and reset to 0 by
	// runPerThreadSyscall once the thread has done so.
	needPerThreadSyscall atomic.Uint8

	// NOTE(review): not referenced in this chunk; presumably per-thread
	// state for the getrandom support set up by vgetrandomInit — confirm.
	vgetrandomState uintptr

	// NOTE(review): not referenced in this chunk; presumably the word used
	// by the semaphore-style sleep/wakeup implementation — confirm.
	waitsema uint32
}
41
42
// futex issues the futex system call with the given arguments. It is
// declared without a Go body in this file (NOTE(review): presumably
// implemented in assembly — confirm). futexwakeup treats a negative result
// as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
44
45
46
47
48
49
50
51
52
53
// Futex operation codes passed as the op argument of futex. The wait and
// wake operations (0 and 1) are combined with the private flag.
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1
)
59
60
61
62
63
64
65
66
67
68 func futexsleep(addr *uint32, val uint32, ns int64) {
69
70
71
72
73
74 if ns < 0 {
75 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
76 return
77 }
78
79 var ts timespec
80 ts.setNsec(ns)
81 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
82 }
83
84
85
86
87 func futexwakeup(addr *uint32, cnt uint32) {
88 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
89 if ret >= 0 {
90 return
91 }
92
93
94
95
96 systemstack(func() {
97 print("futexwakeup addr=", addr, " returned ", ret, "\n")
98 })
99
100 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
101 }
102
103 func getproccount() int32 {
104
105
106
107
108
109
110
111 const maxCPUs = 64 * 1024
112 var buf [maxCPUs / 8]byte
113 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
114 if r < 0 {
115 return 1
116 }
117 n := int32(0)
118 for _, v := range buf[:r] {
119 for v != 0 {
120 n += int32(v & 1)
121 v >>= 1
122 }
123 }
124 if n == 0 {
125 n = 1
126 }
127 return n
128 }
129
130
// Flag bits for the clone system call, written as single-bit shifts, and
// the combination used by this file when creating threads.
const (
	_CLONE_VM             = 1 << 8
	_CLONE_FS             = 1 << 9
	_CLONE_FILES          = 1 << 10
	_CLONE_SIGHAND        = 1 << 11
	_CLONE_PTRACE         = 1 << 13
	_CLONE_VFORK          = 1 << 14
	_CLONE_PARENT         = 1 << 15
	_CLONE_THREAD         = 1 << 16
	_CLONE_NEWNS          = 1 << 17
	_CLONE_SYSVSEM        = 1 << 18
	_CLONE_SETTLS         = 1 << 19
	_CLONE_PARENT_SETTID  = 1 << 20
	_CLONE_CHILD_CLEARTID = 1 << 21
	_CLONE_UNTRACED       = 1 << 23
	_CLONE_CHILD_SETTID   = 1 << 24
	_CLONE_STOPPED        = 1 << 25
	_CLONE_NEWUTS         = 1 << 26
	_CLONE_NEWIPC         = 1 << 27

	// cloneFlags is the flag set passed to clone by newosproc and
	// newosproc0: the new thread shares address space, filesystem info,
	// file descriptors, signal handlers and SysV semaphore state with its
	// creator, and joins the same thread group.
	cloneFlags = _CLONE_THREAD | _CLONE_SYSVSEM | _CLONE_SIGHAND | _CLONE_FILES | _CLONE_FS | _CLONE_VM
)
165
166
// clone issues the clone system call. It is declared without a Go body in
// this file. Callers in this file treat a negative result as a negated
// errno and a non-negative result as success. stk is the top of the new
// thread's stack; mp, gp and fn are passed through to the new thread
// (NOTE(review): exact hand-off is in the implementation, not visible here).
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
168
169
170
171
172 func newosproc(mp *m) {
173 stk := unsafe.Pointer(mp.g0.stack.hi)
174
177 if false {
178 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
179 }
180
181
182
183 var oset sigset
184 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
185 ret := retryOnEAGAIN(func() int32 {
186 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
187
188
189 if r >= 0 {
190 return 0
191 }
192 return -r
193 })
194 sigprocmask(_SIG_SETMASK, &oset, nil)
195
196 if ret != 0 {
197 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
198 if ret == _EAGAIN {
199 println("runtime: may need to increase max user processes (ulimit -u)")
200 }
201 throw("newosproc")
202 }
203 }
204
205
206
207
208 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
209 stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
210 if stack == nil {
211 writeErrStr(failallocatestack)
212 exit(1)
213 }
214 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
215 if ret < 0 {
216 writeErrStr(failthreadcreate)
217 exit(1)
218 }
219 }
220
// Auxiliary vector tags examined while parsing auxv.
const (
	_AT_NULL     = 0  // terminates the vector
	_AT_PAGESZ   = 6  // value is the physical page size
	_AT_PLATFORM = 15 // platform identification string
	_AT_HWCAP    = 16 // hardware capability bits
	_AT_SECURE   = 23 // value 1 selects secure mode
	_AT_RANDOM   = 25 // value is the address of 16 random bytes
	_AT_HWCAP2   = 26 // second word of hardware capability bits
)
230
// procAuxv is the NUL-terminated path that sysargs opens when the
// in-memory auxiliary vector yields no entries.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer handed to mincore by sysargs.
var addrspace_vec [1]byte

// mincore is declared without a Go body in this file. sysargs treats a zero
// result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs; auxv may
// end up aliasing it.
var auxvreadbuf [128]uintptr
238
239 func sysargs(argc int32, argv **byte) {
240 n := argc + 1
241
242
243 for argv_index(argv, n) != nil {
244 n++
245 }
246
247
248 n++
249
250
251 auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
252
253 if pairs := sysauxv(auxvp[:]); pairs != 0 {
254 auxv = auxvp[: pairs*2 : pairs*2]
255 return
256 }
257
258
259
260 fd := open(&procAuxv[0], 0 , 0)
261 if fd < 0 {
262
263
264
265 const size = 256 << 10
266 p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
267 if err != 0 {
268 return
269 }
270 var n uintptr
271 for n = 4 << 10; n < size; n <<= 1 {
272 err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
273 if err == 0 {
274 physPageSize = n
275 break
276 }
277 }
278 if physPageSize == 0 {
279 physPageSize = size
280 }
281 munmap(p, size)
282 return
283 }
284
285 n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
286 closefd(fd)
287 if n < 0 {
288 return
289 }
290
291
292 auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
293 pairs := sysauxv(auxvreadbuf[:])
294 auxv = auxvreadbuf[: pairs*2 : pairs*2]
295 }
296
297
// secureMode records whether the auxiliary vector carried _AT_SECURE with
// value 1; it is assigned in sysauxv.
var secureMode bool
299
300 func sysauxv(auxv []uintptr) (pairs int) {
301
302
303 var i int
304 for ; auxv[i] != _AT_NULL; i += 2 {
305 tag, val := auxv[i], auxv[i+1]
306 switch tag {
307 case _AT_RANDOM:
308
309
310
311
312
313
314 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
315
316 case _AT_PAGESZ:
317 physPageSize = val
318
319 case _AT_SECURE:
320 secureMode = val == 1
321 }
322
323 archauxv(tag, val)
324 vdsoauxv(tag, val)
325 }
326 return i / 2
327 }
328
// sysTHPSizePath is the NUL-terminated sysfs path that getHugePageSize
// reads.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
330
331 func getHugePageSize() uintptr {
332 var numbuf [20]byte
333 fd := open(&sysTHPSizePath[0], 0 , 0)
334 if fd < 0 {
335 return 0
336 }
337 ptr := noescape(unsafe.Pointer(&numbuf[0]))
338 n := read(fd, ptr, int32(len(numbuf)))
339 closefd(fd)
340 if n <= 0 {
341 return 0
342 }
343 n--
344 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
345 if !ok || v < 0 {
346 v = 0
347 }
348 if v&(v-1) != 0 {
349
350 return 0
351 }
352 return uintptr(v)
353 }
354
// osinit performs the Linux-specific startup steps: it records the CPU
// count in ncpu and the huge page size in physHugePageSize, then runs
// osArchInit and vgetrandomInit.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	osArchInit()
	vgetrandomInit()
}
361
// urandom_dev is the NUL-terminated path of the random device opened by
// readRandom.
var urandom_dev = []byte("/dev/urandom\x00")
363
364 func readRandom(r []byte) int {
365
366
367 fd := open(&urandom_dev[0], 0 , 0)
368 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
369 closefd(fd)
370 return int(n)
371 }
372
// goenvs delegates to goenvs_unix; there is no Linux-specific work here.
func goenvs() {
	goenvs_unix()
}
376
377
378
379
380
381
382
// libpreinit calls initsig with its argument set to true.
// NOTE(review): presumably the early, library-mode signal setup — confirm
// against initsig.
func libpreinit() {
	initsig(true)
}
386
387
388
389 func mpreinit(mp *m) {
390 mp.gsignal = malg(32 * 1024)
391 mp.gsignal.m = mp
392 }
393
// gettid is declared without a Go body in this file. minit stores its
// result as the calling thread's procid.
func gettid() uint32
395
396
397
398 func minit() {
399 minitSignals()
400
401
402
403
404 getg().m.procid = uint64(gettid())
405 }
406
407
408
409
// unminit reverses minit: it runs unminitSignals and clears the current
// m's procid.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
414
415
416
417
418
419
// mdestroy is a no-op on Linux; there is nothing to release for mp here.
func mdestroy(mp *m) {
}
422
423
424
425
426
// The functions below are declared without Go bodies in this file.

// sigreturn__sigaction is installed as sa_restorer by setsig on 386 and
// amd64. sigtramp and cgoSigtramp are the handler entry points setsig
// substitutes for sighandler (cgoSigtramp when iscgo is set).
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()

// sigaltstack has no result; NOTE(review): presumably wraps sigaltstack(2)
// — confirm.
func sigaltstack(new, old *stackt)

// setitimer has no result; NOTE(review): presumably wraps setitimer(2) —
// confirm.
func setitimer(mode int32, new, old *itimerval)

// timer_create, timer_settime and timer_delete are used by
// setThreadCPUProfiler, which treats a non-zero result as a negated errno.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

func timer_delete(timerid int32) int32

// rtsigprocmask is the primitive behind sigprocmask; size is the size in
// bytes of the sigset arguments.
func rtsigprocmask(how int32, new, old *sigset, size int32)
448
449
450
451 func sigprocmask(how int32, new, old *sigset) {
452 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
453 }
454
// raise and raiseproc are declared without Go bodies in this file.
// NOTE(review): presumably they send sig to the current thread and to the
// whole process respectively — confirm.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity is declared without a Go body. getproccount treats a
// negative result as failure and otherwise uses the result as the number
// of bytes of buf that were filled in.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is declared without a Go body; it is called inside the
// busy-wait loops in this file.
func osyield()
461
462
// osyield_no_g simply calls osyield.
// NOTE(review): presumably exists for callers running without a g —
// confirm.
func osyield_no_g() {
	osyield()
}
466
// pipe2 is declared without a Go body in this file. It returns the read
// and write descriptors and an errno value.
func pipe2(flags int32) (r, w int32, errno int32)
468
469
470 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
471 r, _, err := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
472 return int32(r), int32(err)
473 }
474
// Size constants, in bytes.
// NOTE(review): presumably the sizes of the kernel siginfo and sigevent
// structures; not used in this chunk — confirm.
const (
	_si_max_size    = 1 << 7
	_sigev_max_size = 1 << 6
)
479
480
481
482 func setsig(i uint32, fn uintptr) {
483 var sa sigactiont
484 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
485 sigfillset(&sa.sa_mask)
486
487
488
489 if GOARCH == "386" || GOARCH == "amd64" {
490 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
491 }
492 if fn == abi.FuncPCABIInternal(sighandler) {
493 if iscgo {
494 fn = abi.FuncPCABI0(cgoSigtramp)
495 } else {
496 fn = abi.FuncPCABI0(sigtramp)
497 }
498 }
499 sa.sa_handler = fn
500 sigaction(i, &sa, nil)
501 }
502
503
504
505 func setsigstack(i uint32) {
506 var sa sigactiont
507 sigaction(i, nil, &sa)
508 if sa.sa_flags&_SA_ONSTACK != 0 {
509 return
510 }
511 sa.sa_flags |= _SA_ONSTACK
512 sigaction(i, &sa, nil)
513 }
514
515
516
517 func getsig(i uint32) uintptr {
518 var sa sigactiont
519 sigaction(i, nil, &sa)
520 return sa.sa_handler
521 }
522
523
524
525
// setSignalstackSP stores sp into s.ss_sp. The store goes through an
// unsafe cast so that it works whatever the declared type of ss_sp is.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
529
530
// fixsigcode is a no-op on Linux; the signal code in c is left untouched.
func (c *sigctxt) fixsigcode(sig uint32) {
}
533
534
535
536
537 func sysSigaction(sig uint32, new, old *sigactiont) {
538 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
539
540
541
542
543
544
545
546
547
548
549
550 if sig != 32 && sig != 33 && sig != 64 {
551
552 systemstack(func() {
553 throw("sigaction failed")
554 })
555 }
556 }
557 }
558
559
560
561
// rt_sigaction is declared without a Go body in this file. sysSigaction
// treats a non-zero result as failure; size is the size in bytes of the
// signal mask.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are declared without Go bodies in this file; signalM
// uses them to direct a signal at one thread of the current process.
func getpid() int
func tgkill(tgid, tid, sig int)
566
567
568 func signalM(mp *m, sig int) {
569 tgkill(getpid(), int(mp.procid), sig)
570 }
571
572
573
574
575
576
577
578
579 func validSIGPROF(mp *m, c *sigctxt) bool {
580 code := int32(c.sigcode())
581 setitimer := code == _SI_KERNEL
582 timer_create := code == _SI_TIMER
583
584 if !(setitimer || timer_create) {
585
586
587
588 return true
589 }
590
591 if mp == nil {
592
593
594
595
596
597
598
599
600
601
602
603
604 return setitimer
605 }
606
607
608
609 if mp.profileTimerValid.Load() {
610
611
612
613
614
615 return timer_create
616 }
617
618
619 return setitimer
620 }
621
// setProcessCPUProfiler forwards hz to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
625
626 func setThreadCPUProfiler(hz int32) {
627 mp := getg().m
628 mp.profilehz = hz
629
630
631 if mp.profileTimerValid.Load() {
632 timerid := mp.profileTimer
633 mp.profileTimerValid.Store(false)
634 mp.profileTimer = 0
635
636 ret := timer_delete(timerid)
637 if ret != 0 {
638 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
639 throw("timer_delete")
640 }
641 }
642
643 if hz == 0 {
644
645 return
646 }
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667 spec := new(itimerspec)
668 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
669 spec.it_interval.setNsec(1e9 / int64(hz))
670
671 var timerid int32
672 var sevp sigevent
673 sevp.notify = _SIGEV_THREAD_ID
674 sevp.signo = _SIGPROF
675 sevp.sigev_notify_thread_id = int32(mp.procid)
676 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
677 if ret != 0 {
678
679
680 return
681 }
682
683 ret = timer_settime(timerid, 0, spec, nil)
684 if ret != 0 {
685 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
686 ", 0, {interval: {",
687 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
688 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
689 throw("timer_settime")
690 }
691
692 mp.profileTimer = timerid
693 mp.profileTimerValid.Store(true)
694 }
695
696
697
// perThreadSyscallArgs describes one system call to be repeated on every
// thread: the trap number, its six arguments, and the two results the
// initiating thread obtained, which every other thread must reproduce.
type perThreadSyscallArgs struct {
	trap                   uintptr
	a1, a2, a3, a4, a5, a6 uintptr
	r1, r2                 uintptr
}
709
710
711
712
713
714
// perThreadSyscall is the request currently being broadcast. It is filled
// in and later reset to its zero value by
// syscall_runtime_doAllThreadsSyscall, and read by runPerThreadSyscall on
// each signalled thread.
var perThreadSyscall perThreadSyscallArgs
716
717
718
719
720
721
722
723
724
// syscall_runtime_doAllThreadsSyscall runs the system call (trap, a1..a6)
// first on the calling thread and then, if that succeeded, on every other
// thread listed in allm. It returns the calling thread's results.
//
// It panics when cgo is in use. If the first call fails, its results and
// errno are returned and no other thread is asked to run the call.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// The world stays stopped for the whole operation; every exit path
	// below undoes these three steps in reverse order.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// NOTE(review): presumably held so that no new m can be added to allm
	// while it is being walked below — confirm.
	allocmLock.lock()

	// NOTE(review): presumably pins this goroutine to its m so that the
	// procid read below stays the id of the thread that ran the first
	// call — confirm.
	acquirem()

	// Run the call on this thread first; its results become the reference
	// that the other threads must match.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is ignored on ppc64; runPerThreadSyscall does the same.
		r2 = 0
	}
	if errno != 0 {
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the request for runPerThreadSyscall.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every m has a non-zero procid, since signalM needs it to
	// address the thread.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {

			osyield()
		}
	}

	// Flag and signal every thread except this one.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {

			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Spin until each signalled thread has cleared its flag.
	// NOTE(review): procid is read without atomic.Load64 here, unlike in
	// the two loops above — confirm this is intentional.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// Reset the shared request now that no thread still needs it.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
871
872
873
874
875
876
877
878 func runPerThreadSyscall() {
879 gp := getg()
880 if gp.m.needPerThreadSyscall.Load() == 0 {
881 return
882 }
883
884 args := perThreadSyscall
885 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
886 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
887
888 r2 = 0
889 }
890 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
891 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
892 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
893 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
894 }
895
896 gp.m.needPerThreadSyscall.Store(0)
897 }
898
// Signal code values compared against sigctxt.sigcode in sigFromUser and
// sigFromSeccomp.
const (
	_SI_USER     = 0  // accepted by sigFromUser
	_SI_TKILL    = -6 // accepted by sigFromUser
	_SYS_SECCOMP = 1  // accepted by sigFromSeccomp
)
904
905
906
907
908
909 func (c *sigctxt) sigFromUser() bool {
910 code := int32(c.sigcode())
911 return code == _SI_USER || code == _SI_TKILL
912 }
913
914
915
916
917 func (c *sigctxt) sigFromSeccomp() bool {
918 code := int32(c.sigcode())
919 return code == _SYS_SECCOMP
920 }
921
922
923 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
924 r, _, err := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
925 return int32(r), int32(err)
926 }
927
View as plain text