Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/strconv"
12 "internal/runtime/syscall/linux"
13 "unsafe"
14 )
15
16
17
18
// sigPerThreadSyscall is the real-time signal used by
// syscall_runtime_doAllThreadsSyscall to ask every other thread to
// replay a pending syscall (handled in runPerThreadSyscall).
const sigPerThreadSyscall = _SIGRTMIN + 1
20
// mOS holds the Linux-specific per-thread (per-m) state.
type mOS struct {
	// profileTimer is the ID of the per-thread CPU timer created by
	// setThreadCPUProfiler via timer_create. It is only meaningful
	// while profileTimerValid is true; setThreadCPUProfiler clears
	// the valid flag before deleting the timer.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall to tell this thread's
	// sigPerThreadSyscall handler (runPerThreadSyscall) that it must
	// replay the pending syscall; the handler clears it when done.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is per-thread state for the vDSO getrandom
	// path (set up via vgetrandomInit in osinit; the management code
	// lives elsewhere — not visible in this file).
	vgetrandomState uintptr

	// waitsema is not referenced in this file.
	// NOTE(review): presumably the semaphore word used by the
	// sema-based lock/note implementation — confirm against its users.
	waitsema uint32
}
42
43
44
45
46
47
48
49
50
51
// futex(2) operations. The PRIVATE flag restricts the futex to this
// process, which lets the kernel skip cross-process bookkeeping.
const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
57
58
59
60
61
62
63
64
65
// futexsleep atomically checks that *addr still equals val and, if so,
// sleeps until another thread wakes the address via futexwakeup, for at
// most ns nanoseconds. ns < 0 means sleep with no timeout. A spurious
// or timed-out return is fine; callers are expected to re-check.
func futexsleep(addr *uint32, val uint32, ns int64) {
	// With no timeout the kernel call only returns on a wakeup or a
	// value mismatch, so pass a nil timespec.
	if ns < 0 {
		futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
		return
	}

	var ts timespec
	ts.setNsec(ns)
	futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, &ts, nil, 0)
}
81
82
83
84
// futexwakeup wakes up to cnt threads sleeping on addr via futexsleep.
// A negative return from the kernel indicates a bug, so it crashes the
// process with a recognizable fault address.
func futexwakeup(addr *uint32, cnt uint32) {
	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
	if ret >= 0 {
		return
	}

	// FUTEX_WAKE on a private futex should never fail; print on the
	// system stack since the caller's context may be fragile.
	systemstack(func() {
		print("futexwakeup addr=", addr, " returned ", ret, "\n")
	})

	// Deliberate crash: the store to the distinctive address 0x1006
	// makes this failure easy to identify in a fault report.
	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
}
100
101 func getCPUCount() int32 {
102
103
104
105
106
107
108
109 const maxCPUs = 64 * 1024
110 var buf [maxCPUs / 8]byte
111 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
112 if r < 0 {
113 return 1
114 }
115 n := int32(0)
116 for _, v := range buf[:r] {
117 for v != 0 {
118 n += int32(v & 1)
119 v >>= 1
120 }
121 }
122 if n == 0 {
123 n = 1
124 }
125 return n
126 }
127
128
// clone(2) flags.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set used by newosproc/newosproc0: share
	// address space, filesystem info, file descriptors, signal
	// handlers, and System V semaphore adjustments, and make the new
	// task a thread in this process's thread group.
	cloneFlags = _CLONE_VM |
		_CLONE_FS |
		_CLONE_FILES |
		_CLONE_SIGHAND |
		_CLONE_SYSVSEM |
		_CLONE_THREAD
)
163
164
// clone creates a new OS thread via the clone(2) syscall, starting it
// at fn with mp/gp installed. Returns the new TID, or a negative errno
// on failure. No Go body here — presumably implemented in per-arch
// assembly (sys_linux_*.s).
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
166
167
168
169
// newosproc creates a new OS thread for mp, starting at mstart on
// mp.g0's stack. May run with m.p == nil, so write barriers are not
// allowed here.
func newosproc(mp *m) {
	stk := unsafe.Pointer(mp.g0.stack.hi)
	// Toggle to true for thread-creation debugging output.
	if false {
		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
	}

	// Disable all signals during clone so the new thread starts with
	// signals blocked; minit on the new thread will unblock them.
	var oset sigset
	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
	ret := retryOnEAGAIN(func() int32 {
		r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
		// clone returns a negative errno on failure; retryOnEAGAIN
		// expects 0 for success or a positive errno.
		if r >= 0 {
			return 0
		}
		return -r
	})
	sigprocmask(_SIG_SETMASK, &oset, nil)

	if ret != 0 {
		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
		if ret == _EAGAIN {
			println("runtime: may need to increase max user processes (ulimit -u)")
		}
		throw("newosproc")
	}
}
202
203
204
205
// newosproc0 creates a new OS thread that starts directly at fn on a
// freshly allocated stack, with no m or g (both are passed as nil to
// clone). NOTE(review): presumably used very early / from c-archive
// style initialization where no m exists yet — it must not use Go
// facilities that require a valid g.
func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
	stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
	if stack == nil {
		writeErrStr(failallocatestack)
		exit(1)
	}
	// Stacks grow down: pass the high end of the allocation.
	ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
	if ret < 0 {
		writeErrStr(failthreadcreate)
		exit(1)
	}
}
218
// ELF auxiliary vector tags (see getauxval(3) / <elf.h>).
const (
	_AT_NULL     = 0  // terminator
	_AT_PAGESZ   = 6  // system physical page size
	_AT_PLATFORM = 15 // string identifying platform
	_AT_HWCAP    = 16 // hardware capability bit flags
	_AT_SECURE   = 23 // secure-execution mode flag
	_AT_RANDOM   = 25 // address of 16 random bytes
	_AT_HWCAP2   = 26 // extension of AT_HWCAP
)
228
// procAuxv is the NUL-terminated path used by sysargs to read the
// auxiliary vector when it is not available after envp.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer for the mincore probes
// in sysargs's page-size fallback.
var addrspace_vec [1]byte

// mincore wraps the mincore(2) syscall; no Go body here (presumably
// assembly elsewhere).
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs.
var auxvreadbuf [128]uintptr
236
// sysargs locates the ELF auxiliary vector and processes it via
// sysauxv. The kernel places auxv immediately after the environment
// strings on the stack; if that is empty (e.g. the host cleared it),
// fall back to reading /proc/self/auxv, and failing that, probe the
// physical page size directly with mmap+mincore.
func sysargs(argc int32, argv **byte) {
	n := argc + 1

	// Skip over argv, envp to get to auxv.
	for argv_index(argv, n) != nil {
		n++
	}

	// Skip the NULL separator after envp.
	n++

	// The auxiliary vector starts here; treat it as a very large
	// array and let sysauxv stop at AT_NULL.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// In some situations we don't get a real auxv (pairs == 0).
	// Fall back to /proc/self/auxv.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// Cannot read auxv at all. We still need physPageSize, so
		// probe it: map a region and mincore single bytes at
		// increasing power-of-two offsets; the first offset the
		// kernel accepts as page-aligned is the page size.
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		if physPageSize == 0 {
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Make sure buf is terminated, even if we didn't read
	// the whole file.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
294
295
// secureMode records whether the kernel flagged this process as
// secure-execution (AT_SECURE, e.g. setuid binaries); set in sysauxv.
var secureMode bool
297
// sysauxv processes the (tag, value) pairs of an auxiliary vector up
// to the AT_NULL terminator, recording the values the runtime cares
// about and forwarding every pair to the arch- and vdso-specific
// hooks. It returns the number of pairs consumed (excluding AT_NULL).
func sysauxv(auxv []uintptr) (pairs int) {
	var i int
	for ; auxv[i] != _AT_NULL; i += 2 {
		tag, val := auxv[i], auxv[i+1]
		switch tag {
		case _AT_RANDOM:
			// The kernel provides the address of 16 random bytes;
			// use them to seed startup randomness.
			startupRand = (*[16]byte)(unsafe.Pointer(val))[:]

		case _AT_PAGESZ:
			physPageSize = val

		case _AT_SECURE:
			secureMode = val == 1
		}

		archauxv(tag, val)
		vdsoauxv(tag, val)
	}
	return i / 2
}
326
// sysTHPSizePath is the NUL-terminated sysfs path from which
// getHugePageSize reads the transparent huge page (PMD) size.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
328
// getHugePageSize returns the kernel's transparent huge page size in
// bytes, or 0 if it cannot be determined or is not a power of two.
func getHugePageSize() uintptr {
	var numbuf [20]byte
	fd := open(&sysTHPSizePath[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		return 0
	}
	ptr := noescape(unsafe.Pointer(&numbuf[0]))
	n := read(fd, ptr, int32(len(numbuf)))
	closefd(fd)
	if n <= 0 {
		return 0
	}
	// Drop the trailing newline written by sysfs.
	n--
	v, ok := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
	if !ok || v < 0 {
		v = 0
	}
	if v&(v-1) != 0 {
		// v is not a power of 2
		return 0
	}
	return uintptr(v)
}
352
// osinit performs OS-specific one-time initialization: CPU count,
// huge page size, and vDSO getrandom setup.
func osinit() {
	numCPUStartup = getCPUCount()
	physHugePageSize = getHugePageSize()
	vgetrandomInit()
}
358
// urandom_dev is the NUL-terminated device path readRandom opens.
var urandom_dev = []byte("/dev/urandom\x00")
360
// readRandom fills r with random bytes from /dev/urandom and returns
// the number of bytes read (which may be <= 0 on failure; callers
// must handle a short read).
func readRandom(r []byte) int {
	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
	closefd(fd)
	return int(n)
}
369
// goenvs populates the environment; Linux uses the common Unix path.
func goenvs() {
	goenvs_unix()
}
373
374
375
376
377
378
379
// libpreinit installs signal handlers early. NOTE(review): presumably
// the synchronous initialization hook for -buildmode=c-archive /
// c-shared — confirm against the caller; only initsig is visible here.
func libpreinit() {
	initsig(true)
}
383
384
385
// mpreinit initializes mp before it is attached to an OS thread:
// allocate its 32 KiB signal-handling goroutine stack. Called on the
// parent thread, so allocation is allowed here.
func mpreinit(mp *m) {
	mp.gsignal = malg(32 * 1024)
	mp.gsignal.m = mp
}
390
// gettid returns the caller's kernel thread ID (gettid(2)); no Go
// body here — presumably implemented in assembly.
func gettid() uint32
392
393
394
// minit runs on the new OS thread to finish initializing its m:
// set up signal handling state and record the kernel thread ID.
// Cannot allocate; it runs before the m is fully set up.
func minit() {
	minitSignals()

	// procid doubles as the TID for tgkill in signalM, and
	// doAllThreadsSyscall spins until it is nonzero, so set it here.
	getg().m.procid = uint64(gettid())
}
403
404
405
406
// unminit undoes minit when the m is being detached from its OS
// thread: tear down signal state and clear the recorded TID.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
411
412
413
414
415
416
417
// mdestroy cleans up OS resources owned by mp when its thread exits.
// Nothing to do on Linux.
func mdestroy(mp *m) {
}
420
421
422
423
424
// The following have no Go bodies — presumably implemented in
// per-arch assembly (sys_linux_*.s).

// sigreturn__sigaction is the signal-return trampoline installed as
// sa_restorer by setsig on 386/amd64.
func sigreturn__sigaction()

// sigtramp is the ABI0 entry point for Go signal handlers.
func sigtramp()

// cgoSigtramp is the handler entry point used when cgo is in use.
func cgoSigtramp()

// sigaltstack wraps sigaltstack(2).
func sigaltstack(new, old *stackt)

// setitimer wraps setitimer(2).
func setitimer(mode int32, new, old *itimerval)

// timer_create wraps timer_create(2); returns 0 or a negative errno.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime wraps timer_settime(2); returns 0 or a negative errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete wraps timer_delete(2); returns 0 or a negative errno.
func timer_delete(timerid int32) int32

// rtsigprocmask wraps rt_sigprocmask(2); size is the sigset size in
// bytes (see sigprocmask below).
func rtsigprocmask(how int32, new, old *sigset, size int32)
446
447
448
// sigprocmask adjusts this thread's signal mask, supplying the sigset
// size the raw rt_sigprocmask syscall requires.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
452
// More assembly-implemented syscall wrappers (no Go bodies here).

// raise sends sig to the current thread.
func raise(sig uint32)

// raiseproc sends sig to the whole process.
func raiseproc(sig uint32)

// sched_getaffinity wraps sched_getaffinity(2): fills buf with the
// CPU affinity mask and returns the number of bytes written, or a
// negative errno.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield yields the CPU (sched_yield(2)).
func osyield()
459
460
// osyield_no_g is the variant of osyield callable without a valid g;
// the underlying syscall needs no g, so it just delegates.
func osyield_no_g() {
	osyield()
}

// pipe2 wraps pipe2(2), returning both pipe ends and an errno; no Go
// body here — presumably implemented in assembly.
func pipe2(flags int32) (r, w int32, errno int32)
466
467
468 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
469 r, _, err := linux.Syscall6(linux.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
470 return int32(r), int32(err)
471 }
472
// Kernel structure sizes. NOTE(review): these presumably mirror the
// kernel's siginfo_t and sigevent sizes used by the signal plumbing —
// confirm against the struct definitions elsewhere in the package.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
477
478
479
// setsig installs fn as the handler for signal i, with SIGINFO
// semantics, running on the alternate stack, restarting interruptible
// syscalls, and all signals blocked during delivery.
func setsig(i uint32, fn uintptr) {
	var sa sigactiont
	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
	sigfillset(&sa.sa_mask)
	// On 386/amd64 the kernel requires a restorer; point it at our
	// sigreturn trampoline.
	if GOARCH == "386" || GOARCH == "amd64" {
		sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
	}
	// The Go sighandler must be entered through the ABI0 trampoline
	// (sigtramp, or cgoSigtramp when cgo may also want the signal).
	if fn == abi.FuncPCABIInternal(sighandler) {
		if iscgo {
			fn = abi.FuncPCABI0(cgoSigtramp)
		} else {
			fn = abi.FuncPCABI0(sigtramp)
		}
	}
	sa.sa_handler = fn
	sigaction(i, &sa, nil)
}
501
502
503
// setsigstack ensures the existing handler for signal i runs on the
// alternate signal stack, preserving everything else about it.
func setsigstack(i uint32) {
	var sa sigactiont
	sigaction(i, nil, &sa)
	if sa.sa_flags&_SA_ONSTACK != 0 {
		return
	}
	sa.sa_flags |= _SA_ONSTACK
	sigaction(i, &sa, nil)
}
513
514
515
// getsig returns the currently installed handler for signal i.
func getsig(i uint32) uintptr {
	var sa sigactiont
	sigaction(i, nil, &sa)
	return sa.sa_handler
}
521
522
523
524
// setSignalstackSP stores sp as the base of the alternate signal
// stack. The unsafe cast sidesteps ss_sp's declared pointer type so
// no write barrier is involved.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
528
529
// fixsigcode adjusts the signal code for buggy kernel deliveries.
// Nothing needs fixing on Linux.
func (c *sigctxt) fixsigcode(sig uint32) {
}
532
533
534
535
// sysSigaction installs/queries a signal handler via rt_sigaction,
// throwing if the kernel rejects the call — except for a few signal
// numbers where failure is expected and ignored.
func sysSigaction(sig uint32, new, old *sigactiont) {
	if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
		// NOTE(review): 32 and 33 are presumably the real-time
		// signals reserved by glibc/NPTL, and 64 may be rejected by
		// some kernels — rt_sigaction can legitimately fail for
		// these, so they are excluded from the throw. Confirm the
		// exact rationale upstream.
		if sig != 32 && sig != 33 && sig != 64 {
			// Use the system stack: the throw may happen in a
			// context where growing the goroutine stack is unsafe.
			systemstack(func() {
				throw("sigaction failed")
			})
		}
	}
}
557
558
559
560
// rt_sigaction wraps rt_sigaction(2); size is the sigset size in
// bytes. Returns 0 or a negative errno. No Go body here — presumably
// implemented in assembly.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
562
563
564
565
566
567
568
569
570
// fixSigactionForCgo adjusts a sigactiont before it is passed to the
// C library's sigaction: on 386 it strips the SA_RESTORER flag and
// restorer address. NOTE(review): presumably because libc's sigaction
// installs its own restorer on 386 and ours would conflict — confirm
// against the cgo signal path.
func fixSigactionForCgo(new *sigactiont) {
	if GOARCH == "386" && new != nil {
		new.sa_flags &^= _SA_RESTORER
		new.sa_restorer = 0
	}
}
577
// getpid returns this process's PID; no Go body (assembly elsewhere).
func getpid() int

// tgkill sends sig to thread tid in thread group tgid (tgkill(2));
// no Go body (assembly elsewhere).
func tgkill(tgid, tid, sig int)

// signalM sends sig specifically to the OS thread running mp, using
// the TID recorded in mp.procid by minit.
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
585
586
587
588
589
590
591
592
// validSIGPROF reports whether this SIGPROF delivery should be
// counted. Two kernel timer mechanisms can fire SIGPROF: the
// process-wide setitimer (code _SI_KERNEL) and the per-thread
// timer_create timer (code _SI_TIMER). When both are armed we must
// count each sample exactly once, so each delivery is attributed to
// exactly one mechanism.
func validSIGPROF(mp *m, c *sigctxt) bool {
	code := int32(c.sigcode())
	setitimer := code == _SI_KERNEL
	timer_create := code == _SI_TIMER

	if !(setitimer || timer_create) {
		// This signal was not generated by either profiling timer
		// (e.g. sent explicitly from another process); accept it.
		return true
	}

	if mp == nil {
		// No m for this thread, so it cannot have a per-thread
		// timer; only a setitimer delivery is expected here.
		// NOTE(review): presumably a signal on a non-Go thread —
		// confirm against the sighandler call site.
		return setitimer
	}

	// This thread has a per-thread timer armed: count only its
	// deliveries so the overlapping setitimer samples aren't
	// double-counted.
	if mp.profileTimerValid.Load() {
		return timer_create
	}

	// No per-thread timer: only setitimer deliveries count.
	return setitimer
}
635
// setProcessCPUProfiler starts/stops the process-wide CPU profiling
// timer at hz samples per second (0 stops it).
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
639
// setThreadCPUProfiler configures this thread's per-thread CPU
// profiling timer (CLOCK_THREAD_CPUTIME_ID) to fire SIGPROF hz times
// per second of this thread's CPU time. hz == 0 disables it.
func setThreadCPUProfiler(hz int32) {
	mp := getg().m
	mp.profilehz = hz

	// Always tear down any existing timer first. Invalidate the
	// flag before timer_delete so validSIGPROF stops attributing
	// samples to a timer that is about to disappear.
	if mp.profileTimerValid.Load() {
		timerid := mp.profileTimer
		mp.profileTimerValid.Store(false)
		mp.profileTimer = 0

		ret := timer_delete(timerid)
		if ret != 0 {
			print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
			throw("timer_delete")
		}
	}

	if hz == 0 {
		// Disable only; nothing more to do.
		return
	}

	// Randomize the first expiration within one period (never 0,
	// which would disarm the timer) so that threads started at the
	// same time don't all sample in lockstep; subsequent expirations
	// come every 1e9/hz ns of thread CPU time.
	spec := new(itimerspec)
	spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
	spec.it_interval.setNsec(1e9 / int64(hz))

	var timerid int32
	var sevp sigevent
	sevp.notify = _SIGEV_THREAD_ID
	sevp.signo = _SIGPROF
	sevp.sigev_notify_thread_id = int32(mp.procid)
	ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
	if ret != 0 {
		// If we cannot create a per-thread timer, profiling still
		// works via the process-wide setitimer path, so don't throw.
		return
	}

	ret = timer_settime(timerid, 0, spec, nil)
	if ret != 0 {
		print("runtime: failed to configure profiling timer; timer_settime(", timerid,
			", 0, {interval: {",
			spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
			spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
		throw("timer_settime")
	}

	// Publish the timer ID before marking it valid, so a SIGPROF
	// that observes profileTimerValid sees a consistent timer.
	mp.profileTimer = timerid
	mp.profileTimerValid.Store(true)
}
709
710
711
// perThreadSyscallArgs is a syscall to be replayed on every thread,
// plus the result (r1, r2) the initiating thread observed; each
// replaying thread checks that it got identical results.
type perThreadSyscallArgs struct {
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	r1   uintptr
	r2   uintptr
}

// perThreadSyscall is the pending syscall for runPerThreadSyscall.
// Written by syscall_runtime_doAllThreadsSyscall while the world is
// stopped and cleared before it restarts the world, so handlers only
// read it while it is stable.
var perThreadSyscall perThreadSyscallArgs
730
731
732
733
734
735
736
737
738
// syscall_runtime_doAllThreadsSyscall executes the given syscall on
// every OS thread in the process (needed for calls like setuid that
// Linux applies per-thread): it runs the syscall locally first and,
// if that succeeds, signals every other thread with
// sigPerThreadSyscall so runPerThreadSyscall replays it there.
// Returns the local thread's results.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Threads created by C code are invisible to the runtime, so
		// we cannot guarantee coverage of every thread.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world so no goroutine observes a half-applied state
	// while some threads have run the syscall and others haven't.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock so no new M can be created (and possibly miss
	// the signal) while the syscall is being propagated.
	allocmLock.lock()

	// Pin this g to its m for the duration.
	acquirem()

	r1, r2, errno := linux.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not meaningful on ppc64; normalize it so the
		// per-thread result comparison doesn't trip on garbage.
		r2 = 0
	}
	if errno != 0 {
		// Local syscall failed; don't propagate. Unwind in reverse
		// acquisition order.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the syscall and expected results for the handlers.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait for every m to have a TID (minit sets procid); we cannot
	// signal a thread whose TID we don't know yet.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// Give the thread time to start.
			osyield()
		}
	}

	// Signal every other thread, marking each as needing the
	// syscall before the signal so the handler sees the flag.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// Our thread already performed the syscall.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for every thread to acknowledge by clearing its flag.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
885
886
887
888
889
890
891
// runPerThreadSyscall replays the pending perThreadSyscall on this
// thread. It runs from the sigPerThreadSyscall signal handler; the
// flag check makes spurious deliveries harmless. If the results
// differ from the initiating thread's, process state is inconsistent
// across threads and we must abort.
func runPerThreadSyscall() {
	gp := getg()
	if gp.m.needPerThreadSyscall.Load() == 0 {
		return
	}

	args := perThreadSyscall
	r1, r2, errno := linux.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not meaningful on ppc64 (see doAllThreadsSyscall).
		r2 = 0
	}
	if errno != 0 || r1 != args.r1 || r2 != args.r2 {
		print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
		print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
		fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
	}

	// Acknowledge completion to the initiating thread.
	gp.m.needPerThreadSyscall.Store(0)
}
912
// siginfo si_code values used to classify signal origins.
const (
	_SI_USER     = 0  // sent by kill/raise
	_SI_TKILL    = -6 // sent by tkill/tgkill
	_SYS_SECCOMP = 1  // SIGSYS triggered by a seccomp filter
)
918
919
920
921
922
923 func (c *sigctxt) sigFromUser() bool {
924 code := int32(c.sigcode())
925 return code == _SI_USER || code == _SI_TKILL
926 }
927
928
929
930
931 func (c *sigctxt) sigFromSeccomp() bool {
932 code := int32(c.sigcode())
933 return code == _SYS_SECCOMP
934 }
935
936
937 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
938 r, _, err := linux.Syscall6(linux.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
939 return int32(r), int32(err)
940 }
941
View as plain text