Source file src/runtime/proc.go
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime 6 7 import ( 8 "internal/abi" 9 "internal/cpu" 10 "internal/goarch" 11 "internal/goexperiment" 12 "internal/goos" 13 "internal/runtime/atomic" 14 "internal/runtime/exithook" 15 "internal/runtime/maps" 16 "internal/runtime/sys" 17 "internal/strconv" 18 "internal/stringslite" 19 "unsafe" 20 ) 21 22 // set using cmd/go/internal/modload.ModInfoProg 23 var modinfo string 24 25 // Goroutine scheduler 26 // The scheduler's job is to distribute ready-to-run goroutines over worker threads. 27 // 28 // The main concepts are: 29 // G - goroutine. 30 // M - worker thread, or machine. 31 // P - processor, a resource that is required to execute Go code. 32 // M must have an associated P to execute Go code, however it can be 33 // blocked or in a syscall w/o an associated P. 34 // 35 // Design doc at https://golang.org/s/go11sched. 36 37 // Worker thread parking/unparking. 38 // We need to balance between keeping enough running worker threads to utilize 39 // available hardware parallelism and parking excessive running worker threads 40 // to conserve CPU resources and power. This is not simple for two reasons: 41 // (1) scheduler state is intentionally distributed (in particular, per-P work 42 // queues), so it is not possible to compute global predicates on fast paths; 43 // (2) for optimal thread management we would need to know the future (don't park 44 // a worker thread when a new goroutine will be readied in near future). 45 // 46 // Three rejected approaches that would work badly: 47 // 1. Centralize all scheduler state (would inhibit scalability). 48 // 2. Direct goroutine handoff. That is, when we ready a new goroutine and there 49 // is a spare P, unpark a thread and handoff it the thread and the goroutine. 
50 // This would lead to thread state thrashing, as the thread that readied the 51 // goroutine can be out of work the very next moment, we will need to park it. 52 // Also, it would destroy locality of computation as we want to preserve 53 // dependent goroutines on the same thread; and introduce additional latency. 54 // 3. Unpark an additional thread whenever we ready a goroutine and there is an 55 // idle P, but don't do handoff. This would lead to excessive thread parking/ 56 // unparking as the additional threads will instantly park without discovering 57 // any work to do. 58 // 59 // The current approach: 60 // 61 // This approach applies to three primary sources of potential work: readying a 62 // goroutine, new/modified-earlier timers, and idle-priority GC. See below for 63 // additional details. 64 // 65 // We unpark an additional thread when we submit work if (this is wakep()): 66 // 1. There is an idle P, and 67 // 2. There are no "spinning" worker threads. 68 // 69 // A worker thread is considered spinning if it is out of local work and did 70 // not find work in the global run queue or netpoller; the spinning state is 71 // denoted in m.spinning and in sched.nmspinning. Threads unparked this way are 72 // also considered spinning; we don't do goroutine handoff so such threads are 73 // out of work initially. Spinning threads spin on looking for work in per-P 74 // run queues and timer heaps or from the GC before parking. If a spinning 75 // thread finds work it takes itself out of the spinning state and proceeds to 76 // execution. If it does not find work it takes itself out of the spinning 77 // state and then parks. 78 // 79 // If there is at least one spinning thread (sched.nmspinning>0), we don't 80 // unpark new threads when submitting work. To compensate for that, if the last 81 // spinning thread finds work and stops spinning, it must unpark a new spinning 82 // thread.
This approach smooths out unjustified spikes of thread unparking, 83 // but at the same time guarantees eventual maximal CPU parallelism 84 // utilization. 85 // 86 // The main implementation complication is that we need to be very careful 87 // during spinning->non-spinning thread transition. This transition can race 88 // with submission of new work, and either one part or another needs to unpark 89 // another worker thread. If they both fail to do that, we can end up with 90 // semi-persistent CPU underutilization. 91 // 92 // The general pattern for submission is: 93 // 1. Submit work to the local or global run queue, timer heap, or GC state. 94 // 2. #StoreLoad-style memory barrier. 95 // 3. Check sched.nmspinning. 96 // 97 // The general pattern for spinning->non-spinning transition is: 98 // 1. Decrement nmspinning. 99 // 2. #StoreLoad-style memory barrier. 100 // 3. Check all per-P work queues and GC for new work. 101 // 102 // Note that all this complexity does not apply to global run queue as we are 103 // not sloppy about thread unparking when submitting to global queue. Also see 104 // comments for nmspinning manipulation. 105 // 106 // How these different sources of work behave varies, though it doesn't affect 107 // the synchronization approach: 108 // * Ready goroutine: this is an obvious source of work; the goroutine is 109 // immediately ready and must run on some thread eventually. 110 // * New/modified-earlier timer: The current timer implementation (see time.go) 111 // uses netpoll in a thread with no work available to wait for the soonest 112 // timer. If there is no thread waiting, we want a new spinning thread to go 113 // wait. 114 // * Idle-priority GC: The GC wakes a stopped idle thread to contribute to 115 // background GC work (note: currently disabled per golang.org/issue/19112). 116 // Also see golang.org/issue/44313, as this should be extended to all GC 117 // workers. 

// Bootstrap globals for the initial thread.
var (
	m0           m // main throws unless it is running on this M
	g0           g // presumably m0's scheduling stack goroutine; not set in this section
	mcache0      *mcache
	raceprocctx0 uintptr // assigned from raceinit() in schedinit when raceenabled
	raceFiniLock mutex
)

// This slice records the initializing tasks that need to be
// done to start up the runtime. It is built by the linker.
var runtime_inittasks []*initTask

// mainInitDone is a signal used by cgocallbackg that initialization
// has been completed. If this is false, wait on mainInitDoneChan.
var mainInitDone atomic.Bool

// mainInitDoneChan is closed after initialization has been completed.
// It is made before _cgo_notify_runtime_init_done, so all cgo
// calls can rely on it existing.
var mainInitDoneChan chan bool

// main_main is the user program's main.main, bound by linkname.
//
//go:linkname main_main main.main
func main_main()

// mainStarted indicates that the main M has started.
var mainStarted bool

// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64

// Value to use for signal mask for newly created M's.
var initSigmask sigset

// The main goroutine.
//
// main finishes runtime bring-up on m0 (stack limits, sysmon, the runtime's
// own init tasks, GC enable, cgo hookup), runs the init tasks of every
// module, and then calls main.main. When main.main returns it runs the exit
// hooks and exits the process with status 0. In c-archive/c-shared builds
// main.main is not executed: main returns (or pauses, on wasm) instead.
func main() {
	mp := getg().m

	// Racectx of m0->g0 is used only as the parent of the main goroutine.
	// It must not be used for anything else.
	mp.g0.racectx = 0

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	if goarch.PtrSize == 8 {
		maxstacksize = 1000000000
	} else {
		maxstacksize = 250000000
	}

	// An upper limit for max stack size. Used to avoid random crashes
	// after calling SetMaxStack and trying to allocate a stack that is too big,
	// since stackalloc works with 32-bit sizes.
	maxstackceiling = 2 * maxstacksize

	// Allow newproc to start new Ms.
	mainStarted = true

	// Start the sysmon thread on the system stack, where supported.
	if haveSysmon {
		systemstack(func() {
			newm(sysmon, nil, -1)
		})
	}

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization. Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	lockOSThread()

	if mp != &m0 {
		throw("runtime.main not on m0")
	}

	// Record when the world started.
	// Must be before doInit for tracing init.
	runtimeInitTime = nanotime()
	if runtimeInitTime == 0 {
		throw("nanotime returning zero")
	}

	if debug.inittrace != 0 {
		inittrace.id = getg().goid
		inittrace.active = true
	}

	doInit(runtime_inittasks) // Must be before defer.

	// Defer unlock so that runtime.Goexit during init does the unlock too.
	needUnlock := true
	defer func() {
		if needUnlock {
			unlockOSThread()
		}
	}()

	gcenable()
	defaultGOMAXPROCSUpdateEnable() // don't STW before runtime initialized.

	// If we encountered a removed GODEBUG during startup we can panic now.
	if k := invalidGODEBUG.key; k != "" {
		v := invalidGODEBUG.value
		fatal("removed GODEBUG " + k + " set to old value (" + v + ") in environment")
	}

	mainInitDoneChan = make(chan bool)
	if iscgo {
		// Verify that every cgo hook this function relies on is present
		// before using any of them.
		if _cgo_pthread_key_created == nil {
			throw("_cgo_pthread_key_created missing")
		}

		if GOOS != "windows" {
			if _cgo_thread_start == nil {
				throw("_cgo_thread_start missing")
			}
			if _cgo_setenv == nil {
				throw("_cgo_setenv missing")
			}
			if _cgo_unsetenv == nil {
				throw("_cgo_unsetenv missing")
			}
		}
		if _cgo_notify_runtime_init_done == nil {
			throw("_cgo_notify_runtime_init_done missing")
		}

		// Set the x_crosscall2_ptr C function pointer variable point to crosscall2.
		if set_crosscall2 == nil {
			throw("set_crosscall2 missing")
		}
		set_crosscall2()

		// Start the template thread in case we enter Go from
		// a C-created thread and need to create a new thread.
		startTemplateThread()
		cgocall(_cgo_notify_runtime_init_done, nil)
	}

	// Run the initializing tasks. Depending on build mode this
	// list can arrive a few different ways, but it will always
	// contain the init tasks computed by the linker for all the
	// packages in the program (excluding those added at runtime
	// by package plugin). Run through the modules in dependency
	// order (the order they are initialized by the dynamic
	// loader, i.e. they are added to the moduledata linked list).
	last := lastmoduledatap // grab before loop starts. Any added modules after this point will do their own doInit calls.
	for m := &firstmoduledata; true; m = m.next {
		doInit(m.inittasks)
		if m == last {
			break
		}
	}

	// Disable init tracing after main init done to avoid overhead
	// of collecting statistics in malloc and newproc
	inittrace.active = false

	// Publish "init done" both as a flag and by closing the channel.
	mainInitDone.Store(true)
	close(mainInitDoneChan)

	// Initialization is over: undo the lockOSThread above here, and tell
	// the deferred function not to unlock a second time.
	needUnlock = false
	unlockOSThread()

	if isarchive || islibrary {
		// A program compiled with -buildmode=c-archive or c-shared
		// has a main, but it is not executed.
		if GOARCH == "wasm" {
			// On Wasm, pause makes it return to the host.
			// Unlike cgo callbacks where Ms are created on demand,
			// on Wasm we have only one M. So we keep this M (and this
			// G) for callbacks.
			// Using the caller's SP unwinds this frame and backs to
			// goexit. The -16 is: 8 for goexit's (fake) return PC,
			// and pause's epilogue pops 8.
			pause(sys.GetCallerSP() - 16) // should not return
			panic("unreachable")
		}
		return
	}
	fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
	fn()

	// Check for C memory leaks if using ASAN and we've made cgo calls,
	// or if we are running as a library in a C program.
	// We always make one cgo call, above, to notify_runtime_init_done,
	// so we ignore that one.
	// No point in leak checking if no cgo calls, since leak checking
	// just looks for objects allocated using malloc and friends.
	// Just checking iscgo doesn't help because asan implies iscgo.
	exitHooksRun := false
	if asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		runExitHooks(0) // lsandoleakcheck may not return
		exitHooksRun = true
		lsandoleakcheck()
	}

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issues 3934 and 20018.
	if runningPanicDefers.Load() != 0 {
		// Running deferred functions should not take long.
		for c := 0; c < 1000; c++ {
			if runningPanicDefers.Load() == 0 {
				break
			}
			Gosched()
		}
	}
	if panicking.Load() != 0 {
		gopark(nil, nil, waitReasonPanicWait, traceBlockForever, 1)
	}
	// Exit hooks run at most once: skip them if the ASAN path above
	// already ran them.
	if !exitHooksRun {
		runExitHooks(0)
	}
	if raceenabled {
		racefini() // does not return
	}

	exit(0)
	// exit should not return. If it somehow does, fault on a nil
	// pointer store forever rather than fall off the end of main.
	for {
		var x *int32
		*x = 0
	}
}

// os_beforeExit is called from os.Exit(0).
//
// It runs the exit hooks with exitCode. For a zero exit code only, it also
// calls racefini (when raceenabled) and performs the ASAN leak check under
// the same conditions main uses.
//
//go:linkname os_beforeExit os.runtime_beforeExit
func os_beforeExit(exitCode int) {
	runExitHooks(exitCode)
	if exitCode == 0 && raceenabled {
		racefini()
	}

	// See comment in main, above.
	if exitCode == 0 && asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		lsandoleakcheck()
	}
}

// init hands the exithook package the runtime's Gosched, current-goroutine-ID,
// and throw implementations.
func init() {
	exithook.Gosched = Gosched
	exithook.Goid = func() uint64 { return getg().goid }
	exithook.Throw = throw
}

// runExitHooks forwards code to exithook.Run.
func runExitHooks(code int) {
	exithook.Run(code)
}

// start forcegc helper goroutine
func init() {
	go forcegchelper()
}

// forcegchelper is the body of the goroutine started by the init above.
// It loops forever: mark itself idle, park, and on each resume start a
// time-triggered GC cycle.
func forcegchelper() {
	forcegc.g = getg()
	lockInit(&forcegc.lock, lockRankForcegc)
	for {
		lock(&forcegc.lock)
		// idle must have been cleared before this goroutine was resumed.
		if forcegc.idle.Load() {
			throw("forcegc: phase error")
		}
		forcegc.idle.Store(true)
		goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceBlockSystemGoroutine, 1)
		// this goroutine is explicitly resumed by sysmon
		if debug.gctrace > 0 {
			println("GC forced")
		}
		// Time-triggered, fully concurrent.
		gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()})
	}
}

// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
//
//go:nosplit
func Gosched() {
	checkTimeouts()
	// Run gosched_m via mcall; the yield itself happens there.
	mcall(gosched_m)
}

// goschedguarded yields the processor like gosched, but also checks
// for forbidden states and opts out of the yield in those cases.
//
//go:nosplit
func goschedguarded() {
	mcall(goschedguarded_m)
}

// goschedIfBusy yields the processor like gosched, but only when the
// scheduler looks busy: it yields if gp.preempt is set or if sched.npidle
// reports no idle Ps, and returns without yielding otherwise (an idle P
// means there is freely available idle time).
//
//go:nosplit
func goschedIfBusy() {
	gp := getg()
	// Call gosched if gp.preempt is set; we may be in a tight loop that
	// doesn't otherwise yield.
	if !gp.preempt && sched.npidle.Load() > 0 {
		return
	}
	mcall(gosched_m)
}

// Puts the current goroutine into a waiting state and calls unlockf on the
// system stack.
//
// If unlockf returns false, the goroutine is resumed.
//
// unlockf must not access this G's stack, as it may be moved between
// the call to gopark and the call to unlockf.
//
// Note that because unlockf is called after putting the G into a waiting
// state, the G may have already been readied by the time unlockf is called
// unless there is external synchronization preventing the G from being
// readied. If unlockf returns false, it must guarantee that the G cannot be
// externally readied.
//
// Reason explains why the goroutine has been parked. It is displayed in stack
// traces and heap dumps. Reasons should be unique and descriptive. Do not
// re-use reasons, add new ones.
//
// gopark should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname gopark
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int) {
	if reason != waitReasonSleep {
		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
	}
	mp := acquirem()
	gp := mp.curg
	status := readgstatus(gp)
	if status != _Grunning && status != _Gscanrunning {
		throw("gopark: bad g status")
	}
	// Stash the park arguments on the M and G; park_m (run via mcall
	// below) takes no arguments of its own.
	mp.waitlock = lock
	mp.waitunlockf = unlockf
	gp.waitreason = reason
	mp.waitTraceBlockReason = traceReason
	mp.waitTraceSkip = traceskip
	releasem(mp)
	// can't do anything that might move the G between Ms here.
	mcall(park_m)
}

// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling goready(gp).
func goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) {
	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceReason, traceskip)
}

// goready calls ready(gp, traceskip, true) on the system stack.
//
// goready should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goready
func goready(gp *g, traceskip int) {
	systemstack(func() {
		ready(gp, traceskip, true)
	})
}

// acquireSudog returns a sudog taken from the current P's sudogcache.
// When that cache is empty it is first refilled from the central
// sched.sudogcache list (up to half the local capacity), or with a single
// freshly allocated sudog if the central list is empty too.
//
//go:nosplit
func acquireSudog() *sudog {
	// Delicate dance: the semaphore implementation calls
	// acquireSudog, acquireSudog calls new(sudog),
	// new calls malloc, malloc can call the garbage collector,
	// and the garbage collector calls the semaphore implementation
	// in stopTheWorld.
	// Break the cycle by doing acquirem/releasem around new(sudog).
	// The acquirem/releasem increments m.locks during new(sudog),
	// which keeps the garbage collector from being invoked.
	mp := acquirem()
	pp := mp.p.ptr()
	if len(pp.sudogcache) == 0 {
		lock(&sched.sudoglock)
		// First, try to grab a batch from central cache.
		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
			s := sched.sudogcache
			sched.sudogcache = s.next
			s.next = nil
			pp.sudogcache = append(pp.sudogcache, s)
		}
		unlock(&sched.sudoglock)
		// If the central cache is empty, allocate a new one.
		if len(pp.sudogcache) == 0 {
			pp.sudogcache = append(pp.sudogcache, new(sudog))
		}
	}
	// Pop the last cached entry, clearing the vacated slot.
	n := len(pp.sudogcache)
	s := pp.sudogcache[n-1]
	pp.sudogcache[n-1] = nil
	pp.sudogcache = pp.sudogcache[:n-1]
	if s.elem.get() != nil {
		throw("acquireSudog: found s.elem != nil in cache")
	}
	releasem(mp)
	return s
}

// releaseSudog returns s to the current P's sudogcache. It throws if s
// still has elem, isSelect, next, prev, waitlink, or c set, or if the
// current G's param is non-nil. When the local cache is full, half of it is
// moved to the central sched.sudogcache list first.
//
//go:nosplit
func releaseSudog(s *sudog) {
	if s.elem.get() != nil {
		throw("runtime: sudog with non-nil elem")
	}
	if s.isSelect {
		throw("runtime: sudog with non-false isSelect")
	}
	if s.next != nil {
		throw("runtime: sudog with non-nil next")
	}
	if s.prev != nil {
		throw("runtime: sudog with non-nil prev")
	}
	if s.waitlink != nil {
		throw("runtime: sudog with non-nil waitlink")
	}
	if s.c.get() != nil {
		throw("runtime: sudog with non-nil c")
	}
	gp := getg()
	if gp.param != nil {
		throw("runtime: releaseSudog with non-nil gp.param")
	}
	mp := acquirem() // avoid rescheduling to another P
	pp := mp.p.ptr()
	if len(pp.sudogcache) == cap(pp.sudogcache) {
		// Transfer half of local cache to the central cache.
		// The popped entries are chained through next into a list
		// running from first to last.
		var first, last *sudog
		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
			n := len(pp.sudogcache)
			p := pp.sudogcache[n-1]
			pp.sudogcache[n-1] = nil
			pp.sudogcache = pp.sudogcache[:n-1]
			if first == nil {
				first = p
			} else {
				last.next = p
			}
			last = p
		}
		// Splice the chain onto the front of the central list.
		lock(&sched.sudoglock)
		last.next = sched.sudogcache
		sched.sudogcache = first
		unlock(&sched.sudoglock)
	}
	pp.sudogcache = append(pp.sudogcache, s)
	releasem(mp)
}

// called from assembly.
func badmcall(fn func(*g)) {
	throw("runtime: mcall called on m->g0 stack")
}

// badmcall2 throws to report that a function run by mcall returned.
func badmcall2(fn func(*g)) {
	throw("runtime: mcall function returned")
}

// badreflectcall panics to report an oversized reflect.call argument frame.
func badreflectcall() {
	panic(plainError("arg size to reflect.call more than 1GB"))
}

// badmorestackg0 reports a morestack on g0. Where crash stacks are
// implemented it switches to the crash stack, prints the g0 stack bounds
// and a traceback, and throws; otherwise it only writes a fatal message.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
	if !crashStackImplemented {
		writeErrStr("fatal: morestack on g0\n")
		return
	}

	g := getg()
	switchToCrashStack(func() {
		print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
		g.m.traceback = 2 // include pc and sp in stack trace
		traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
		print("\n")

		throw("morestack on g0")
	})
}

// badmorestackgsignal writes a fatal message reporting a morestack on gsignal.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackgsignal() {
	writeErrStr("fatal: morestack on gsignal\n")
}

// badctxt throws to report a non-zero ctxt.
//
//go:nosplit
func badctxt() {
	throw("ctxt != 0")
}

// gcrash is a fake g that can be used when crashing due to bad
// stack conditions.
var gcrash g

// crashingG records the g that claimed the crash stack in
// switchToCrashStack; nil means no g has claimed it.
var crashingG atomic.Pointer[g]

// Switch to crashstack and call fn, with special handling of
// concurrent and recursive cases.
//
// Nosplit as it is called in a bad stack condition (we know
// morestack would fail).
//
//go:nosplit
//go:nowritebarrierrec
func switchToCrashStack(fn func()) {
	me := getg()
	// The first g to get here claims the crash stack by installing itself
	// in crashingG.
	if crashingG.CompareAndSwapNoWB(nil, me) {
		switchToCrashStack0(fn) // should never return
		abort()
	}
	if crashingG.Load() == me {
		// recursive crashing. too bad.
		writeErrStr("fatal: recursive switchToCrashStack\n")
		abort()
	}
	// Another g is crashing. Give it some time, hopefully it will finish traceback.
	usleep_no_g(100)
	writeErrStr("fatal: concurrent switchToCrashStack\n")
	abort()
}

// Disable crash stack on Windows for now. Apparently, throwing an exception
// on a non-system-allocated crash stack causes EXCEPTION_STACK_OVERFLOW and
// hangs the process (see issue 63938).
const crashStackImplemented = GOOS != "windows"

//go:noescape
func switchToCrashStack0(fn func()) // in assembly

// lockedOSThread reports whether both the current G's lockedm and its M's
// lockedg are set.
func lockedOSThread() bool {
	gp := getg()
	return gp.lockedm != 0 && gp.m.lockedg != 0
}

var (
	// allgs contains all Gs ever created (including dead Gs), and thus
	// never shrinks.
	//
	// Access via the slice is protected by allglock or stop-the-world.
	// Readers that cannot take the lock may (carefully!) use the atomic
	// variables below.
	allglock mutex
	allgs    []*g

	// allglen and allgptr are atomic variables that contain len(allgs) and
	// &allgs[0] respectively. Proper ordering depends on totally-ordered
	// loads and stores. Writes are protected by allglock.
	//
	// allgptr is updated before allglen. Readers should read allglen
	// before allgptr to ensure that allglen is always <= len(allgptr). New
	// Gs appended during the race can be missed. For a consistent view of
	// all Gs, allglock must be held.
	//
	// allgptr copies should always be stored as a concrete type or
	// unsafe.Pointer, not uintptr, to ensure that GC can still reach it
	// even if it points to a stale array.
	allglen uintptr
	allgptr **g
)

// allgadd appends gp to allgs under allglock and republishes allgptr and
// allglen for lock-free readers. It throws if gp is still in _Gidle.
func allgadd(gp *g) {
	if readgstatus(gp) == _Gidle {
		throw("allgadd: bad status Gidle")
	}

	lock(&allglock)
	allgs = append(allgs, gp)
	// append may have moved the backing array; publish the new base
	// pointer before the new length.
	if &allgs[0] != allgptr {
		atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
	}
	atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
	unlock(&allglock)
}

// allGsSnapshot returns a snapshot of the slice of all Gs.
//
// The world must be stopped or allglock must be held.
func allGsSnapshot() []*g {
	assertWorldStoppedOrLockHeld(&allglock)

	// Because the world is stopped or allglock is held, allgadd
	// cannot happen concurrently with this. allgs grows
	// monotonically and existing entries never change, so we can
	// simply return a copy of the slice header. For added safety,
	// we trim everything past len because that can still change.
	return allgs[:len(allgs):len(allgs)]
}

// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
func atomicAllG() (**g, uintptr) {
	// Load the length before the pointer, the reverse of the order in
	// which allgadd stores them.
	length := atomic.Loaduintptr(&allglen)
	ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr)))
	return ptr, length
}

// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
func atomicAllGIndex(ptr **g, i uintptr) *g {
	return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize))
}

// forEachG calls fn on every G from allgs.
//
// forEachG takes a lock to exclude concurrent addition of new Gs.
func forEachG(fn func(gp *g)) {
	lock(&allglock)
	for _, gp := range allgs {
		fn(gp)
	}
	unlock(&allglock)
}

// forEachGRace calls fn on every G from allgs.
//
// forEachGRace avoids locking, but does not exclude addition of new Gs during
// execution, which may be missed.
754 func forEachGRace(fn func(gp *g)) { 755 ptr, length := atomicAllG() 756 for i := uintptr(0); i < length; i++ { 757 gp := atomicAllGIndex(ptr, i) 758 fn(gp) 759 } 760 return 761 } 762 763 const ( 764 // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. 765 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 766 _GoidCacheBatch = 16 767 ) 768 769 // cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete 770 // value of the GODEBUG environment variable. 771 func cpuinit(env string) { 772 cpu.Initialize(env) 773 774 // Support cpu feature variables are used in code generated by the compiler 775 // to guard execution of instructions that can not be assumed to be always supported. 776 switch GOARCH { 777 case "386", "amd64": 778 x86HasAVX = cpu.X86.HasAVX 779 x86HasFMA = cpu.X86.HasFMA 780 x86HasPOPCNT = cpu.X86.HasPOPCNT 781 x86HasSSE41 = cpu.X86.HasSSE41 782 783 case "arm": 784 armHasVFPv4 = cpu.ARM.HasVFPv4 785 786 case "arm64": 787 arm64HasATOMICS = cpu.ARM64.HasATOMICS 788 789 case "loong64": 790 loong64HasLAMCAS = cpu.Loong64.HasLAMCAS 791 loong64HasLAM_BH = cpu.Loong64.HasLAM_BH 792 loong64HasDBAR_HINTS = cpu.Loong64.HasDBAR_HINTS 793 loong64HasLSX = cpu.Loong64.HasLSX 794 795 case "riscv64": 796 riscv64HasZbb = cpu.RISCV64.HasZbb 797 } 798 } 799 800 // getGodebugEarly extracts the environment variable GODEBUG from the environment on 801 // Unix-like operating systems and returns it. This function exists to extract GODEBUG 802 // early before much of the runtime is initialized. 803 // 804 // Returns nil, false if OS doesn't provide env vars early in the init sequence. 
805 func getGodebugEarly() (string, bool) { 806 const prefix = "GODEBUG=" 807 var env string 808 switch GOOS { 809 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 810 // Similar to goenv_unix but extracts the environment value for 811 // GODEBUG directly. 812 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() 813 n := int32(0) 814 for argv_index(argv, argc+1+n) != nil { 815 n++ 816 } 817 818 for i := int32(0); i < n; i++ { 819 p := argv_index(argv, argc+1+i) 820 s := unsafe.String(p, findnull(p)) 821 822 if stringslite.HasPrefix(s, prefix) { 823 env = gostringnocopy(p)[len(prefix):] 824 break 825 } 826 } 827 break 828 829 default: 830 return "", false 831 } 832 return env, true 833 } 834 835 // The bootstrap sequence is: 836 // 837 // call osinit 838 // call schedinit 839 // make & queue new G 840 // call runtime·mstart 841 // 842 // The new G calls runtime·main. 843 func schedinit() { 844 lockInit(&sched.lock, lockRankSched) 845 lockInit(&sched.sysmonlock, lockRankSysmon) 846 lockInit(&sched.deferlock, lockRankDefer) 847 lockInit(&sched.sudoglock, lockRankSudog) 848 lockInit(&deadlock, lockRankDeadlock) 849 lockInit(&paniclk, lockRankPanic) 850 lockInit(&allglock, lockRankAllg) 851 lockInit(&allpLock, lockRankAllp) 852 lockInit(&reflectOffs.lock, lockRankReflectOffs) 853 lockInit(&finlock, lockRankFin) 854 lockInit(&cpuprof.lock, lockRankCpuprof) 855 lockInit(&computeMaxProcsLock, lockRankComputeMaxProcs) 856 allocmLock.init(lockRankAllocmR, lockRankAllocmRInternal, lockRankAllocmW) 857 execLock.init(lockRankExecR, lockRankExecRInternal, lockRankExecW) 858 traceLockInit() 859 // Enforce that this lock is always a leaf lock. 860 // All of this lock's critical sections should be 861 // extremely short. 
862 lockInit(&memstats.heapStats.noPLock, lockRankLeafRank) 863 864 lockVerifyMSize() 865 866 sched.midle.init(unsafe.Offsetof(m{}.idleNode)) 867 868 // raceinit must be the first call to race detector. 869 // In particular, it must be done before mallocinit below calls racemapshadow. 870 gp := getg() 871 if raceenabled { 872 gp.racectx, raceprocctx0 = raceinit() 873 } 874 875 sched.maxmcount = 10000 876 crashFD.Store(^uintptr(0)) 877 878 // The world starts stopped. 879 worldStopped() 880 881 godebug, parsedGodebug := getGodebugEarly() 882 if parsedGodebug { 883 parseRuntimeDebugVars(godebug) 884 } 885 ticks.init() // run as early as possible 886 moduledataverify() 887 stackinit() 888 randinit() // must run before mallocinit, AlgInit, mcommoninit 889 mallocinit() 890 cpuinit(godebug) // must run before AlgInit 891 maps.AlgInit() // maps, hash, rand must not be used before this call 892 mcommoninit(gp.m, -1) 893 modulesinit() // provides activeModules 894 typelinksinit() // uses maps, activeModules 895 itabsinit() // uses activeModules 896 stkobjinit() // must run before GC starts 897 898 sigsave(&gp.m.sigmask) 899 initSigmask = gp.m.sigmask 900 901 goargs() 902 goenvs() 903 secure() 904 checkfds() 905 if !parsedGodebug { 906 // Some platforms, e.g., Windows, didn't make env vars available "early", 907 // so try again now. 908 parseRuntimeDebugVars(gogetenv("GODEBUG")) 909 } 910 finishDebugVarsSetup() 911 gcinit() 912 913 // Allocate stack space that can be used when crashing due to bad stack 914 // conditions, e.g. morestack on g0. 915 gcrash.stack = stackalloc(16384) 916 gcrash.stackguard0 = gcrash.stack.lo + 1000 917 gcrash.stackguard1 = gcrash.stack.lo + 1000 918 919 // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile. 
920 // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is 921 // set to true by the linker, it means that nothing is consuming the profile, it is 922 // safe to set MemProfileRate to 0. 923 if disableMemoryProfiling { 924 MemProfileRate = 0 925 } 926 927 // mcommoninit runs before parsedebugvars, so init profstacks again. 928 mProfStackInit(gp.m) 929 defaultGOMAXPROCSInit() 930 931 lock(&sched.lock) 932 sched.lastpoll.Store(nanotime()) 933 var procs int32 934 if n, err := strconv.ParseInt(gogetenv("GOMAXPROCS"), 10, 32); err == nil && n > 0 { 935 procs = int32(n) 936 sched.customGOMAXPROCS = true 937 } else { 938 // Use numCPUStartup for initial GOMAXPROCS for two reasons: 939 // 940 // 1. We just computed it in osinit, recomputing is (minorly) wasteful. 941 // 942 // 2. More importantly, if debug.containermaxprocs == 0 && 943 // debug.updatemaxprocs == 0, we want to guarantee that 944 // runtime.GOMAXPROCS(0) always equals runtime.NumCPU (which is 945 // just numCPUStartup). 946 procs = defaultGOMAXPROCS(numCPUStartup) 947 } 948 if procresize(procs) != nil { 949 throw("unknown runnable goroutine during bootstrap") 950 } 951 unlock(&sched.lock) 952 953 // World is effectively started now, as P's can run. 954 worldStarted() 955 956 if buildVersion == "" { 957 // Condition should never trigger. This code just serves 958 // to ensure runtime·buildVersion is kept in the resulting binary. 959 buildVersion = "unknown" 960 } 961 if len(modinfo) == 1 { 962 // Condition should never trigger. This code just serves 963 // to ensure runtime·modinfo is kept in the resulting binary. 964 modinfo = "" 965 } 966 } 967 968 func dumpgstatus(gp *g) { 969 thisg := getg() 970 print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") 971 print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n") 972 } 973 974 // sched.lock must be held. 
975 func checkmcount() { 976 assertLockHeld(&sched.lock) 977 978 // Exclude extra M's, which are used for cgocallback from threads 979 // created in C. 980 // 981 // The purpose of the SetMaxThreads limit is to avoid accidental fork 982 // bomb from something like millions of goroutines blocking on system 983 // calls, causing the runtime to create millions of threads. By 984 // definition, this isn't a problem for threads created in C, so we 985 // exclude them from the limit. See https://go.dev/issue/60004. 986 count := mcount() - int32(extraMInUse.Load()) - int32(extraMLength.Load()) 987 if count > sched.maxmcount { 988 print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") 989 throw("thread exhaustion") 990 } 991 } 992 993 // mReserveID returns the next ID to use for a new m. This new m is immediately 994 // considered 'running' by checkdead. 995 // 996 // sched.lock must be held. 997 func mReserveID() int64 { 998 assertLockHeld(&sched.lock) 999 1000 if sched.mnext+1 < sched.mnext { 1001 throw("runtime: thread ID overflow") 1002 } 1003 id := sched.mnext 1004 sched.mnext++ 1005 checkmcount() 1006 return id 1007 } 1008 1009 // Pre-allocated ID may be passed as 'id', or omitted by passing -1. 1010 func mcommoninit(mp *m, id int64) { 1011 gp := getg() 1012 1013 // g0 stack won't make sense for user (and is not necessary unwindable). 1014 if gp != gp.m.g0 { 1015 callers(1, mp.createstack[:]) 1016 } 1017 1018 lock(&sched.lock) 1019 1020 if id >= 0 { 1021 mp.id = id 1022 } else { 1023 mp.id = mReserveID() 1024 } 1025 1026 mp.self = newMWeakPointer(mp) 1027 1028 mrandinit(mp) 1029 1030 mpreinit(mp) 1031 if mp.gsignal != nil { 1032 mp.gsignal.stackguard1 = mp.gsignal.stack.lo + stackGuard 1033 } 1034 1035 // Add to allm so garbage collector doesn't free g->m 1036 // when it is just in a register or thread-local storage. 1037 mp.alllink = allm 1038 1039 // NumCgoCall and others iterate over allm w/o schedlock, 1040 // so we need to publish it safely. 
1041 atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) 1042 unlock(&sched.lock) 1043 1044 // Allocate memory to hold a cgo traceback if the cgo call crashes. 1045 if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" { 1046 mp.cgoCallers = new(cgoCallers) 1047 } 1048 mProfStackInit(mp) 1049 } 1050 1051 // mProfStackInit is used to eagerly initialize stack trace buffers for 1052 // profiling. Lazy allocation would have to deal with reentrancy issues in 1053 // malloc and runtime locks for mLockProfile. 1054 // TODO(mknyszek): Implement lazy allocation if this becomes a problem. 1055 func mProfStackInit(mp *m) { 1056 if debug.profstackdepth == 0 { 1057 // debug.profstack is set to 0 by the user, or we're being called from 1058 // schedinit before parsedebugvars. 1059 return 1060 } 1061 mp.profStack = makeProfStackFP() 1062 mp.mLockProfile.stack = makeProfStackFP() 1063 } 1064 1065 // makeProfStackFP creates a buffer large enough to hold a maximum-sized stack 1066 // trace as well as any additional frames needed for frame pointer unwinding 1067 // with delayed inline expansion. 1068 func makeProfStackFP() []uintptr { 1069 // The "1" term is to account for the first stack entry being 1070 // taken up by a "skip" sentinel value for profilers which 1071 // defer inline frame expansion until the profile is reported. 1072 // The "maxSkip" term is for frame pointer unwinding, where we 1073 // want to end up with debug.profstackdebth frames but will discard 1074 // some "physical" frames to account for skipping. 1075 return make([]uintptr, 1+maxSkip+debug.profstackdepth) 1076 } 1077 1078 // makeProfStack returns a buffer large enough to hold a maximum-sized stack 1079 // trace. 
func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }

// pprof_makeProfStack is a linkname-exposed wrapper around makeProfStack.
// NOTE(review): presumably consumed by runtime/pprof; the consumer is not
// visible from this file.
//
//go:linkname pprof_makeProfStack
func pprof_makeProfStack() []uintptr { return makeProfStack() }

// becomeSpinning marks mp as spinning: it sets mp.spinning, increments
// sched.nmspinning and resets sched.needspinning to 0.
func (mp *m) becomeSpinning() {
	mp.spinning = true
	sched.nmspinning.Add(1)
	sched.needspinning.Store(0)
}

// Take a snapshot of allp, for use after dropping the P.
//
// Must be called with a P, but the returned slice may be used after dropping
// the P. The M holds a reference on the snapshot to keep the backing array
// alive.
//
//go:yeswritebarrierrec
func (mp *m) snapshotAllp() []*p {
	mp.allpSnapshot = allp
	return mp.allpSnapshot
}

// Clear the saved allp snapshot. Should be called as soon as the snapshot is
// no longer required.
//
// Must be called after reacquiring a P, as it requires a write barrier.
//
//go:yeswritebarrierrec
func (mp *m) clearAllpSnapshot() {
	mp.allpSnapshot = nil
}

// hasCgoOnStack reports whether mp.ncgo is positive or mp is flagged as an
// extra M (mp.isextra).
func (mp *m) hasCgoOnStack() bool {
	return mp.ncgo > 0 || mp.isextra
}

const (
	// osHasLowResTimer indicates that the platform's internal timer system has a low resolution,
	// typically on the order of 1 ms or more.
	osHasLowResTimer = GOOS == "windows" || GOOS == "openbsd" || GOOS == "netbsd"

	// osHasLowResClockInt is osHasLowResClock but in integer form, so it can be used to create
	// constants conditionally.
	osHasLowResClockInt = goos.IsWindows

	// osHasLowResClock indicates that timestamps produced by nanotime on the platform have a
	// low resolution, typically on the order of 1 ms or more.
	osHasLowResClock = osHasLowResClockInt > 0
)

// Mark gp ready to run.
//
// gp must be in _Gwaiting (the _Gscan bit is ignored for this check);
// otherwise ready dumps the status and throws. gp is moved to _Grunnable,
// placed on the current P's run queue with runqput (next is forwarded to
// runqput), and wakep is called. traceskip is forwarded to trace.GoUnpark
// when tracing is enabled.
func ready(gp *g, traceskip int, next bool) {
	status := readgstatus(gp)

	// Mark runnable.
	mp := acquirem() // disable preemption because it can be holding p in a local var
	if status&^_Gscan != _Gwaiting {
		dumpgstatus(gp)
		throw("bad g->status in ready")
	}

	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	trace := traceAcquire()
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.ok() {
		trace.GoUnpark(gp, traceskip)
		traceRelease(trace)
	}
	runqput(mp.p.ptr(), gp, next)
	wakep()
	releasem(mp)
}

// freezeStopWait is a large value that freezetheworld sets
// sched.stopwait to in order to request that all Gs permanently stop.
const freezeStopWait = 0x7fffffff

// freezing is set to true if the runtime is trying to freeze the
// world.
var freezing atomic.Bool

// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
	freezing.Store(true)
	if debug.dontfreezetheworld > 0 {
		// Don't preempt Ps to stop goroutines. That will perturb
		// scheduler state, making debugging more difficult. Instead,
		// allow goroutines to continue execution.
		//
		// fatalpanic will tracebackothers to trace all goroutines. It
		// is unsafe to trace a running goroutine, so tracebackothers
		// will skip running goroutines. That is OK and expected, we
		// expect users of dontfreezetheworld to use core files anyway.
		//
		// However, allowing the scheduler to continue running free
		// introduces a race: a goroutine may be stopped when
		// tracebackothers checks its status, and then start running
		// later when we are in the middle of traceback, potentially
		// causing a crash.
		//
		// To mitigate this, when an M naturally enters the scheduler,
		// schedule checks if freezing is set and if so stops
		// execution. This guarantees that while Gs can transition from
		// running to stopped, they can never transition from stopped
		// to running.
		//
		// The sleep here allows racing Ms that missed freezing and are
		// about to run a G to complete the transition to running
		// before we start traceback.
		usleep(1000)
		return
	}

	// stopwait and preemption requests can be lost
	// due to races with concurrently executing threads,
	// so try several times
	for i := 0; i < 5; i++ {
		// this should tell the scheduler to not start any new goroutines
		sched.stopwait = freezeStopWait
		sched.gcwaiting.Store(true)
		// this should stop running goroutines
		if !preemptall() {
			break // no running goroutines
		}
		usleep(1000)
	}
	// to be sure
	usleep(1000)
	preemptall()
	usleep(1000)
}

// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, casfrom_Gscanstatus.
//
// readgstatus returns an atomic load of gp.atomicstatus.
//
//go:nosplit
func readgstatus(gp *g) uint32 {
	return gp.atomicstatus.Load()
}

// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
// we see an inconsistent state.
//
// oldval must be one of the _Gscan* statuses listed below and newval must be
// oldval with the _Gscan bit cleared. casfrom_Gscanstatus throws if either
// condition is violated or if the compare-and-swap fails. On success it
// releases the Gscan lock rank via releaseLockRankAndM.
func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
	success := false

	// Check that transition is valid.
	switch oldval {
	default:
		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
	case _Gscanrunnable,
		_Gscanwaiting,
		_Gscanrunning,
		_Gscansyscall,
		_Gscanleaked,
		_Gscanpreempted,
		_Gscandeadextra:
		// Only attempt the swap when newval is exactly oldval without
		// the scan bit; otherwise success stays false and we throw below.
		if newval == oldval&^_Gscan {
			success = gp.atomicstatus.CompareAndSwap(oldval, newval)
		}
	}
	if !success {
		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus: gp->status is not in scan state")
	}
	releaseLockRankAndM(lockRankGscan)
}

// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while casfrom_Gscanstatus acts like a lock release.
//
// oldval must be one of the non-scan statuses listed below and newval must be
// oldval|_Gscan; any other combination throws. When the compare-and-swap
// succeeds, the Gscan lock rank is acquired via acquireLockRankAndM.
func castogscanstatus(gp *g, oldval, newval uint32) bool {
	switch oldval {
	case _Grunnable,
		_Grunning,
		_Gwaiting,
		_Gleaked,
		_Gsyscall,
		_Gdeadextra:
		if newval == oldval|_Gscan {
			r := gp.atomicstatus.CompareAndSwap(oldval, newval)
			if r {
				acquireLockRankAndM(lockRankGscan)
			}
			return r

		}
	}
	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
	throw("bad oldval passed to castogscanstatus")
	return false
}

// casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track
// various latencies on every transition instead of sampling them.
var casgstatusAlwaysTrack = false

// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
// and casfrom_Gscanstatus instead.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
// put it in the Gscan state is finished.
//
// It also throws if oldval == newval. After the status has been swapped,
// casgstatus notifies gp.bubble (if non-nil) of the change and updates the
// sampled latency tracking state on gp (runnable time and mutex wait time).
//
//go:nosplit
func casgstatus(gp *g, oldval, newval uint32) {
	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
		systemstack(func() {
			// Call on the systemstack to prevent print and throw from counting
			// against the nosplit stack reservation.
			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
			throw("casgstatus: bad incoming values")
		})
	}

	lockWithRankMayAcquire(nil, lockRankGscan)

	// See https://golang.org/cl/21503 for justification of the yield delay.
	// yieldDelay is compared against nanotime() values below.
	const yieldDelay = 5 * 1000
	var nextYield int64

	// loop if gp->atomicstatus is in a scan state giving
	// GC time to finish and change the state to oldval.
	for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ {
		if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable {
			systemstack(func() {
				// Call on the systemstack to prevent throw from counting
				// against the nosplit stack reservation.
				throw("casgstatus: waiting for Gwaiting but is Grunnable")
			})
		}
		// The first failed attempt arms the yield deadline.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			// Before the deadline: spin briefly, stopping early once
			// the status is back to oldval so the CAS can be retried.
			for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ {
				procyield(1)
			}
		} else {
			// Past the deadline: yield the OS thread and re-arm with
			// half the delay.
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}

	if gp.bubble != nil {
		systemstack(func() {
			gp.bubble.changegstatus(gp, oldval, newval)
		})
	}

	if (oldval == _Grunning || oldval == _Gsyscall) && (newval != _Grunning && newval != _Gsyscall) {
		// Track every gTrackingPeriod time a goroutine transitions out of _Grunning or _Gsyscall.
		// Do not track _Grunning <-> _Gsyscall transitions, since they're two very similar states.
		if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
			gp.tracking = true
		}
		gp.trackingSeq++
	}
	if !gp.tracking {
		return
	}

	// Handle various kinds of tracking.
	//
	// Currently:
	// - Time spent in runnable.
	// - Time spent blocked on a sync.Mutex or sync.RWMutex.
	//
	// The first switch closes out the interval that ended with oldval; the
	// second switch opens the interval that starts with newval.
	switch oldval {
	case _Grunnable:
		// We transitioned out of runnable, so measure how much
		// time we spent in this state and add it to
		// runnableTime.
		now := nanotime()
		gp.runnableTime += now - gp.trackingStamp
		gp.trackingStamp = 0
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock, measure it. Note that because we're
		// sampling, we have to multiply by our sampling period to get
		// a more representative estimate of the absolute value.
		// gTrackingPeriod also represents an accurate sampling period
		// because we can only enter this state from _Grunning.
		now := nanotime()
		sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod)
		gp.trackingStamp = 0
	}
	switch newval {
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock. Write down the timestamp.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunnable:
		// We just transitioned into runnable, so record what
		// time that happened.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunning:
		// We're transitioning into running, so turn off
		// tracking and record how much time we spent in
		// runnable.
		gp.tracking = false
		sched.timeToRun.record(gp.runnableTime)
		gp.runnableTime = 0
	}
}

// casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason.
1398 // 1399 // Use this over casgstatus when possible to ensure that a waitreason is set. 1400 func casGToWaiting(gp *g, old uint32, reason waitReason) { 1401 // Set the wait reason before calling casgstatus, because casgstatus will use it. 1402 gp.waitreason = reason 1403 casgstatus(gp, old, _Gwaiting) 1404 } 1405 1406 // casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason. 1407 // The wait reason must be a valid isWaitingForSuspendG wait reason. 1408 // 1409 // While a goroutine is in this state, it's stack is effectively pinned. 1410 // The garbage collector must not shrink or otherwise mutate the goroutine's stack. 1411 // 1412 // Use this over casgstatus when possible to ensure that a waitreason is set. 1413 func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) { 1414 if !reason.isWaitingForSuspendG() { 1415 throw("casGToWaitingForSuspendG with non-isWaitingForSuspendG wait reason") 1416 } 1417 casGToWaiting(gp, old, reason) 1418 } 1419 1420 // casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted. 1421 // 1422 // TODO(austin): This is the only status operation that both changes 1423 // the status and locks the _Gscan bit. Rethink this. 1424 func casGToPreemptScan(gp *g, old, new uint32) { 1425 if old != _Grunning || new != _Gscan|_Gpreempted { 1426 throw("bad g transition") 1427 } 1428 acquireLockRankAndM(lockRankGscan) 1429 for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) { 1430 } 1431 // We never notify gp.bubble that the goroutine state has moved 1432 // from _Grunning to _Gpreempted. We call bubble.changegstatus 1433 // after status changes happen, but doing so here would violate the 1434 // ordering between the gscan and synctest locks. The bubble doesn't 1435 // distinguish between _Grunning and _Gpreempted anyway, so not 1436 // notifying it is fine. 1437 } 1438 1439 // casGFromPreempted attempts to transition gp from _Gpreempted to 1440 // _Gwaiting. 
// If successful, the caller is responsible for
// re-scheduling gp.
//
// old must be _Gpreempted and new must be _Gwaiting; anything else throws.
// gp.waitreason is set to waitReasonPreempted before the compare-and-swap is
// attempted, so it is written even when the swap fails and false is returned.
// On success gp.bubble, if non-nil, is notified of the status change.
func casGFromPreempted(gp *g, old, new uint32) bool {
	if old != _Gpreempted || new != _Gwaiting {
		throw("bad g transition")
	}
	gp.waitreason = waitReasonPreempted
	if !gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting) {
		return false
	}
	if bubble := gp.bubble; bubble != nil {
		bubble.changegstatus(gp, _Gpreempted, _Gwaiting)
	}
	return true
}

// stwReason is an enumeration of reasons the world is stopping.
type stwReason uint8

// Reasons to stop-the-world.
//
// Avoid reusing reasons and add new ones instead.
//
// The trailing comment on each constant is its entry in stwReasonStrings.
const (
	stwUnknown                     stwReason = iota // "unknown"
	stwGCMarkTerm                                   // "GC mark termination"
	stwGCSweepTerm                                  // "GC sweep termination"
	stwWriteHeapDump                                // "write heap dump"
	stwGoroutineProfile                             // "goroutine profile"
	stwGoroutineProfileCleanup                      // "goroutine profile cleanup"
	stwAllGoroutinesStack                           // "all goroutines stack trace"
	stwReadMemStats                                 // "read mem stats"
	stwAllThreadsSyscall                            // "AllThreadsSyscall"
	stwGOMAXPROCS                                   // "GOMAXPROCS"
	stwStartTrace                                   // "start trace"
	stwStopTrace                                    // "stop trace"
	stwForTestCountPagesInUse                       // "CountPagesInUse (test)"
	stwForTestReadMetricsSlow                       // "ReadMetricsSlow (test)"
	stwForTestReadMemStatsSlow                      // "ReadMemStatsSlow (test)"
	stwForTestPageCachePagesLeaked                  // "PageCachePagesLeaked (test)"
	stwForTestResetDebugLog                         // "ResetDebugLog (test)"
)

// String returns the human-readable name of r from stwReasonStrings.
// The lookup is a plain array index, so r must be one of the stw* constants
// that has an entry in that table.
func (r stwReason) String() string {
	return stwReasonStrings[r]
}

// isGC reports whether r is one of the two GC reasons, stwGCMarkTerm or
// stwGCSweepTerm.
func (r stwReason) isGC() bool {
	return r == stwGCMarkTerm || r == stwGCSweepTerm
}

// If you add to this list, also add it to src/internal/trace/parser.go.
// If you change the values of any of the stw* constants, bump the trace
// version number and make a copy of this.
var stwReasonStrings = [...]string{
	stwUnknown:                     "unknown",
	stwGCMarkTerm:                  "GC mark termination",
	stwGCSweepTerm:                 "GC sweep termination",
	stwWriteHeapDump:               "write heap dump",
	stwGoroutineProfile:            "goroutine profile",
	stwGoroutineProfileCleanup:     "goroutine profile cleanup",
	stwAllGoroutinesStack:          "all goroutines stack trace",
	stwReadMemStats:                "read mem stats",
	stwAllThreadsSyscall:           "AllThreadsSyscall",
	stwGOMAXPROCS:                  "GOMAXPROCS",
	stwStartTrace:                  "start trace",
	stwStopTrace:                   "stop trace",
	stwForTestCountPagesInUse:      "CountPagesInUse (test)",
	stwForTestReadMetricsSlow:      "ReadMetricsSlow (test)",
	stwForTestReadMemStatsSlow:     "ReadMemStatsSlow (test)",
	stwForTestPageCachePagesLeaked: "PageCachePagesLeaked (test)",
	stwForTestResetDebugLog:        "ResetDebugLog (test)",
}

// worldStop provides context from the stop-the-world required by the
// start-the-world.
type worldStop struct {
	// reason is the stwReason the world was stopped for.
	reason stwReason
	// startedStopping is the nanotime taken by stopTheWorldWithSema right
	// after it acquired sched.lock; startTheWorldWithSema measures the
	// total STW time from it.
	startedStopping int64
	// finishedStopping is the nanotime taken once stopTheWorldWithSema
	// finished waiting for the Ps to stop.
	finishedStopping int64
	// stoppingCPUTime is the sum, over all Ps, of the time between each P's
	// gcStopTime and finishedStopping.
	stoppingCPUTime int64
}

// Temporary variable for stopTheWorld, when it can't write to the stack.
//
// Protected by worldsema.
var stopTheWorldContext worldStop

// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and records reason as the reason
// for the stop. On return, only the current goroutine's P is running.
// stopTheWorld must not be called from a system stack and the caller
// must not hold worldsema. The caller must call startTheWorld when
// other P's should resume execution.
//
// stopTheWorld is safe for multiple goroutines to call at the
// same time. Each will execute its own stop, and the stops will
// be serialized.
//
// This is also used by routines that do stack dumps.
// If the system is
// in panic or being exited, this may not reliably stop all
// goroutines.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorld.
func stopTheWorld(reason stwReason) worldStop {
	semacquire(&worldsema)
	gp := getg()
	// preemptoff is cleared again by startTheWorld.
	gp.m.preemptoff = reason.String()
	systemstack(func() {
		// The result goes through the package-level stopTheWorldContext
		// (protected by worldsema, which we hold) rather than a local.
		stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack
	})
	return stopTheWorldContext
}

// startTheWorld undoes the effects of stopTheWorld.
//
// w must be the worldStop returned by stopTheWorld.
func startTheWorld(w worldStop) {
	// Passing now == 0 makes startTheWorldWithSema capture a fresh timestamp.
	systemstack(func() { startTheWorldWithSema(0, w) })

	// worldsema must be held over startTheWorldWithSema to ensure
	// gomaxprocs cannot change while worldsema is held.
	//
	// Release worldsema with direct handoff to the next waiter, but
	// acquirem so that semrelease1 doesn't try to yield our time.
	//
	// Otherwise if e.g. ReadMemStats is being called in a loop,
	// it might stomp on other attempts to stop the world, such as
	// for starting or ending GC. The operation this blocks is
	// so heavy-weight that we should just try to be as fair as
	// possible here.
	//
	// We don't want to just allow us to get preempted between now
	// and releasing the semaphore because then we keep everyone
	// (including, for example, GCs) waiting longer.
	mp := acquirem()
	mp.preemptoff = ""
	semrelease1(&worldsema, true, 0)
	releasem(mp)
}

// stopTheWorldGC has the same effect as stopTheWorld, but blocks
// until the GC is not running. It also blocks a GC from starting
// until startTheWorldGC is called.
//
// It acquires gcsema before delegating to stopTheWorld and returns the
// worldStop produced by stopTheWorld.
func stopTheWorldGC(reason stwReason) worldStop {
	semacquire(&gcsema)
	return stopTheWorld(reason)
}

// startTheWorldGC undoes the effects of stopTheWorldGC.
//
// w must be the worldStop returned by stopTheWorldGC.
func startTheWorldGC(w worldStop) {
	// Restart the world first, then release gcsema.
	startTheWorld(w)
	semrelease(&gcsema)
}

// Holding worldsema grants an M the right to try to stop the world.
var worldsema uint32 = 1

// Holding gcsema grants the M the right to block a GC, and blocks
// until the current GC is done. In particular, it prevents gomaxprocs
// from changing concurrently.
//
// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle
// being changed/enabled during a GC, remove this.
var gcsema uint32 = 1

// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should call stopTheWorldWithSema on the
// system stack:
//
//	semacquire(&worldsema)
//	m.preemptoff = "reason"
//	var stw worldStop
//	systemstack(func() {
//		stw = stopTheWorldWithSema(reason)
//	})
//
// When finished, the caller must either call startTheWorld or undo
// these three operations separately:
//
//	m.preemptoff = ""
//	systemstack(func() {
//		now = startTheWorldWithSema(0, stw)
//	})
//	semrelease(&worldsema)
//
// It is allowed to acquire worldsema once and then execute multiple
// startTheWorldWithSema/stopTheWorldWithSema pairs.
// Other P's are able to execute between successive calls to
// startTheWorldWithSema and stopTheWorldWithSema.
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorldWithSema.
1638 // 1639 //go:systemstack 1640 func stopTheWorldWithSema(reason stwReason) worldStop { 1641 // Mark the goroutine which called stopTheWorld preemptible so its 1642 // stack may be scanned by the GC or observed by the execution tracer. 1643 // 1644 // This lets a mark worker scan us or the execution tracer take our 1645 // stack while we try to stop the world since otherwise we could get 1646 // in a mutual preemption deadlock. 1647 // 1648 // casGToWaitingForSuspendG marks the goroutine as ineligible for a 1649 // stack shrink, effectively pinning the stack in memory for the duration. 1650 // 1651 // N.B. The execution tracer is not aware of this status transition and 1652 // handles it specially based on the wait reason. 1653 casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld) 1654 1655 trace := traceAcquire() 1656 if trace.ok() { 1657 trace.STWStart(reason) 1658 traceRelease(trace) 1659 } 1660 gp := getg() 1661 1662 // If we hold a lock, then we won't be able to stop another M 1663 // that is blocked trying to acquire the lock. 1664 if gp.m.locks > 0 { 1665 throw("stopTheWorld: holding locks") 1666 } 1667 1668 lock(&sched.lock) 1669 start := nanotime() // exclude time waiting for sched.lock from start and total time metrics. 1670 sched.stopwait = gomaxprocs 1671 sched.gcwaiting.Store(true) 1672 preemptall() 1673 1674 // Stop current P. 1675 gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. 1676 gp.m.p.ptr().gcStopTime = start 1677 sched.stopwait-- 1678 1679 // Try to retake all P's in syscalls. 1680 for _, pp := range allp { 1681 if thread, ok := setBlockOnExitSyscall(pp); ok { 1682 thread.gcstopP() 1683 thread.resume() 1684 } 1685 } 1686 1687 // Stop idle Ps. 
1688 now := nanotime() 1689 for { 1690 pp, _ := pidleget(now) 1691 if pp == nil { 1692 break 1693 } 1694 pp.status = _Pgcstop 1695 pp.gcStopTime = nanotime() 1696 sched.stopwait-- 1697 } 1698 wait := sched.stopwait > 0 1699 unlock(&sched.lock) 1700 1701 // Wait for remaining Ps to stop voluntarily. 1702 if wait { 1703 for { 1704 // wait for 100us, then try to re-preempt in case of any races 1705 if notetsleep(&sched.stopnote, 100*1000) { 1706 noteclear(&sched.stopnote) 1707 break 1708 } 1709 preemptall() 1710 } 1711 } 1712 1713 finish := nanotime() 1714 startTime := finish - start 1715 if reason.isGC() { 1716 sched.stwStoppingTimeGC.record(startTime) 1717 } else { 1718 sched.stwStoppingTimeOther.record(startTime) 1719 } 1720 1721 // Double-check we actually stopped everything, and all the invariants hold. 1722 // Also accumulate all the time spent by each P in _Pgcstop up to the point 1723 // where everything was stopped. This will be accumulated into the total pause 1724 // CPU time by the caller. 1725 stoppingCPUTime := int64(0) 1726 bad := "" 1727 if sched.stopwait != 0 { 1728 bad = "stopTheWorld: not stopped (stopwait != 0)" 1729 } else { 1730 for _, pp := range allp { 1731 if pp.status != _Pgcstop { 1732 bad = "stopTheWorld: not stopped (status != _Pgcstop)" 1733 } 1734 if pp.gcStopTime == 0 && bad == "" { 1735 bad = "stopTheWorld: broken CPU time accounting" 1736 } 1737 stoppingCPUTime += finish - pp.gcStopTime 1738 pp.gcStopTime = 0 1739 } 1740 } 1741 if freezing.Load() { 1742 // Some other thread is panicking. This can cause the 1743 // sanity checks above to fail if the panic happens in 1744 // the signal handler on a stopped thread. Either way, 1745 // we should halt this thread. 1746 lock(&deadlock) 1747 lock(&deadlock) 1748 } 1749 if bad != "" { 1750 throw(bad) 1751 } 1752 1753 worldStopped() 1754 1755 // Switch back to _Grunning, now that the world is stopped. 
1756 casgstatus(getg().m.curg, _Gwaiting, _Grunning) 1757 1758 return worldStop{ 1759 reason: reason, 1760 startedStopping: start, 1761 finishedStopping: finish, 1762 stoppingCPUTime: stoppingCPUTime, 1763 } 1764 } 1765 1766 // reason is the same STW reason passed to stopTheWorld. start is the start 1767 // time returned by stopTheWorld. 1768 // 1769 // now is the current time; prefer to pass 0 to capture a fresh timestamp. 1770 // 1771 // stattTheWorldWithSema returns now. 1772 func startTheWorldWithSema(now int64, w worldStop) int64 { 1773 assertWorldStopped() 1774 1775 mp := acquirem() // disable preemption because it can be holding p in a local var 1776 if netpollinited() { 1777 list, delta := netpoll(0) // non-blocking 1778 injectglist(&list) 1779 netpollAdjustWaiters(delta) 1780 } 1781 lock(&sched.lock) 1782 1783 procs := gomaxprocs 1784 if newprocs != 0 { 1785 procs = newprocs 1786 newprocs = 0 1787 } 1788 p1 := procresize(procs) 1789 sched.gcwaiting.Store(false) 1790 if sched.sysmonwait.Load() { 1791 sched.sysmonwait.Store(false) 1792 notewakeup(&sched.sysmonnote) 1793 } 1794 unlock(&sched.lock) 1795 1796 worldStarted() 1797 1798 for p1 != nil { 1799 p := p1 1800 p1 = p1.link.ptr() 1801 if p.m != 0 { 1802 mp := p.m.ptr() 1803 p.m = 0 1804 if mp.nextp != 0 { 1805 throw("startTheWorld: inconsistent mp->nextp") 1806 } 1807 mp.nextp.set(p) 1808 notewakeup(&mp.park) 1809 } else { 1810 // Start M to run P. Do not start another M below. 1811 newm(nil, p, -1) 1812 } 1813 } 1814 1815 // Capture start-the-world time before doing clean-up tasks. 
if now == 0 {
		now = nanotime()
	}
	totalTime := now - w.startedStopping
	if w.reason.isGC() {
		sched.stwTotalTimeGC.record(totalTime)
	} else {
		sched.stwTotalTimeOther.record(totalTime)
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.STWDone()
		traceRelease(trace)
	}

	// Wakeup an additional proc in case we have excessive runnable goroutines
	// in local queues or in the global queue. If we don't, the proc will park itself.
	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
	wakep()

	releasem(mp)

	return now
}

// usesLibcall indicates whether this runtime performs system calls
// via libcall.
func usesLibcall() bool {
	switch GOOS {
	case "aix", "darwin", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mStackIsSystemAllocated indicates whether this runtime starts on a
// system-allocated stack.
func mStackIsSystemAllocated() bool {
	switch GOOS {
	case "aix", "darwin", "plan9", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mstart is the entry-point for new Ms.
// It is written in assembly, uses ABI0, is marked TOPFRAME, and calls mstart0.
func mstart()

// mstart0 is the Go entry-point for new Ms.
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart0() {
	gp := getg()

	// A zero stack.lo means nobody has recorded stack bounds for this g yet.
	osStack := gp.stack.lo == 0
	if osStack {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		// minit may update the stack bounds.
		//
		// Note: these bounds may not be very accurate.
		// We set hi to &size, but there are things above
		// it. The 1024 is supposed to compensate for this,
		// but is somewhat arbitrary.
		size := gp.stack.hi
		if size == 0 {
			size = 16384 * sys.StackGuardMultiplier
		}
		gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		gp.stack.lo = gp.stack.hi - size + 1024
	}
	// Initialize stack guard so that we can start calling regular
	// Go code.
	gp.stackguard0 = gp.stack.lo + stackGuard
	// This is the g0, so we can also call go:systemstack
	// functions, which check stackguard1.
	gp.stackguard1 = gp.stackguard0
	mstart1()

	// Exit this thread.
	if mStackIsSystemAllocated() {
		// The platforms for which mStackIsSystemAllocated reports true
		// (AIX, Darwin, iOS, illumos, OpenBSD, Plan 9, Solaris and
		// Windows) always system-allocate the stack, but put it in
		// gp.stack before mstart, so the logic above hasn't set
		// osStack yet.
		osStack = true
	}
	mexit(osStack)
}

// mstart1 finishes initializing the current M and then enters the
// scheduler. It must run on the M's g0 and throws otherwise.
//
// The go:noinline is to guarantee the sys.GetCallerPC/sys.GetCallerSP below are safe,
// so that we can set up g0.sched to return to the call of mstart1 above.
//
//go:noinline
func mstart1() {
	gp := getg()

	if gp != gp.m.g0 {
		throw("bad runtime·mstart")
	}

	// Set up m.g0.sched as a label returning to just
	// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
	// We're never coming back to mstart1 after we call schedule,
	// so other calls can reuse the current frame.
	// And goexit0 does a gogo that needs to return from mstart1
	// and let mstart0 exit the thread.
	gp.sched.g = guintptr(unsafe.Pointer(gp))
	gp.sched.pc = sys.GetCallerPC()
	gp.sched.sp = sys.GetCallerSP()

	asminit()
	minit()

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if gp.m == &m0 {
		mstartm0()
	}

	if debug.dataindependenttiming == 1 {
		sys.EnableDIT()
	}

	// Run the start function recorded for this M, if any, before
	// entering the scheduler.
	if fn := gp.m.mstartfn; fn != nil {
		fn()
	}

	// Every M other than m0 takes the P that was stashed in m.nextp
	// for it and then clears nextp.
	if gp.m != &m0 {
		acquirep(gp.m.nextp.ptr())
		gp.m.nextp = 0
	}
	schedule()
}

// mstartm0 implements part of mstart1 that only runs on the m0.
//
// Write barriers are allowed here because we know the GC can't be
// running yet, so they'll be no-ops.
//
//go:yeswritebarrierrec
func mstartm0() {
	// Create an extra M for callbacks on threads not created by Go.
	// An extra M is also needed on Windows for callbacks created by
	// syscall.NewCallback. See issue #6751 for details.
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		cgoHasExtraM = true
		newextram()
	}
	initsig(false)
}

// mPark causes a thread to park itself, returning once woken.
// It sleeps on the current m's park note and clears that note
// before returning.
//
//go:nosplit
func mPark() {
	gp := getg()
	// This M might stay parked through an entire GC cycle.
	// Erase any leftovers on the signal stack.
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}
	notesleep(&gp.m.park)
	noteclear(&gp.m.park)
}

// mexit tears down and exits the current thread.
//
// Don't call this directly to exit the thread, since it must run at
// the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to
// unwind the stack to the point that exits the thread.
//
// It is entered with m.p != nil, so write barriers are allowed. It
// will release the P before exiting.
//
// If osStack is true, mexit stores freeMRef in m.freeWait and returns,
// so that returning from mstart lets the system thread library free
// the g0 stack and terminate the thread. Otherwise it exits the thread
// through exitThread. On m0 it hands off the P and parks instead of
// exiting.
//
//go:yeswritebarrierrec
func mexit(osStack bool) {
	mp := getg().m

	if mp == &m0 {
		// This is the main thread. Just wedge it.
		//
		// On Linux, exiting the main thread puts the process
		// into a non-waitable zombie state. On Plan 9,
		// exiting the main thread unblocks wait even though
		// other threads are still running. On Solaris we can
		// neither exitThread nor return from mstart. Other
		// bad things probably happen on other platforms.
		//
		// We could try to clean up this M more before wedging
		// it, but that complicates signal handling.
		handoffp(releasep())
		lock(&sched.lock)
		sched.nmfreed++
		checkdead()
		unlock(&sched.lock)
		mPark()
		throw("locked m0 woke up")
	}

	sigblock(true)
	unminit()

	// Free the gsignal stack.
	if mp.gsignal != nil {
		stackfree(mp.gsignal.stack)
		if valgrindenabled {
			valgrindDeregisterStack(mp.gsignal.valgrindStackID)
			mp.gsignal.valgrindStackID = 0
		}
		// On some platforms, when calling into VDSO (e.g. nanotime)
		// we store our g on the gsignal stack, if there is one.
		// Now the stack is freed, unlink it from the m, so we
		// won't write to it when calling VDSO code.
		mp.gsignal = nil
	}

	// Free vgetrandom state.
	vgetrandomDestroy(mp)

	// Clear the self pointer so Ps don't access this M after it is freed,
	// or keep it alive.
	mp.self.clear()

	// Remove m from allm.
	lock(&sched.lock)
	for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
		if *pprev == mp {
			*pprev = mp.alllink
			goto found
		}
	}
	throw("m not found in allm")
found:
	// Events must not be traced after this point.

	// Delay reaping m until it's done with the stack.
	//
	// Put mp on the free list, though it will not be reaped while freeWait
	// is freeMWait. mp is no longer reachable via allm, so even if it is
	// on an OS stack, we must keep a reference to mp alive so that the GC
	// doesn't free mp while we are still using it.
	//
	// Note that the free list must not be linked through alllink because
	// some functions walk allm without locking, so may be using alllink.
	//
	// N.B. It's important that the M appears on the free list simultaneously
	// with it being removed so that the tracer can find it.
	mp.freeWait.Store(freeMWait)
	mp.freelink = sched.freem
	sched.freem = mp
	unlock(&sched.lock)

	// Fold this M's counters into the global totals before it goes away.
	atomic.Xadd64(&ncgocall, int64(mp.ncgocall))
	sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load())

	// Release the P.
	handoffp(releasep())
	// After this point we must not have write barriers.

	// Invoke the deadlock detector. This must happen after
	// handoffp because it may have started a new M to take our
	// P's work.
	lock(&sched.lock)
	sched.nmfreed++
	checkdead()
	unlock(&sched.lock)

	if GOOS == "darwin" || GOOS == "ios" {
		// Make sure pendingPreemptSignals is correct when an M exits.
		// For #41702.
		if mp.signalPending.Load() != 0 {
			pendingPreemptSignals.Add(-1)
		}
	}

	// Destroy all allocated resources. After this is called, we may no
	// longer take any locks.
	mdestroy(mp)

	if osStack {
		// No more uses of mp, so it is safe to drop the reference.
		mp.freeWait.Store(freeMRef)

		// Return from mstart and let the system thread
		// library free the g0 stack and terminate the thread.
		return
	}

	// mstart is the thread's entry point, so there's nothing to
	// return to. Exit the thread directly. exitThread will clear
	// m.freeWait when it's done with the stack and the m can be
	// reaped.
	exitThread(&mp.freeWait)
}

// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P.
If the P is not executing code 2121 // (it is idle or in a syscall), this will call fn(p) directly while 2122 // preventing the P from exiting its state. This does not ensure that 2123 // fn will run on every CPU executing Go code, but it acts as a global 2124 // memory barrier. GC uses this as a "ragged barrier." 2125 // 2126 // The caller must hold worldsema. fn must not refer to any 2127 // part of the current goroutine's stack, since the GC may move it. 2128 func forEachP(reason waitReason, fn func(*p)) { 2129 systemstack(func() { 2130 gp := getg().m.curg 2131 // Mark the user stack as preemptible so that it may be scanned 2132 // by the GC or observed by the execution tracer. Otherwise, our 2133 // attempt to force all P's to a safepoint could result in a 2134 // deadlock as we attempt to preempt a goroutine that's trying 2135 // to preempt us (e.g. for a stack scan). 2136 // 2137 // casGToWaitingForSuspendG marks the goroutine as ineligible for a 2138 // stack shrink, effectively pinning the stack in memory for the duration. 2139 // 2140 // N.B. The execution tracer is not aware of this status transition and 2141 // handles it specially based on the wait reason. 2142 casGToWaitingForSuspendG(gp, _Grunning, reason) 2143 forEachPInternal(fn) 2144 casgstatus(gp, _Gwaiting, _Grunning) 2145 }) 2146 } 2147 2148 // forEachPInternal calls fn(p) for every P p when p reaches a GC safe point. 2149 // It is the internal implementation of forEachP. 2150 // 2151 // The caller must hold worldsema and either must ensure that a GC is not 2152 // running (otherwise this may deadlock with the GC trying to preempt this P) 2153 // or it must leave its goroutine in a preemptible state before it switches 2154 // to the systemstack. Due to these restrictions, prefer forEachP when possible. 
//
//go:systemstack
func forEachPInternal(fn func(*p)) {
	mp := acquirem()
	pp := getg().m.p.ptr()

	lock(&sched.lock)
	if sched.safePointWait != 0 {
		throw("forEachP: sched.safePointWait != 0")
	}
	// Every P except the current one is flagged below and counted here;
	// the current P runs fn directly further down.
	sched.safePointWait = gomaxprocs - 1
	sched.safePointFn = fn

	// Ask all Ps to run the safe point function.
	for _, p2 := range allp {
		if p2 != pp {
			atomic.Store(&p2.runSafePointFn, 1)
		}
	}
	preemptall()

	// Any P entering _Pidle or a system call from now on will observe
	// p.runSafePointFn == 1 and will call runSafePointFn when
	// changing its status to _Pidle.

	// Run safe point function for all idle Ps. sched.pidle will
	// not change because we hold sched.lock.
	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
		if atomic.Cas(&p.runSafePointFn, 1, 0) {
			fn(p)
			sched.safePointWait--
		}
	}

	// Decide whether to wait while still holding sched.lock.
	wait := sched.safePointWait > 0
	unlock(&sched.lock)

	// Run fn for the current P.
	fn(pp)

	// Force Ps currently in a system call into _Pidle and hand them
	// off to induce safe point function execution.
	for _, p2 := range allp {
		if atomic.Load(&p2.runSafePointFn) != 1 {
			// Already ran it.
			continue
		}
		if thread, ok := setBlockOnExitSyscall(p2); ok {
			thread.takeP()
			thread.resume()
			handoffp(p2)
		}
	}

	// Wait for remaining Ps to run fn.
	if wait {
		for {
			// Wait for 100us, then try to re-preempt in
			// case of any races.
			//
			// Requires system stack.
			if notetsleep(&sched.safePointNote, 100*1000) {
				noteclear(&sched.safePointNote)
				break
			}
			preemptall()
		}
	}
	// Sanity checks: the wait count must have reached zero and every P
	// must have cleared its runSafePointFn flag.
	if sched.safePointWait != 0 {
		throw("forEachP: not done")
	}
	for _, p2 := range allp {
		if p2.runSafePointFn != 0 {
			throw("forEachP: P did not run fn")
		}
	}

	lock(&sched.lock)
	sched.safePointFn = nil
	unlock(&sched.lock)
	releasem(mp)
}

// runSafePointFn runs the safe point function, if any, for this P.
// This should be called like
//
//	if getg().m.p.runSafePointFn != 0 {
//		runSafePointFn()
//	}
//
// runSafePointFn must be checked on any transition in to _Pidle or
// when entering a system call to avoid a race where forEachP sees
// that the P is running just before the P goes into _Pidle/system call
// and neither forEachP nor the P run the safe-point function.
func runSafePointFn() {
	p := getg().m.p.ptr()
	// Resolve the race between forEachP running the safe-point
	// function on this P's behalf and this P running the
	// safe-point function directly.
	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
		return
	}
	sched.safePointFn(p)
	lock(&sched.lock)
	sched.safePointWait--
	// The last P to finish wakes the waiter sleeping on
	// sched.safePointNote in forEachPInternal.
	if sched.safePointWait == 0 {
		notewakeup(&sched.safePointNote)
	}
	unlock(&sched.lock)
}

// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
var cgoThreadStart unsafe.Pointer

// cgothreadstart is the argument block that newm1 fills in and passes
// to _cgo_thread_start.
type cgothreadstart struct {
	g   guintptr       // set by newm1 to the new m's g0
	tls *uint64        // set by newm1 to the address of the new m's tls[0]
	fn  unsafe.Pointer // set by newm1 to the PC of mstart
}

// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
// fn is recorded as the new m's m.mstartfn.
// id is optional pre-allocated m ID. Omit by passing -1.
//
// This function is allowed to have write barriers even if the caller
// isn't because it borrows pp.
//
//go:yeswritebarrierrec
func allocm(pp *p, fn func(), id int64) *m {
	allocmLock.rlock()

	// The caller owns pp, but we may borrow (i.e., acquirep) it. We must
	// disable preemption to ensure it is not stolen, which would make the
	// caller lose ownership.
	acquirem()

	gp := getg()
	if gp.m.p == 0 {
		acquirep(pp) // temporarily borrow p for mallocs in this function
	}

	// Release the free M list. We need to do this somewhere and
	// this may free up a stack we can use.
	if sched.freem != nil {
		lock(&sched.lock)
		// newList collects the Ms that cannot be reaped yet; it
		// replaces sched.freem once the walk is done.
		var newList *m
		for freem := sched.freem; freem != nil; {
			// Wait for freeWait to indicate that freem's stack is unused.
			wait := freem.freeWait.Load()
			if wait == freeMWait {
				// Still in use: keep it on the list for a later call.
				next := freem.freelink
				freem.freelink = newList
				newList = freem
				freem = next
				continue
			}
			// Drop any remaining trace resources.
			// Ms can continue to emit events all the way until wait != freeMWait,
			// so it's only safe to call traceThreadDestroy at this point.
			if traceEnabled() || traceShuttingDown() {
				traceThreadDestroy(freem)
			}
			// Free the stack if needed. For freeMRef, there is
			// nothing to do except drop freem from the sched.freem
			// list.
			if wait == freeMStack {
				// stackfree must be on the system stack, but allocm is
				// reachable off the system stack transitively from
				// startm.
				systemstack(func() {
					stackfree(freem.g0.stack)
					if valgrindenabled {
						valgrindDeregisterStack(freem.g0.valgrindStackID)
						freem.g0.valgrindStackID = 0
					}
				})
			}
			freem = freem.freelink
		}
		sched.freem = newList
		unlock(&sched.lock)
	}

	mp := &new(mPadded).m
	mp.mstartfn = fn
	mcommoninit(mp, id)

	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
	// Windows and Plan 9 will layout sched stack on OS stack.
	if iscgo || mStackIsSystemAllocated() {
		mp.g0 = malg(-1)
	} else {
		mp.g0 = malg(16384 * sys.StackGuardMultiplier)
	}
	mp.g0.m = mp

	// Release pp again if it is the P this M currently holds.
	if pp == gp.m.p.ptr() {
		releasep()
	}

	releasem(gp.m)
	allocmLock.runlock()
	return mp
}

// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via Casuintptr) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// dropm is called to put the m back on the list,
//  1. when the callback is done with the m, on non-pthread platforms,
//  2. or when the C thread is exiting, on pthread platforms.
//
// The signal argument indicates whether we're called from a signal
// handler.
//
//go:nosplit
func needm(signal bool) {
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can also happen on Windows if a global ctor uses a
		// callback created by syscall.NewCallback. See issue #6751
		// for details.
		//
		// Can not throw, because scheduler is not initialized yet.
		writeErrStr("fatal error: cgo callback before cgo call\n")
		exit(1)
	}

	// Save and block signals before getting an M.
	// The signal handler may call needm itself,
	// and we must avoid a deadlock. Also, once g is installed,
	// any incoming signals will try to execute,
	// but we won't have the sigaltstack settings and other data
	// set up appropriately until the end of minit, which will
	// unblock the signals. This is the same dance as when
	// starting a new m to run Go code via newosproc.
	var sigmask sigset
	sigsave(&sigmask)
	sigblock(false)

	// getExtraM is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp, last := getExtraM()

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp.needextram = last

	// Store the original signal mask for use by minit.
	mp.sigmask = sigmask

	// Install TLS on some platforms (previously setg
	// would do this if necessary).
	osSetupTLS(mp)

	// Install g (= m->g0) and set the stack bounds
	// to match the current stack.
	setg(mp.g0)
	sp := sys.GetCallerSP()
	callbackUpdateSystemStack(mp, sp, signal)

	// We must mark that we are already in Go now.
	// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
	// which means the extram list may be empty, that will cause a deadlock.
	mp.isExtraInC = false

	// Initialize this thread to use the m.
	asminit()
	minit()

	// Emit a trace event for this dead -> syscall transition,
	// but only if we're not in a signal handler.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !signal {
		trace = traceAcquire()
	}

	// mp.curg is now a real goroutine.
	casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
	sched.ngsys.Add(-1)

	// This is technically inaccurate, but we set isExtraInC to false above,
	// and so we need to update addGSyscallNoP to keep the two pieces of state
	// consistent (it's only updated when isExtraInC is false). More specifically,
	// When we get to cgocallbackg and exitsyscall, we'll be looking for a P, and
	// since isExtraInC is false, we will decrement this metric.
	//
	// The inaccuracy is thankfully transient: only until this thread can get a P.
	// We're going into Go anyway, so it's okay to pretend we're a real goroutine now.
	addGSyscallNoP(mp)

	if !signal {
		if trace.ok() {
			trace.GoCreateSyscall(mp.curg)
			traceRelease(trace)
		}
	}
	// Remember whether this m was acquired from a signal handler;
	// dropm reads this to decide whether to emit its trace event.
	mp.isExtraInSig = signal
}

// Acquire an extra m and bind it to the C thread when a pthread key has been created.
//
//go:nosplit
func needAndBindM() {
	needm(false)

	if _cgo_pthread_key_created != nil && *(*uintptr)(_cgo_pthread_key_created) != 0 {
		cgoBindM()
	}
}

// newextram allocates m's and puts them on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
//
// It allocates one m per waiter counted in extraMWaiters (resetting
// that counter to zero). If there were no waiters, it allocates a
// single m only when the extra list is empty.
func newextram() {
	c := extraMWaiters.Swap(0)
	if c > 0 {
		for i := uint32(0); i < c; i++ {
			oneNewExtraM()
		}
	} else if extraMLength.Load() == 0 {
		// Make sure there is at least one extra M.
		oneNewExtraM()
	}
}

// oneNewExtraM allocates an m and puts it on the extra list.
func oneNewExtraM() {
	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	mp := allocm(nil, nil, -1)
	gp := malg(4096)
	gp.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum
	gp.sched.sp = gp.stack.hi
	gp.sched.sp -= 4 * goarch.PtrSize // extra space in case of reads slightly beyond frame
	gp.sched.lr = 0
	gp.sched.g = guintptr(unsafe.Pointer(gp))
	gp.syscallpc = gp.sched.pc
	gp.syscallsp = gp.sched.sp
	gp.stktopsp = gp.sched.sp
	// malg returns status as _Gidle. Change to _Gdeadextra before
	// adding to allg where GC can see it. _Gdeadextra hides this
	// from traceback and stack scans.
	casgstatus(gp, _Gidle, _Gdeadextra)
	gp.m = mp
	mp.curg = gp
	mp.isextra = true
	// mark we are in C by default.
	mp.isExtraInC = true
	// Lock the goroutine and the m to each other.
	mp.lockedInt++
	mp.lockedg.set(gp)
	gp.lockedm.set(mp)
	gp.goid = sched.goidgen.Add(1)
	if raceenabled {
		gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
	}
	// put on allg for garbage collector
	allgadd(gp)

	// gp is now on the allg list, but we don't want it to be
	// counted by gcount. It would be more "proper" to increment
	// sched.ngfree, but that requires locking. Incrementing ngsys
	// has the same effect.
	sched.ngsys.Add(1)

	// Add m to the extra list.
	addExtraM(mp)
}

// dropm puts the current m back onto the extra list.
//
// 1. On systems without pthreads, like Windows
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// 2. On systems with pthreads
// dropm is called while a non-Go thread is exiting.
// We allocate a pthread per-thread variable using pthread_key_create,
// to register a thread-exit-time destructor.
// And store the g into a thread-specific value associated with the pthread key,
// when first return back to C.
// So that the destructor would invoke dropm while the non-Go thread is exiting.
// This is much faster since it avoids expensive signal-related syscalls.
//
// This may run without a P, so //go:nowritebarrierrec is required.
//
// This may run with a different stack than was recorded in g0 (there is no
// call to callbackUpdateSystemStack prior to dropm), so this must be
// //go:nosplit to avoid the stack bounds check.
//
//go:nowritebarrierrec
//go:nosplit
func dropm() {
	// Clear m and g, and return m to the extra list.
	// After the call to setg we can only call nosplit functions
	// with no pointer manipulation.
	mp := getg().m

	// Emit a trace event for this syscall -> dead transition.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !mp.isExtraInSig {
		trace = traceAcquire()
	}

	// Return mp.curg to _Gdeadextra state.
	casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
	mp.curg.preemptStop = false
	sched.ngsys.Add(1)
	decGSyscallNoP(mp)

	if !mp.isExtraInSig {
		if trace.ok() {
			trace.GoDestroySyscall()
			traceRelease(trace)
		}
	}

	// Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore.
	//
	// In the new tracer, we model needm and dropm as a goroutine being created and
	// destroyed respectively. The m then might get reused with a different procid but
	// still with a reference to oldp, and still with the same syscalltick. The next
	// time a G is "created" in needm, it'll return and quietly reacquire its P from a
	// different m with a different procid, which will confuse the trace parser. By
	// trashing syscalltick, we ensure that it'll appear as if we lost the P to the
	// tracer parser and that we just reacquired it.
	//
	// Trash the value by decrementing because that gets us as far away from the value
	// the syscall exit code expects as possible. Setting to zero is risky because
	// syscalltick could already be zero (and in fact, is initialized to zero).
	mp.syscalltick--

	// Reset trace state unconditionally. This goroutine is being 'destroyed'
	// from the perspective of the tracer.
	mp.curg.trace.reset()

	// Flush all the M's buffers. This is necessary because the M might
	// be used on a different thread with a different procid, so we have
	// to make sure we don't write into the same buffer.
	if traceEnabled() || traceShuttingDown() {
		// Acquire sched.lock across thread destruction. One of the invariants of the tracer
		// is that a thread cannot disappear from the tracer's view (allm or freem) without
		// it noticing, so it requires that sched.lock be held over traceThreadDestroy.
		//
		// This isn't strictly necessary in this case, because this thread never leaves allm,
		// but the critical section is short and dropm is rare on pthread platforms, so just
		// take the lock and play it safe. traceThreadDestroy also asserts that the lock is held.
		lock(&sched.lock)
		traceThreadDestroy(mp)
		unlock(&sched.lock)
	}
	mp.isExtraInSig = false

	// Block signals before unminit.
	// Unminit unregisters the signal handling stack (but needs g on some systems).
	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
	// It's important not to try to handle a signal between those two steps.
	//
	// The saved mask is copied out of mp first because mp goes back on
	// the extra list before the mask is restored.
	sigmask := mp.sigmask
	sigblock(false)
	unminit()

	setg(nil)

	// Clear g0 stack bounds to ensure that needm always refreshes the
	// bounds when reusing this M.
	g0 := mp.g0
	g0.stack.hi = 0
	g0.stack.lo = 0
	g0.stackguard0 = 0
	g0.stackguard1 = 0
	mp.g0StackAccurate = false

	putExtraM(mp)

	msigrestore(sigmask)
}

// cgoBindM stores the g0 of the current m into a thread-specific value.
//
// We allocate a pthread per-thread variable using pthread_key_create,
// to register a thread-exit-time destructor.
// We are here setting the thread-specific value of the pthread key, to enable the destructor.
// So that the pthread_key_destructor would dropm while the C thread is exiting.
//
// And the saved g will be used in pthread_key_destructor,
// since the g stored in the TLS by Go might be cleared in some platforms,
// before the destructor invoked, so, we restore g by the stored g, before dropm.
//
// We store g0 instead of m, to make the assembly code simpler,
// since we need to restore g0 in runtime.cgocallback.
//
// On systems without pthreads, like Windows, cgoBindM shouldn't be used.
2703 // 2704 // NOTE: this always runs without a P, so, nowritebarrierrec required. 2705 // 2706 //go:nosplit 2707 //go:nowritebarrierrec 2708 func cgoBindM() { 2709 if GOOS == "windows" || GOOS == "plan9" { 2710 fatal("bindm in unexpected GOOS") 2711 } 2712 g := getg() 2713 if g.m.g0 != g { 2714 fatal("the current g is not g0") 2715 } 2716 if _cgo_bindm != nil { 2717 asmcgocall(_cgo_bindm, unsafe.Pointer(g)) 2718 } 2719 } 2720 2721 // A helper function for EnsureDropM. 2722 // 2723 // getm should be an internal detail, 2724 // but widely used packages access it using linkname. 2725 // Notable members of the hall of shame include: 2726 // - fortio.org/log 2727 // 2728 // Do not remove or change the type signature. 2729 // See go.dev/issue/67401. 2730 // 2731 //go:linkname getm 2732 func getm() uintptr { 2733 return uintptr(unsafe.Pointer(getg().m)) 2734 } 2735 2736 var ( 2737 // Locking linked list of extra M's, via mp.schedlink. Must be accessed 2738 // only via lockextra/unlockextra. 2739 // 2740 // Can't be atomic.Pointer[m] because we use an invalid pointer as a 2741 // "locked" sentinel value. M's on this list remain visible to the GC 2742 // because their mp.curg is on allgs. 2743 extraM atomic.Uintptr 2744 // Number of M's in the extraM list. 2745 extraMLength atomic.Uint32 2746 // Number of waiters in lockextra. 2747 extraMWaiters atomic.Uint32 2748 2749 // Number of extra M's in use by threads. 2750 extraMInUse atomic.Uint32 2751 ) 2752 2753 // lockextra locks the extra list and returns the list head. 2754 // The caller must unlock the list by storing a new list head 2755 // to extram. If nilokay is true, then lockextra will 2756 // return a nil list head if that's what it finds. If nilokay is false, 2757 // lockextra will keep waiting until the list head is no longer nil. 
//
//go:nosplit
func lockextra(nilokay bool) *m {
	// locked is the sentinel stored in extraM while the list is locked.
	const locked = 1

	// incr records whether this call has already been counted in
	// extraMWaiters, so that it is counted at most once.
	incr := false
	for {
		old := extraM.Load()
		if old == locked {
			// Another thread holds the list; yield and retry.
			osyield_no_g()
			continue
		}
		if old == 0 && !nilokay {
			if !incr {
				// Add 1 to the number of threads
				// waiting for an M.
				// This is cleared by newextram.
				extraMWaiters.Add(1)
				incr = true
			}
			usleep_no_g(1)
			continue
		}
		if extraM.CompareAndSwap(old, locked) {
			return (*m)(unsafe.Pointer(old))
		}
		osyield_no_g()
		continue
	}
}

// unlockextra unlocks the extra list by storing mp as the new list
// head, after adjusting extraMLength by delta.
//
//go:nosplit
func unlockextra(mp *m, delta int32) {
	extraMLength.Add(delta)
	extraM.Store(uintptr(unsafe.Pointer(mp)))
}

// Return an M from the extra M list. Returns last == true if the list becomes
// empty because of this call.
//
// Spins waiting for an extra M, so caller must ensure that the list always
// contains or will soon contain at least one M.
//
//go:nosplit
func getExtraM() (mp *m, last bool) {
	mp = lockextra(false)
	extraMInUse.Add(1)
	// Pop mp: its successor becomes the new list head.
	unlockextra(mp.schedlink.ptr(), -1)
	return mp, mp.schedlink.ptr() == nil
}

// Returns an extra M back to the list. mp must be from getExtraM. Newly
// allocated M's should use addExtraM.
//
//go:nosplit
func putExtraM(mp *m) {
	extraMInUse.Add(-1)
	addExtraM(mp)
}

// Adds a newly allocated M to the extra M list.
//
//go:nosplit
func addExtraM(mp *m) {
	// An empty list is fine here, since we are about to push onto it.
	mnext := lockextra(true)
	mp.schedlink.set(mnext)
	unlockextra(mp, 1)
}

var (
	// allocmLock is locked for read when creating new Ms in allocm and their
	// addition to allm. Thus acquiring this lock for write blocks the
	// creation of new Ms.
	allocmLock rwmutex

	// execLock serializes exec and clone to avoid bugs or unspecified
	// behaviour around exec'ing while creating/destroying threads. See
	// issue #19546.
	execLock rwmutex
)

// These errors are reported (via writeErrStr) by some OS-specific
// versions of newosproc and newosproc0.
const (
	failthreadcreate  = "runtime: failed to create new OS thread\n"
	failallocatestack = "runtime: failed to allocate stack for the new OS thread\n"
)

// newmHandoff contains a list of m structures that need new OS threads.
// This is used by newm in situations where newm itself can't safely
// start an OS thread.
var newmHandoff struct {
	lock mutex

	// newm points to a list of M structures that need new OS
	// threads. The list is linked through m.schedlink.
	newm muintptr

	// waiting indicates that wake needs to be notified when an m
	// is put on the list.
	waiting bool
	wake    note

	// haveTemplateThread indicates that the templateThread has
	// been started. This is not protected by lock. Use cas to set
	// to 1.
	haveTemplateThread uint32
}

// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
//
// id is optional pre-allocated m ID. Omit by passing -1.
//
//go:nowritebarrierrec
func newm(fn func(), pp *p, id int64) {
	// allocm adds a new M to allm, but they do not start until created by
	// the OS in newm1 or the template thread.
	//
	// doAllThreadsSyscall requires that every M in allm will eventually
	// start and be signal-able, even with a STW.
	//
	// Disable preemption here until we start the thread to ensure that
	// newm is not preempted between allocm and starting the new thread,
	// ensuring that anything added to allm is guaranteed to eventually
	// start.
	acquirem()

	mp := allocm(pp, fn, id)
	mp.nextp.set(pp)
	mp.sigmask = initSigmask
	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
		// We're on a locked M or a thread that may have been
		// started by C. The kernel state of this thread may
		// be strange (the user may have locked it for that
		// purpose). We don't want to clone that into another
		// thread. Instead, ask a known-good thread to create
		// the thread for us.
		//
		// This is disabled on Plan 9. See golang.org/issue/22227.
		//
		// TODO: This may be unnecessary on Windows, which
		// doesn't model thread creation off fork.
		lock(&newmHandoff.lock)
		if newmHandoff.haveTemplateThread == 0 {
			throw("on a locked thread with no template thread")
		}
		// Push mp onto the handoff list and wake the template
		// thread if it is waiting for work.
		mp.schedlink = newmHandoff.newm
		newmHandoff.newm.set(mp)
		if newmHandoff.waiting {
			newmHandoff.waiting = false
			notewakeup(&newmHandoff.wake)
		}
		unlock(&newmHandoff.lock)
		// The M has not started yet, but the template thread does not
		// participate in STW, so it will always process queued Ms and
		// it is safe to releasem.
		releasem(getg().m)
		return
	}
	newm1(mp)
	releasem(getg().m)
}

// newm1 starts an OS thread for mp. When cgo is in use and
// _cgo_thread_start is set, the thread is created by calling
// _cgo_thread_start with a cgothreadstart describing mp; otherwise it
// is created with newosproc. Either way execLock is held for reading
// across the thread creation.
func newm1(mp *m) {
	if iscgo && _cgo_thread_start != nil {
		var ts cgothreadstart
		ts.g.set(mp.g0)
		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
		ts.fn = unsafe.Pointer(abi.FuncPCABI0(mstart))
		if msanenabled {
			msanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts))
		}
		if asanenabled {
			asanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts))
		}
		execLock.rlock() // Prevent process clone.
		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
		execLock.runlock()
		return
	}
	execLock.rlock() // Prevent process clone.
	newosproc(mp)
	execLock.runlock()
}

// startTemplateThread starts the template thread if it is not already
// running.
//
// The calling thread must itself be in a known-good state.
func startTemplateThread() {
	if GOARCH == "wasm" { // no threads on wasm yet
		return
	}

	// Disable preemption to guarantee that the template thread will be
	// created before a park once haveTemplateThread is set.
	mp := acquirem()
	// Only the caller that wins the 0 -> 1 transition starts the thread.
	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
		releasem(mp)
		return
	}
	newm(templateThread, nil, -1)
	releasem(mp)
}

// templateThread is a thread in a known-good state that exists solely
// to start new threads in known-good states when the calling thread
// may not be in a good state.
//
// Many programs never need this, so templateThread is started lazily
// when we first enter a state that might lead to running on a thread
// in an unknown state.
//
// templateThread runs on an M without a P, so it must not have write
// barriers.
//
//go:nowritebarrierrec
func templateThread() {
	lock(&sched.lock)
	sched.nmsys++
	checkdead()
	unlock(&sched.lock)

	for {
		lock(&newmHandoff.lock)
		for newmHandoff.newm != 0 {
			// Detach the whole pending list, then start the threads
			// without holding newmHandoff.lock.
			newm := newmHandoff.newm.ptr()
			newmHandoff.newm = 0
			unlock(&newmHandoff.lock)
			for newm != nil {
				next := newm.schedlink.ptr()
				newm.schedlink = 0
				newm1(newm)
				newm = next
			}
			lock(&newmHandoff.lock)
		}
		// Nothing left to start: ask newm to wake us when it queues
		// another m, then sleep.
		newmHandoff.waiting = true
		noteclear(&newmHandoff.wake)
		unlock(&newmHandoff.lock)
		notesleep(&newmHandoff.wake)
	}
}

// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
	gp := getg()

	if gp.m.locks != 0 {
		throw("stopm holding locks")
	}
	if gp.m.p != 0 {
		throw("stopm holding p")
	}
	if gp.m.spinning {
		throw("stopm spinning")
	}

	lock(&sched.lock)
	mput(gp.m)
	unlock(&sched.lock)
	mPark()
	// After mPark returns, take the P left in m.nextp (startm sets nextp
	// before calling notewakeup on m.park).
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// mspinning marks the current M as spinning. startm passes it to newm as
// the start function for a new M when spinning is requested.
func mspinning() {
	// startm's caller incremented nmspinning. Set the new M's spinning.
	getg().m.spinning = true
}

// Schedules some M to run the p (creates an M if necessary).
// If pp==nil, tries to get an idle P, if no idle P's does nothing.
// May run with m.p==nil, so write barriers are not allowed.
// If spinning is set, the caller has incremented nmspinning and must provide a
// P. startm will set m.spinning in the newly started M.
//
// Callers passing a non-nil P must call from a non-preemptible context. See
// comment on acquirem below.
//
// Argument lockheld indicates whether the caller already acquired the
// scheduler lock. Callers holding the lock when making the call must pass
// true. The lock might be temporarily dropped, but will be reacquired before
// returning.
//
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
func startm(pp *p, spinning, lockheld bool) {
	// Disable preemption.
	//
	// Every owned P must have an owner that will eventually stop it in the
	// event of a GC stop request. startm takes transient ownership of a P
	// (either from argument or pidleget below) and transfers ownership to
	// a started M, which will be responsible for performing the stop.
	//
	// Preemption must be disabled during this transient ownership,
	// otherwise the P this is running on may enter GC stop while still
	// holding the transient P, leaving that P in limbo and deadlocking the
	// STW.
	//
	// Callers passing a non-nil P must already be in non-preemptible
	// context, otherwise such preemption could occur on function entry to
	// startm. Callers passing a nil P may be preemptible, so we must
	// disable preemption before acquiring a P from pidleget below.
	mp := acquirem()
	if !lockheld {
		lock(&sched.lock)
	}
	if pp == nil {
		if spinning {
			// TODO(prattmic): All remaining calls to this function
			// with pp == nil could be cleaned up to find a P
			// before calling startm.
			throw("startm: P required for spinning=true")
		}
		pp, _ = pidleget(0)
		if pp == nil {
			if !lockheld {
				unlock(&sched.lock)
			}
			releasem(mp)
			return
		}
	}
	nmp := mget()
	if nmp == nil {
		// No M is available, we must drop sched.lock and call newm.
		// However, we already own a P to assign to the M.
		//
		// Once sched.lock is released, another G (e.g., in a syscall),
		// could find no idle P while checkdead finds a runnable G but
		// no running M's because this new M hasn't started yet, thus
		// throwing in an apparent deadlock.
		// This apparent deadlock is possible when startm is called
		// from sysmon, which doesn't count as a running M.
		//
		// Avoid this situation by pre-allocating the ID for the new M,
		// thus marking it as 'running' before we drop sched.lock. This
		// new M will eventually run the scheduler to execute any
		// queued G's.
		id := mReserveID()
		unlock(&sched.lock)

		var fn func()
		if spinning {
			// The caller incremented nmspinning, so set m.spinning in the new M.
			fn = mspinning
		}
		newm(fn, pp, id)

		if lockheld {
			lock(&sched.lock)
		}
		// Ownership transfer of pp committed by start in newm.
		// Preemption is now safe.
		releasem(mp)
		return
	}
	if !lockheld {
		unlock(&sched.lock)
	}
	if nmp.spinning {
		throw("startm: m is spinning")
	}
	if nmp.nextp != 0 {
		throw("startm: m has p")
	}
	if spinning && !runqempty(pp) {
		throw("startm: p has runnable gs")
	}
	// The caller incremented nmspinning, so set m.spinning in the new M.
	nmp.spinning = spinning
	nmp.nextp.set(pp)
	notewakeup(&nmp.park)
	// Ownership transfer of pp committed by wakeup. Preemption is now
	// safe.
	releasem(mp)
}

// Hands off P from syscall or locked M.
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func handoffp(pp *p) {
	// handoffp must start an M in any situation where
	// findRunnable would return a G to run on pp.

	// if it has local work, or the global run queue is non-empty,
	// start it straight away
	if !runqempty(pp) || !sched.runq.empty() {
		startm(pp, false, false)
		return
	}
	// if there's trace work to do, start it straight away
	if (traceEnabled() || traceShuttingDown()) && traceReaderAvailable() != nil {
		startm(pp, false, false)
		return
	}
	// if it has GC work, start it straight away
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) {
		startm(pp, false, false)
		return
	}
	// no local work, check that there are no spinning M's or idle P's
	// (nmspinning and npidle are both zero), otherwise our help is not
	// required
	if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
		sched.needspinning.Store(0)
		startm(pp, true, false)
		return
	}
	lock(&sched.lock)
	if sched.gcwaiting.Load() {
		// A stop is pending: park pp in _Pgcstop and wake the
		// waiter on stopnote if this was the last P it waits for.
		pp.status = _Pgcstop
		pp.gcStopTime = nanotime()
		sched.stopwait--
		if sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
		unlock(&sched.lock)
		return
	}
	if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) {
		sched.safePointFn(pp)
		sched.safePointWait--
		if sched.safePointWait == 0 {
			notewakeup(&sched.safePointNote)
		}
	}
	// Recheck the global run queue now that sched.lock is held.
	if !sched.runq.empty() {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}
	// If this is the last running P and nobody is polling network,
	// need to wakeup another M to poll network.
	if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}

	// The scheduler lock cannot be held when calling wakeNetPoller below
	// because wakeNetPoller may call wakep which may call startm.
	when := pp.timers.wakeTime()
	pidleput(pp, 0)
	unlock(&sched.lock)

	if when != 0 {
		wakeNetPoller(when)
	}
}

// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
// Must be called with a P.
//
// wakep should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname wakep
func wakep() {
	// Be conservative about spinning threads, only start one if none exist
	// already.
	if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) {
		return
	}

	// Disable preemption until ownership of pp transfers to the next M in
	// startm. Otherwise preemption here would leave pp stuck waiting to
	// enter _Pgcstop.
	//
	// See preemption comment on acquirem in startm for more details.
	mp := acquirem()

	var pp *p
	lock(&sched.lock)
	pp, _ = pidlegetSpinning(0)
	if pp == nil {
		// No idle P: undo the nmspinning increment made above.
		if sched.nmspinning.Add(-1) < 0 {
			throw("wakep: negative nmspinning")
		}
		unlock(&sched.lock)
		releasem(mp)
		return
	}
	// Since we always have a P, the race in the "No M is available"
	// comment in startm doesn't apply during the small window between the
	// unlock here and lock in startm. A checkdead in between will always
	// see at least one running M (ours).
	unlock(&sched.lock)

	startm(pp, true, false)

	releasem(mp)
}

// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.
func stoplockedm() {
	gp := getg()

	// The M and its locked G must point at each other.
	if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m {
		throw("stoplockedm: inconsistent locking")
	}
	if gp.m.p != 0 {
		// Schedule another M to run this p.
		pp := releasep()
		handoffp(pp)
	}
	incidlelocked(1)
	// Wait until another thread schedules lockedg again.
	mPark()
	// Ignore the _Gscan bit when checking that lockedg is runnable.
	status := readgstatus(gp.m.lockedg.ptr())
	if status&^_Gscan != _Grunnable {
		print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n")
		dumpgstatus(gp.m.lockedg.ptr())
		throw("stoplockedm: not runnable")
	}
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// Schedules the locked m to run the locked gp.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func startlockedm(gp *g) {
	mp := gp.lockedm.ptr()
	if mp == getg().m {
		throw("startlockedm: locked to me")
	}
	if mp.nextp != 0 {
		throw("startlockedm: m has p")
	}
	// directly handoff current P to the locked m
	incidlelocked(-1)
	pp := releasep()
	mp.nextp.set(pp)
	notewakeup(&mp.park)
	// We gave our P away, so park this M until it is handed a new one.
	stopm()
}

// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
func gcstopm() {
	gp := getg()

	if !sched.gcwaiting.Load() {
		throw("gcstopm: not waiting for gc")
	}
	if gp.m.spinning {
		gp.m.spinning = false
		// OK to just drop nmspinning here,
		// startTheWorld will unpark threads as necessary.
		if sched.nmspinning.Add(-1) < 0 {
			throw("gcstopm: negative nmspinning")
		}
	}
	pp := releasep()
	lock(&sched.lock)
	pp.status = _Pgcstop
	pp.gcStopTime = nanotime()
	// Count this P as stopped and wake the waiter on stopnote once
	// stopwait reaches zero.
	sched.stopwait--
	if sched.stopwait == 0 {
		notewakeup(&sched.stopnote)
	}
	unlock(&sched.lock)
	stopm()
}

// Schedules gp to run on the current M.
// If inheritTime is true, gp inherits the remaining time in the
// current time slice. Otherwise, it starts a new time slice.
// Never returns.
//
// Write barriers are allowed because this is called immediately after
// acquiring a P in several places.
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
	mp := getg().m

	if goroutineProfile.active {
		// Make sure that gp has had its stack written out to the goroutine
		// profile, exactly as it was when the goroutine profiler first stopped
		// the world.
		tryRecordGoroutineProfile(gp, nil, osyield)
	}

	// Assign gp.m before entering _Grunning so running Gs have an M.
	mp.curg = gp
	gp.m = mp
	gp.syncSafePoint = false // Clear the flag, which may have been set by morestack.
	casgstatus(gp, _Grunnable, _Grunning)
	gp.waitsince = 0
	gp.preempt = false
	gp.stackguard0 = gp.stack.lo + stackGuard
	if !inheritTime {
		// A new time slice starts: bump the P's schedtick.
		mp.p.ptr().schedtick++
	}

	if sys.DITSupported && debug.dataindependenttiming != 1 {
		if gp.ditWanted && !mp.ditEnabled {
			// The current M doesn't have DIT enabled, but the goroutine we're
			// executing does need it, so turn it on.
			sys.EnableDIT()
			mp.ditEnabled = true
		} else if !gp.ditWanted && mp.ditEnabled {
			// The current M has DIT enabled, but the goroutine we're executing does
			// not need it, so turn it off.
			// NOTE: turning off DIT here means that the scheduler will have DIT enabled
			// when it runs after this goroutine yields or is preempted. This may have
			// a minor performance impact on the scheduler.
			sys.DisableDIT()
			mp.ditEnabled = false
		}
	}

	// Check whether the profiler needs to be turned on or off.
	hz := sched.profilehz
	if mp.profilehz != hz {
		setThreadCPUProfiler(hz)
	}

	trace := traceAcquire()
	if trace.ok() {
		trace.GoStart()
		traceRelease(trace)
	}

	gogo(&gp.sched)
}

// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from local or global queue, poll network.
// tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
// reader) so the caller should try to wake a P.
func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
	mp := getg().m

	// The conditions here and in handoffp must agree: if
	// findRunnable would return a G to run, handoffp must start
	// an M.

top:
	// We may have collected an allp snapshot below. The snapshot is only
	// required in each loop iteration. Clear it to allow the GC to collect
	// the slice.
	mp.clearAllpSnapshot()

	pp := mp.p.ptr()
	if sched.gcwaiting.Load() {
		gcstopm()
		goto top
	}
	if pp.runSafePointFn != 0 {
		runSafePointFn()
	}

	// now and pollUntil are saved for work stealing later,
	// which may steal timers. It's important that between now
	// and then, nothing blocks, so these numbers remain mostly
	// relevant.
	now, pollUntil, _ := pp.timers.check(0, nil)

	// Try to schedule the trace reader.
	if traceEnabled() || traceShuttingDown() {
		gp := traceReader()
		if gp != nil {
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, true
		}
	}

	// Try to schedule a GC worker.
	if gcBlackenEnabled != 0 {
		gp, tnow := gcController.findRunnableGCWorker(pp, now)
		if gp != nil {
			return gp, false, true
		}
		now = tnow
	}

	// Check the global runnable queue once in a while to ensure fairness.
	// Otherwise two goroutines can completely occupy the local runqueue
	// by constantly respawning each other.
	if pp.schedtick%61 == 0 && !sched.runq.empty() {
		lock(&sched.lock)
		gp := globrunqget()
		unlock(&sched.lock)
		if gp != nil {
			return gp, false, false
		}
	}

	// Wake up the finalizer G.
	if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
		if gp := wakefing(); gp != nil {
			ready(gp, 0, true)
		}
	}

	// Wake up one or more cleanup Gs.
	if gcCleanups.needsWake() {
		gcCleanups.wake()
	}

	if *cgo_yield != nil {
		asmcgocall(*cgo_yield, nil)
	}

	// local runq
	if gp, inheritTime := runqget(pp); gp != nil {
		return gp, inheritTime, false
	}

	// global runq
	if !sched.runq.empty() {
		lock(&sched.lock)
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp != nil {
			if runqputbatch(pp, &q); !q.empty() {
				throw("Couldn't put Gs into empty local runq")
			}
			return gp, false, false
		}
	}

	// Poll network.
	// This netpoll is only an optimization before we resort to stealing.
	// We can safely skip it if there are no waiters or a thread is blocked
	// in netpoll already. If there is any kind of logical race with that
	// blocked thread (e.g. it has already returned from netpoll, but does
	// not set lastpoll yet), this thread will do blocking netpoll below
	// anyway.
	// We only poll from one thread at a time to avoid kernel contention
	// on machines with many cores.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 && sched.pollingNet.Swap(1) == 0 {
		list, delta := netpoll(0)
		sched.pollingNet.Store(0)
		if !list.empty() { // non-blocking
			gp := list.pop()
			injectglist(&list)
			netpollAdjustWaiters(delta)
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
	}

	// Spinning Ms: steal work from other Ps.
	//
	// Limit the number of spinning Ms to half the number of busy Ps.
	// This is necessary to prevent excessive CPU consumption when
	// GOMAXPROCS>>1 but the program parallelism is low.
	if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
		if !mp.spinning {
			mp.becomeSpinning()
		}

		gp, inheritTime, tnow, w, newWork := stealWork(now)
		if gp != nil {
			// Successfully stole.
			return gp, inheritTime, false
		}
		if newWork {
			// There may be new timer or GC work; restart to
			// discover.
			goto top
		}

		now = tnow
		if w != 0 && (pollUntil == 0 || w < pollUntil) {
			// Earlier timer to wait for.
			pollUntil = w
		}
	}

	// We have nothing to do.
	//
	// If we're in the GC mark phase, can safely scan and blacken objects,
	// and have work to do, run idle-time marking rather than give up the P.
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() {
		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
		if node != nil {
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			gp := node.gp.ptr()

			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
		// No worker G was available; undo addIdleMarkWorker.
		gcController.removeIdleMarkWorker()
	}

	// wasm only:
	// If a callback returned and no other goroutine is awake,
	// then wake event handler goroutine which pauses execution
	// until a callback was triggered.
	gp, otherReady := beforeIdle(now, pollUntil)
	if gp != nil {
		trace := traceAcquire()
		casgstatus(gp, _Gwaiting, _Grunnable)
		if trace.ok() {
			trace.GoUnpark(gp, 0)
			traceRelease(trace)
		}
		return gp, false, false
	}
	if otherReady {
		goto top
	}

	// Before we drop our P, make a snapshot of the allp slice,
	// which can change underfoot once we no longer block
	// safe-points. We don't need to snapshot the contents because
	// everything up to cap(allp) is immutable.
	//
	// We clear the snapshot from the M after return via
	// mp.clearAllpSnapshot (in schedule) and on each iteration of the top
	// loop.
	allpSnapshot := mp.snapshotAllp()
	// Also snapshot masks. Value changes are OK, but we can't allow
	// len to change out from under us.
	idlepMaskSnapshot := idlepMask
	timerpMaskSnapshot := timerpMask

	// return P and block
	lock(&sched.lock)
	if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
		unlock(&sched.lock)
		goto top
	}
	if !sched.runq.empty() {
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp == nil {
			throw("global runq empty with non-zero runqsize")
		}
		if runqputbatch(pp, &q); !q.empty() {
			throw("Couldn't put Gs into empty local runq")
		}
		return gp, false, false
	}
	if !mp.spinning && sched.needspinning.Load() == 1 {
		// See "Delicate dance" comment below.
		mp.becomeSpinning()
		unlock(&sched.lock)
		goto top
	}
	if releasep() != pp {
		throw("findRunnable: wrong p")
	}
	now = pidleput(pp, now)
	unlock(&sched.lock)

	// Delicate dance: thread transitions from spinning to non-spinning
	// state, potentially concurrently with submission of new work. We must
	// drop nmspinning first and then check all sources again (with
	// #StoreLoad memory barrier in between). If we do it the other way
	// around, another thread can submit work after we've checked all
	// sources but before we drop nmspinning; as a result nobody will
	// unpark a thread to run the work.
	//
	// This applies to the following sources of work:
	//
	// * Goroutines added to the global or a per-P run queue.
	// * New/modified-earlier timers on a per-P timer heap.
	// * Idle-priority GC work (barring golang.org/issue/19112).
	//
	// If we discover new work below, we need to restore m.spinning as a
	// signal for resetspinning to unpark a new worker thread (because
	// there can be more than one starving goroutine).
	//
	// However, if after discovering new work we also observe no idle Ps
	// (either here or in resetspinning), we have a problem. We may be
	// racing with a non-spinning M in the block above, having found no
	// work and preparing to release its P and park. Allowing that P to go
	// idle will result in loss of work conservation (idle P while there is
	// runnable work). This could result in complete deadlock in the
	// unlikely event that we discover new work (from netpoll) right as we
	// are racing with _all_ other Ps going idle.
	//
	// We use sched.needspinning to synchronize with non-spinning Ms going
	// idle. If needspinning is set when they are about to drop their P,
	// they abort the drop and instead become a new spinning M on our
	// behalf. If we are not racing and the system is truly fully loaded
	// then no spinning threads are required, and the next thread to
	// naturally become spinning will clear the flag.
	//
	// Also see "Worker thread parking/unparking" comment at the top of the
	// file.
	wasSpinning := mp.spinning
	if mp.spinning {
		mp.spinning = false
		if sched.nmspinning.Add(-1) < 0 {
			throw("findRunnable: negative nmspinning")
		}

		// Note that for correctness, only the last M transitioning from
		// spinning to non-spinning must perform these rechecks to
		// ensure no missed work. However, the runtime has some cases
		// of transient increments of nmspinning that are decremented
		// without going through this path, so we must be conservative
		// and perform the check on all spinning Ms.
		//
		// See https://go.dev/issue/43997.

		// Check global and P runqueues again.

		lock(&sched.lock)
		if !sched.runq.empty() {
			pp, _ := pidlegetSpinning(0)
			if pp != nil {
				gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
				unlock(&sched.lock)
				if gp == nil {
					throw("global runq empty with non-zero runqsize")
				}
				if runqputbatch(pp, &q); !q.empty() {
					throw("Couldn't put Gs into empty local runq")
				}
				acquirep(pp)
				mp.becomeSpinning()
				return gp, false, false
			}
		}
		unlock(&sched.lock)

		pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()
			goto top
		}

		// Check for idle-priority GC work again.
		pp, gp := checkIdleGCNoP()
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()

			// Run the idle worker.
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}

		// Finally, check for timer creation or expiry concurrently with
		// transitioning from spinning to non-spinning.
		//
		// Note that we cannot use checkTimers here because it calls
		// adjusttimers which may need to allocate memory, and that isn't
		// allowed when we don't have an active P.
		pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
	}

	// We don't need allp anymore at this point, but can't clear the
	// snapshot without a P for the write barrier.

	// Poll network until next timer.
	if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
		sched.pollUntil.Store(pollUntil)
		if mp.p != 0 {
			throw("findRunnable: netpoll with p")
		}
		if mp.spinning {
			throw("findRunnable: netpoll with spinning")
		}
		// delay < 0 is passed to netpoll when there is no timer to wait
		// for (pollUntil == 0); otherwise wait until pollUntil, clamped
		// to zero if it has already passed.
		delay := int64(-1)
		if pollUntil != 0 {
			if now == 0 {
				now = nanotime()
			}
			delay = pollUntil - now
			if delay < 0 {
				delay = 0
			}
		}
		if faketime != 0 {
			// When using fake time, just poll.
			delay = 0
		}
		list, delta := netpoll(delay) // block until new work is available
		// Refresh now again, after potentially blocking.
		now = nanotime()
		sched.pollUntil.Store(0)
		sched.lastpoll.Store(now)
		if faketime != 0 && list.empty() {
			// Using fake time and nothing is ready; stop M.
			// When all M's stop, checkdead will call timejump.
			stopm()
			goto top
		}
		lock(&sched.lock)
		pp, _ := pidleget(now)
		unlock(&sched.lock)
		if pp == nil {
			// No P to run on: hand the ready list to injectglist.
			injectglist(&list)
			netpollAdjustWaiters(delta)
		} else {
			acquirep(pp)
			if !list.empty() {
				gp := list.pop()
				injectglist(&list)
				netpollAdjustWaiters(delta)
				trace := traceAcquire()
				casgstatus(gp, _Gwaiting, _Grunnable)
				if trace.ok() {
					trace.GoUnpark(gp, 0)
					traceRelease(trace)
				}
				return gp, false, false
			}
			if wasSpinning {
				mp.becomeSpinning()
			}
			goto top
		}
	} else if pollUntil != 0 && netpollinited() {
		// Another thread is (or was) polling. Interrupt it if it is
		// not waiting on a deadline at least as early as ours.
		pollerPollUntil := sched.pollUntil.Load()
		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
			netpollBreak()
		}
	}
	stopm()
	goto top
}

// pollWork reports whether there is non-background work this P could
// be doing. This is a fairly lightweight check to be used for
// background work loops, like idle GC. It checks a subset of the
// conditions checked by the actual scheduler.
func pollWork() bool {
	// Global run queue.
	if !sched.runq.empty() {
		return true
	}
	// This P's local run queue.
	p := getg().m.p.ptr()
	if !runqempty(p) {
		return true
	}
	// Non-blocking netpoll; any ready goroutines are injected.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
		if list, delta := netpoll(0); !list.empty() {
			injectglist(&list)
			netpollAdjustWaiters(delta)
			return true
		}
	}
	return false
}

// stealWork attempts to steal a runnable goroutine or timer from any P.
//
// If newWork is true, new work may have been readied.
//
// If now is not 0 it is the current time. stealWork returns the passed time or
// the current time if now was passed as 0.
//
// The returned pollUntil is the earliest non-zero wake time reported by the
// timer checks performed here, or 0 if none was reported.
func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) {
	pp := getg().m.p.ptr()

	ranTimer := false

	const stealTries = 4
	for i := 0; i < stealTries; i++ {
		// Only the final pass checks other Ps' timers and lets
		// runqsteal take runnext (see the comment below).
		stealTimersOrRunNextG := i == stealTries-1

		for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
			if sched.gcwaiting.Load() {
				// GC work may be available.
				return nil, false, now, pollUntil, true
			}
			p2 := allp[enum.position()]
			if pp == p2 {
				continue
			}

			// Steal timers from p2. This call to p2.timers.check is the only
			// place where we might hold a lock on a different P's timers. We do
			// this once on the last pass before checking runnext because stealing
			// from the other P's runnext should be the last resort, so if there
			// are timers to steal do that first.
			//
			// We only check timers on one of the stealing iterations because
			// the time stored in now doesn't change in this loop and checking
			// the timers for each P more than once with the same value of now
			// is probably a waste of time.
			//
			// timerpMask tells us whether the P may have timers at all. If it
			// can't, no need to check at all.
			if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
				tnow, w, ran := p2.timers.check(now, nil)
				now = tnow
				if w != 0 && (pollUntil == 0 || w < pollUntil) {
					pollUntil = w
				}
				if ran {
					// Running the timers may have
					// made an arbitrary number of G's
					// ready and added them to this P's
					// local run queue. That invalidates
					// the assumption of runqsteal
					// that it always has room to add
					// stolen G's. So check now if there
					// is a local G to run.
					if gp, inheritTime := runqget(pp); gp != nil {
						return gp, inheritTime, now, pollUntil, ranTimer
					}
					ranTimer = true
				}
			}

			// Don't bother to attempt to steal if p2 is idle.
			if !idlepMask.read(enum.position()) {
				if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil {
					return gp, false, now, pollUntil, ranTimer
				}
			}
		}
	}

	// No goroutines found to steal. Regardless, running a timer may have
	// made some goroutine ready that we missed. Indicate the next timer to
	// wait for.
	return nil, false, now, pollUntil, ranTimer
}

// Check all Ps for a runnable G to steal.
//
// On entry we have no P. If a G is available to steal and a P is available,
// the P is returned which the caller should acquire and attempt to steal the
// work to.
func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p {
	for id, p2 := range allpSnapshot {
		// Only consider Ps not marked idle in the snapshot whose run
		// queue is non-empty.
		if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
			lock(&sched.lock)
			pp, _ := pidlegetSpinning(0)
			if pp == nil {
				// Can't get a P, don't bother checking remaining Ps.
				unlock(&sched.lock)
				return nil
			}
			unlock(&sched.lock)
			return pp
		}
	}

	// No work available.
	return nil
}

// Check all Ps for a timer expiring sooner than pollUntil.
//
// Returns updated pollUntil value.
func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 {
	for id, p2 := range allpSnapshot {
		// Only Ps whose bit is set in the timer mask snapshot are consulted.
		if timerpMaskSnapshot.read(uint32(id)) {
			w := p2.timers.wakeTime()
			// A zero wake time is ignored. Otherwise keep the earliest
			// nonzero time seen so far (pollUntil == 0 means "none yet").
			if w != 0 && (pollUntil == 0 || w < pollUntil) {
				pollUntil = w
			}
		}
	}

	return pollUntil
}

// Check for idle-priority GC, without a P on entry.
//
// If some GC work, a P, and a worker G are all available, the P and G will be
// returned. The returned P has not been wired yet.
//
// On every failure path (nil, nil) is returned and any P that was taken from
// the idle list is put back before sched.lock is released.
func checkIdleGCNoP() (*p, *g) {
	// N.B. Since we have no P, gcBlackenEnabled may change at any time; we
	// must check again after acquiring a P. As an optimization, we also check
	// if an idle mark worker is needed at all. This is OK here, because if we
	// observe that one isn't needed, at least one is currently running. Even if
	// it stops running, its own journey into the scheduler should schedule it
	// again, if need be (at which point, this check will pass, if relevant).
	if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() {
		return nil, nil
	}
	if !gcShouldScheduleWorker(nil) {
		return nil, nil
	}

	// Work is available; we can start an idle GC worker only if there is
	// an available P and available worker G.
	//
	// We can attempt to acquire these in either order, though both have
	// synchronization concerns (see below). Workers are almost always
	// available (see comment in findRunnableGCWorker for the one case
	// there may be none). Since we're slightly less likely to find a P,
	// check for that first.
	//
	// Synchronization: note that we must hold sched.lock until we are
	// committed to keeping it. Otherwise we cannot put the unnecessary P
	// back in sched.pidle without performing the full set of idle
	// transition checks.
	//
	// If we were to check gcBgMarkWorkerPool first, we must somehow handle
	// the assumption in gcControllerState.findRunnableGCWorker that an
	// empty gcBgMarkWorkerPool is only possible if gcMarkDone is running.
	lock(&sched.lock)
	pp, now := pidlegetSpinning(0)
	if pp == nil {
		unlock(&sched.lock)
		return nil, nil
	}

	// Now that we own a P, gcBlackenEnabled can't change (as it requires STW).
	if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() {
		// Not needed after all: return the P to the idle list while
		// still holding sched.lock.
		pidleput(pp, now)
		unlock(&sched.lock)
		return nil, nil
	}

	node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
	if node == nil {
		// No worker G available: give the P back and undo the
		// addIdleMarkWorker above.
		pidleput(pp, now)
		unlock(&sched.lock)
		gcController.removeIdleMarkWorker()
		return nil, nil
	}

	unlock(&sched.lock)

	return pp, node.gp.ptr()
}

// wakeNetPoller wakes up the thread sleeping in the network poller if it isn't
// going to wake up before the when argument; or it wakes an idle P to service
// timers and the network poller if there isn't one already.
func wakeNetPoller(when int64) {
	// sched.lastpoll == 0 is the state findRunnable leaves behind (via
	// Swap(0)) while a thread is blocked in netpoll.
	if sched.lastpoll.Load() == 0 {
		// In findRunnable we ensure that when polling the pollUntil
		// field is either zero or the time to which the current
		// poll is expected to run. This can have a spurious wakeup
		// but should never miss a wakeup.
		pollerPollUntil := sched.pollUntil.Load()
		if pollerPollUntil == 0 || pollerPollUntil > when {
			netpollBreak()
		}
	} else {
		// There are no threads in the network poller, try to get
		// one there so it can handle new timers.
		if GOOS != "plan9" { // Temporary workaround - see issue #42303.
			wakep()
		}
	}
}

// resetspinning takes the current M out of the spinning state: it clears
// m.spinning, decrements sched.nmspinning, and then calls wakep.
//
// It throws if the current M is not spinning or if sched.nmspinning would
// become negative.
func resetspinning() {
	gp := getg()
	if !gp.m.spinning {
		throw("resetspinning: not a spinning m")
	}
	gp.m.spinning = false
	nmspinning := sched.nmspinning.Add(-1)
	if nmspinning < 0 {
		throw("findRunnable: negative nmspinning")
	}
	// M wakeup policy is deliberately somewhat conservative, so check if we
	// need to wakeup another P here. See "Worker thread parking/unparking"
	// comment at the top of the file for details.
	wakep()
}

// injectglist adds each runnable G on the list to some run queue,
// and clears glist. If there is no current P, they are added to the
// global queue, and up to npidle M's are started to run them.
// Otherwise, for each idle P, this adds a G to the global queue
// and starts an M. Any remaining G's are added to the current P's
// local run queue.
// This may temporarily acquire sched.lock.
// Can run concurrently with GC.
func injectglist(glist *gList) {
	if glist.empty() {
		return
	}

	// Mark all the goroutines as runnable before we put them
	// on the run queues.
	var tail *g
	trace := traceAcquire()
	for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
		tail = gp
		casgstatus(gp, _Gwaiting, _Grunnable)
		if trace.ok() {
			trace.GoUnpark(gp, 0)
		}
	}
	if trace.ok() {
		traceRelease(trace)
	}

	// Turn the gList into a gQueue.
	q := gQueue{glist.head, tail.guintptr(), glist.size}
	*glist = gList{}

	// startIdle starts up to n M's, one per idle P it manages to obtain,
	// and stops early as soon as no idle P is left.
	startIdle := func(n int32) {
		for ; n > 0; n-- {
			mp := acquirem() // See comment in startm.
			lock(&sched.lock)

			pp, _ := pidlegetSpinning(0)
			if pp == nil {
				unlock(&sched.lock)
				releasem(mp)
				break
			}

			startm(pp, false, true)
			unlock(&sched.lock)
			releasem(mp)
		}
	}

	pp := getg().m.p.ptr()
	if pp == nil {
		// No current P: everything goes to the global queue.
		n := q.size
		lock(&sched.lock)
		globrunqputbatch(&q)
		unlock(&sched.lock)
		startIdle(n)
		return
	}

	// Move one G per idle P (as of this load of sched.npidle) to a
	// separate batch destined for the global queue.
	var globq gQueue
	npidle := sched.npidle.Load()
	for ; npidle > 0 && !q.empty(); npidle-- {
		g := q.pop()
		globq.pushBack(g)
	}
	if !globq.empty() {
		n := globq.size
		lock(&sched.lock)
		globrunqputbatch(&globq)
		unlock(&sched.lock)
		startIdle(n)
	}

	// The rest go to the local run queue; whatever is still left in q
	// afterwards is put on the global queue.
	if runqputbatch(pp, &q); !q.empty() {
		lock(&sched.lock)
		globrunqputbatch(&q)
		unlock(&sched.lock)
	}

	// Some P's might have become idle after we loaded `sched.npidle`
	// but before any goroutines were added to the queue, which could
	// lead to idle P's when there is work available in the global queue.
	// That could potentially last until other goroutines become ready
	// to run. That said, we need to find a way to hedge against that.
	//
	// Calling wakep() here is the best bet, it will do nothing in the
	// common case (no racing on `sched.npidle`), while it could wake one
	// more P to execute G's, which might end up with >1 P's: the first one
	// wakes another P and so forth until there is no more work, but this
	// ought to be an extremely rare case.
	//
	// Also see "Worker thread parking/unparking" comment at the top of the file for details.
	wakep()
}

// One round of scheduler: find a runnable goroutine and execute it.
// Never returns.
func schedule() {
	mp := getg().m

	if mp.locks != 0 {
		throw("schedule: holding locks")
	}

	// An M with a locked G only ever runs that G.
	if mp.lockedg != 0 {
		stoplockedm()
		execute(mp.lockedg.ptr(), false) // Never returns.
	}

	// We should not schedule away from a g that is executing a cgo call,
	// since the cgo call is using the m's g0 stack.
	if mp.incgo {
		throw("schedule: in cgo")
	}

top:
	pp := mp.p.ptr()
	pp.preempt = false

	// Safety check: if we are spinning, the run queue should be empty.
	// Check this before calling checkTimers, as that might call
	// goready to put a ready goroutine on the local run queue.
	if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
		throw("schedule: spinning with local work")
	}

	gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available

	// May be on a new P.
	pp = mp.p.ptr()

	// findRunnable may have collected an allp snapshot. The snapshot is
	// only required within findRunnable. Clear it to allow GC to collect the
	// slice.
	mp.clearAllpSnapshot()

	// If the P was assigned a next GC mark worker but findRunnable
	// selected anything else, release the worker so another P may run it.
	//
	// N.B. If this occurs because a higher-priority goroutine was selected
	// (trace reader), then tryWakeP is set, which will wake another P to
	// run the worker. If this occurs because the GC is no longer active,
	// there is no need to wakep.
	gcController.releaseNextGCMarkWorker(pp)

	if debug.dontfreezetheworld > 0 && freezing.Load() {
		// See comment in freezetheworld. We don't want to perturb
		// scheduler state, so we didn't gcstopm in findRunnable, but
		// also don't want to allow new goroutines to run.
		//
		// Deadlock here rather than in the findRunnable loop so if
		// findRunnable is stuck in a loop we don't perturb that
		// either.
		//
		// The second lock of the same mutex is the intentional
		// self-deadlock.
		lock(&deadlock)
		lock(&deadlock)
	}

	// This thread is going to run a goroutine and is not spinning anymore,
	// so if it was marked as spinning we need to reset it now and potentially
	// start a new spinning M.
	if mp.spinning {
		resetspinning()
	}

	if sched.disable.user && !schedEnabled(gp) {
		// Scheduling of this goroutine is disabled. Put it on
		// the list of pending runnable goroutines for when we
		// re-enable user scheduling and look again.
		lock(&sched.lock)
		if schedEnabled(gp) {
			// Something re-enabled scheduling while we
			// were acquiring the lock.
			unlock(&sched.lock)
		} else {
			sched.disable.runnable.pushBack(gp)
			unlock(&sched.lock)
			goto top
		}
	}

	// If about to schedule a not-normal goroutine (a GCworker or tracereader),
	// wake a P if there is one.
	if tryWakeP {
		wakep()
	}
	if gp.lockedm != 0 {
		// Hands off own p to the locked m,
		// then blocks waiting for a new p.
		startlockedm(gp)
		goto top
	}

	execute(gp, inheritTime)
}

// dropg removes the association between m and the current goroutine m->curg (gp for short).
// Typically a caller sets gp's status away from Grunning and then
// immediately calls dropg to finish the job. The caller is also responsible
// for arranging that gp will be restarted using ready at an
// appropriate time. After calling dropg and arranging for gp to be
// readied later, the caller can do other work but eventually should
// call schedule to restart the scheduling of goroutines on this m.
func dropg() {
	gp := getg()

	// Clear both directions of the link: first curg.m, then m.curg.
	setMNoWB(&gp.m.curg.m, nil)
	setGNoWB(&gp.m.curg, nil)
}

// parkunlock_c unlocks the mutex that lock points to and reports true.
// gp is unused.
//
// Its (gp, lock) -> bool shape matches how park_m invokes mp.waitunlockf;
// presumably it is installed there by a caller outside this section.
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
	unlock((*mutex)(lock))
	return true
}

// park continuation on g0.
//
// park_m moves gp from _Grunning to _Gwaiting, detaches it from the M, and
// then runs mp.waitunlockf if one is set. If that function returns false the
// park is abandoned: gp is made runnable again and resumed immediately.
// Otherwise the M goes on to schedule.
func park_m(gp *g) {
	mp := getg().m

	trace := traceAcquire()

	// If g is in a synctest group, we don't want to let the group
	// become idle until after the waitunlockf (if any) has confirmed
	// that the park is happening.
	// We need to record gp.bubble here, since waitunlockf can change it.
	bubble := gp.bubble
	if bubble != nil {
		bubble.incActive()
	}

	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
	}
	// N.B. Not using casGToWaiting here because the waitreason is
	// set by park_m's caller.
	casgstatus(gp, _Grunning, _Gwaiting)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()

	if fn := mp.waitunlockf; fn != nil {
		ok := fn(gp, mp.waitlock)
		mp.waitunlockf = nil
		mp.waitlock = nil
		if !ok {
			// The park was refused: undo the transition and run gp
			// again right away.
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if bubble != nil {
				bubble.decActive()
			}
			if trace.ok() {
				trace.GoUnpark(gp, 2)
				traceRelease(trace)
			}
			execute(gp, true) // Schedule it back, never returns.
		}
	}

	// Balance the incActive above now that the park is confirmed.
	if bubble != nil {
		bubble.decActive()
	}

	schedule()
}

// goschedImpl moves gp from _Grunning to _Grunnable, detaches it from the M,
// requeues it, and calls schedule.
//
// preempted selects the trace event (GoPreempt rather than GoSched) and, when
// sched.gcwaiting is also set, makes gp go onto the current P via runqput
// instead of onto the global run queue.
//
// It throws if gp is not in _Grunning (ignoring the _Gscan bit).
func goschedImpl(gp *g, preempted bool) {
	pp := gp.m.p.ptr()
	trace := traceAcquire()
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		if preempted {
			trace.GoPreempt()
		} else {
			trace.GoSched()
		}
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()
	if preempted && sched.gcwaiting.Load() {
		// If preempted for STW, keep the G on the local P in runnext
		// so it can keep running immediately after the STW.
		runqput(pp, gp, true)
	} else {
		lock(&sched.lock)
		globrunqput(gp)
		unlock(&sched.lock)
	}

	if mainStarted {
		wakep()
	}

	schedule()
}

// Gosched continuation on g0.
func gosched_m(gp *g) {
	goschedImpl(gp, false)
}

// goschedguarded is a forbidden-states-avoided version of gosched_m.
//
// If canPreemptM reports false for gp's M, gp is resumed immediately via
// gogo instead of being rescheduled.
func goschedguarded_m(gp *g) {
	if !canPreemptM(gp.m) {
		gogo(&gp.sched) // never return
	}
	goschedImpl(gp, false)
}

// gopreempt_m is goschedImpl with preempted set to true.
func gopreempt_m(gp *g) {
	goschedImpl(gp, true)
}

// preemptPark parks gp and puts it in _Gpreempted.
//
//go:systemstack
func preemptPark(gp *g) {
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}

	if gp.asyncSafePoint {
		// Double-check that async preemption does not
		// happen in SPWRITE assembly functions.
		// isAsyncSafePoint must exclude this case.
		f := findfunc(gp.sched.pc)
		if !f.valid() {
			throw("preempt at unknown pc")
		}
		if f.flag&abi.FuncFlagSPWrite != 0 {
			println("runtime: unexpected SPWRITE function", funcname(f), "in async preempt")
			throw("preempt SPWRITE")
		}
	}

	// Transition from _Grunning to _Gscan|_Gpreempted. We can't
	// be in _Grunning when we dropg because then we'd be running
	// without an M, but the moment we're in _Gpreempted,
	// something could claim this G before we've fully cleaned it
	// up. Hence, we set the scan bit to lock down further
	// transitions until we can dropg.
	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)

	// Be careful about ownership as we trace this next event.
	//
	// According to the tracer invariants (trace.go) it's unsafe
	// for us to emit an event for a goroutine we do not own.
	// The moment we CAS into _Gpreempted, suspendG could CAS the
	// goroutine to _Gwaiting, effectively taking ownership. All of
	// this could happen before we even get the chance to emit
	// an event. The end result is that the events could appear
	// out of order, and the tracer generally assumes the scheduler
	// takes care of the ordering between GoPark and GoUnpark.
	//
	// The answer here is simple: emit the event while we still hold
	// the _Gscan bit on the goroutine, since the _Gscan bit means
	// ownership over transitions.
	//
	// We still need to traceAcquire and traceRelease across the CAS
	// because the tracer could be what's calling suspendG in the first
	// place. This also upholds the tracer invariant that we must hold
	// traceAcquire/traceRelease across the transition. However, we
	// specifically *only* emit the event while we still have ownership.
	trace := traceAcquire()
	if trace.ok() {
		trace.GoPark(traceBlockPreempted, 0)
	}

	// Drop the goroutine from the M. Only do this after the tracer has
	// emitted an event, because it needs the association for GoPark to
	// work correctly.
	dropg()

	// Drop the scan bit and release the trace locker if necessary.
	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
	if trace.ok() {
		traceRelease(trace)
	}

	// All done.
	schedule()
}

// goyield is like Gosched, but it:
//   - emits a GoPreempt trace event instead of a GoSched trace event
//   - puts the current G on the runq of the current P instead of the globrunq
//
// goyield should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goyield
func goyield() {
	checkTimeouts()
	mcall(goyield_m)
}

// goyield_m is the function goyield runs via mcall. It marks gp runnable,
// detaches it from the M, puts it on the current P's run queue (runqput with
// false as the last argument), and calls schedule.
func goyield_m(gp *g) {
	trace := traceAcquire()
	pp := gp.m.p.ptr()
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPreempt()
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}
	dropg()
	runqput(pp, gp, false)
	schedule()
}

// Finishes execution of the current goroutine.
//
// After the race-detector and tracer bookkeeping, the rest of the work is
// done by goexit0, which is run via mcall.
func goexit1() {
	if raceenabled {
		if gp := getg(); gp.bubble != nil {
			racereleasemergeg(gp, gp.bubble.raceaddr())
		}
		racegoend()
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.GoEnd()
		traceRelease(trace)
	}
	mcall(goexit0)
}

// goexit continuation on g0.
func goexit0(gp *g) {
	if goexperiment.RuntimeSecret && gp.secret > 0 {
		// Erase the whole stack. This path only occurs when
		// runtime.Goexit is called from within a runtime/secret.Do call.
		memclrNoHeapPointers(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		// Since this is running on g0, our registers are already zeroed from going through
		// mcall in secret mode.
	}
	gdestroy(gp)
	schedule()
}

// gdestroy moves gp from _Grunning to _Gdead, resets its per-goroutine
// fields, flushes any positive GC assist credit to the background pool while
// gcBlackenEnabled is set, detaches gp from the M, and hands it to gfput.
//
// If gp was locked to its thread, gdestroy does not return normally: it jumps
// to mp.g0.sched so that the thread exits (except on plan9, where lockedExt is
// cleared instead). It throws if the goroutine was locked while mp.lockedInt
// is nonzero.
func gdestroy(gp *g) {
	mp := getg().m
	pp := mp.p.ptr()

	casgstatus(gp, _Grunning, _Gdead)
	// Subtract this goroutine's whole stack size from the scannable-stack
	// accounting.
	gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo))
	if isSystemGoroutine(gp, false) {
		sched.ngsys.Add(-1)
	}
	gp.m = nil
	// Remember whether gp was locked before the lock fields are cleared.
	locked := gp.lockedm != 0
	gp.lockedm = 0
	mp.lockedg = 0
	gp.preemptStop = false
	gp.paniconfault = false
	gp._defer = nil // should be true already but just in case.
	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
	gp.writebuf = nil
	gp.waitreason = waitReasonZero
	gp.param = nil
	gp.labels = nil
	gp.timer = nil
	gp.bubble = nil
	gp.fipsOnlyBypass = false
	gp.secret = 0

	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
		// Flush assist credit to the global pool. This gives
		// better information to pacing if the application is
		// rapidly creating and exiting goroutines.
		assistWorkPerByte := gcController.assistWorkPerByte.Load()
		scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
		gcController.bgScanCredit.Add(scanCredit)
		gp.gcAssistBytes = 0
	}

	dropg()

	if GOARCH == "wasm" { // no threads yet on wasm
		gfput(pp, gp)
		return
	}

	if locked && mp.lockedInt != 0 {
		print("runtime: mp.lockedInt = ", mp.lockedInt, "\n")
		if mp.isextra {
			throw("runtime.Goexit called in a thread that was not created by the Go runtime")
		}
		throw("exited a goroutine internally locked to the OS thread")
	}
	gfput(pp, gp)
	if locked {
		// The goroutine may have locked this thread because
		// it put it in an unusual kernel state. Kill it
		// rather than returning it to the thread pool.

		// Return to mstart, which will release the P and exit
		// the thread.
		if GOOS != "plan9" { // See golang.org/issue/22227.
			gogo(&mp.g0.sched)
		} else {
			// Clear lockedExt on plan9 since we may end up re-using
			// this thread.
			mp.lockedExt = 0
		}
	}
}

// save updates getg().sched to refer to pc and sp so that a following
// gogo will restore pc and sp.
//
// save must not have write barriers because invoking a write barrier
// can clobber getg().sched.
//
//go:nosplit
//go:nowritebarrierrec
func save(pc, sp, bp uintptr) {
	gp := getg()

	if gp == gp.m.g0 || gp == gp.m.gsignal {
		// m.g0.sched is special and must describe the context
		// for exiting the thread. mstart1 writes to it directly.
		// m.gsignal.sched should not be used at all.
		// This check makes sure save calls do not accidentally
		// run in contexts where they'd write to system g's.
		throw("save on system g not allowed")
	}

	gp.sched.pc = pc
	gp.sched.sp = sp
	gp.sched.lr = 0
	gp.sched.bp = bp
	// We need to ensure ctxt is zero, but can't have a write
	// barrier here. However, it should always already be zero.
	// Assert that.
	if gp.sched.ctxt != nil {
		badctxt()
	}
}

// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the save must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
//
// Nothing entersyscall calls can split the stack either.
// We cannot safely move the stack during an active call to syscall,
// because we do not know which of the uintptr arguments are
// really pointers (back into the stack).
// In practice, this means that we make the fast path run through
// entersyscall doing no-split things, and the slow path has to use systemstack
// to run bigger things on the system stack.
//
// reentersyscall is the entry point used by cgo callbacks, where explicitly
// saved SP and PC are restored. This is needed when exitsyscall will be called
// from a function further up in the call stack than the parent, as g->syscallsp
// must always point to a valid stack frame. entersyscall below is the normal
// entry point for syscalls, which obtains the SP and PC from the caller.
//
//go:nosplit
func reentersyscall(pc, sp, bp uintptr) {
	gp := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	gp.m.locks++

	// This M may have a signal stack that is dirtied with secret information
	// (see package "runtime/secret"). Since it's about to go into a syscall for
	// an arbitrary amount of time and the G that put the secret info there
	// might have returned from secret.Do, we have to zero it out now, lest we
	// break the guarantee that secrets are purged by the next GC after a return
	// to secret.Do.
	//
	// It might be tempting to think that we only need to zero out this if we're
	// not running in secret mode anymore, but that leaves an ABA problem. The G
	// that put the secrets onto our signal stack may not be the one that is
	// currently executing.
	//
	// Logically, we should erase this when we lose our P, not when we enter the
	// syscall. This would avoid a zeroing in the case where the call returns
	// almost immediately. Since we use this path for cgo calls as well, these
	// fast "syscalls" are quite common. However, since we only erase the signal
	// stack if we were delivered a signal in secret mode and considering the
	// cross-thread synchronization cost for the P, it hardly seems worth it.
	//
	// TODO(dmo): can we encode the goid into mp.signalSecret and avoid the ABA problem?
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	gp.stackguard0 = stackPreempt
	gp.throwsplit = true

	// Copy the syscalltick over so we can identify if the P got stolen later.
	gp.m.syscalltick = gp.m.p.ptr().syscalltick

	pp := gp.m.p.ptr()
	if pp.runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
	}
	gp.m.oldp.set(pp)

	// Leave SP around for GC and traceback.
	save(pc, sp, bp)
	gp.syscallsp = sp
	gp.syscallpc = pc
	gp.syscallbp = bp

	// Double-check sp and bp.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent sp ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	// N.B. && binds tighter than ||, so this reads
	// (bp != 0 && bp < lo) || hi < bp. A zero bp can never satisfy
	// hi < bp, so it is still exempt from the check.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscall inconsistent bp ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	trace := traceAcquire()
	if trace.ok() {
		// Emit a trace event. Notably, actually emitting the event must happen before
		// the casgstatus because it mutates the P, but the traceLocker must be held
		// across the casgstatus since we're transitioning out of _Grunning
		// (see trace.go invariants).
		systemstack(func() {
			trace.GoSysCall()
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	if sched.gcwaiting.Load() {
		// Optimization: If there's a pending STW, do the equivalent of
		// entersyscallblock here at the last minute and immediately give
		// away our P.
		systemstack(func() {
			entersyscallHandleGCWait(trace)
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	// As soon as we switch to _Gsyscall, we are in danger of losing our P.
	// We must not touch it after this point.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Grunning, _Gsyscall) {
		casgstatus(gp, _Grunning, _Gsyscall)
	}
	if staticLockRanking {
		// casgstatus clobbers gp.sched via systemstack under staticLockRanking. Restore it.
		save(pc, sp, bp)
	}
	if trace.ok() {
		// N.B. We don't need to go on the systemstack because traceRelease is very
		// carefully recursively nosplit. This also means we don't need to worry
		// about clobbering gp.sched.
		traceRelease(trace)
	}
	if sched.sysmonwait.Load() {
		systemstack(entersyscallWakeSysmon)
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	gp.m.locks--
}

// debugExtendGrunningNoP is a debug mode that extends the windows in which
// we're _Grunning without a P in order to try to shake out bugs with code
// assuming this state is impossible.
4759 const debugExtendGrunningNoP = false 4760 4761 // Standard syscall entry used by the go syscall library and normal cgo calls. 4762 // 4763 // This is exported via linkname to assembly in the syscall package and x/sys. 4764 // 4765 // Other packages should not be accessing entersyscall directly, 4766 // but widely used packages access it using linkname. 4767 // Notable members of the hall of shame include: 4768 // - gvisor.dev/gvisor 4769 // 4770 // Do not remove or change the type signature. 4771 // See go.dev/issue/67401. 4772 // 4773 //go:nosplit 4774 //go:linkname entersyscall 4775 func entersyscall() { 4776 // N.B. getcallerfp cannot be written directly as argument in the call 4777 // to reentersyscall because it forces spilling the other arguments to 4778 // the stack. This results in exceeding the nosplit stack requirements 4779 // on some platforms. 4780 fp := getcallerfp() 4781 reentersyscall(sys.GetCallerPC(), sys.GetCallerSP(), fp) 4782 } 4783 4784 func entersyscallWakeSysmon() { 4785 lock(&sched.lock) 4786 if sched.sysmonwait.Load() { 4787 sched.sysmonwait.Store(false) 4788 notewakeup(&sched.sysmonnote) 4789 } 4790 unlock(&sched.lock) 4791 } 4792 4793 func entersyscallHandleGCWait(trace traceLocker) { 4794 gp := getg() 4795 4796 lock(&sched.lock) 4797 if sched.stopwait > 0 { 4798 // Set our P to _Pgcstop so the STW can take it. 4799 pp := gp.m.p.ptr() 4800 pp.m = 0 4801 gp.m.p = 0 4802 atomic.Store(&pp.status, _Pgcstop) 4803 4804 if trace.ok() { 4805 trace.ProcStop(pp) 4806 } 4807 addGSyscallNoP(gp.m) // We gave up our P voluntarily. 4808 pp.gcStopTime = nanotime() 4809 pp.syscalltick++ 4810 if sched.stopwait--; sched.stopwait == 0 { 4811 notewakeup(&sched.stopnote) 4812 } 4813 } 4814 unlock(&sched.lock) 4815 } 4816 4817 // The same as entersyscall(), but with a hint that the syscall is blocking. 4818 4819 // entersyscallblock should be an internal detail, 4820 // but widely used packages access it using linkname. 
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname entersyscallblock
//go:nosplit
func entersyscallblock() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	gp.throwsplit = true
	gp.stackguard0 = stackPreempt // see comment in entersyscall
	gp.m.syscalltick = gp.m.p.ptr().syscalltick
	gp.m.p.ptr().syscalltick++

	addGSyscallNoP(gp.m) // We're going to give up our P.

	// Leave SP around for GC and traceback.
	pc := sys.GetCallerPC()
	sp := sys.GetCallerSP()
	bp := getcallerfp()
	save(pc, sp, bp)
	gp.syscallsp = gp.sched.sp
	gp.syscallpc = gp.sched.pc
	gp.syscallbp = gp.sched.bp
	// Sanity check: the recorded syscall SP must lie within this
	// goroutine's stack bounds.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		sp1 := sp
		sp2 := gp.sched.sp
		sp3 := gp.syscallsp
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}

	// Once we switch to _Gsyscall, we can't safely touch
	// our P anymore, so we need to hand it off beforehand.
	// The tracer also needs to see the syscall before the P
	// handoff, so the order here must be (1) trace,
	// (2) handoff, (3) _Gsyscall switch.
	trace := traceAcquire()
	systemstack(func() {
		if trace.ok() {
			trace.GoSysCall()
		}
		handoffp(releasep())
	})
	// <--
	// Caution: we're in a small window where we are in _Grunning without a P.
	// -->
	if debugExtendGrunningNoP {
		usleep(10)
	}
	casgstatus(gp, _Grunning, _Gsyscall)
	// Re-validate the recorded SP after the status switch.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	// Validate the recorded frame pointer; a zero bp skips the lower-bound
	// comparison.
	//
	// N.B. && binds tighter than ||, so this parses as
	// (bp != 0 && bp < lo) || hi < bp.
	// NOTE(review): presumably syscallbp is unsigned (uintptr), in which case
	// a zero bp can never satisfy hi < bp and this is equivalent to
	// bp != 0 && (bp < lo || hi < bp); explicit parentheses would make the
	// intent clearer. Confirm before changing.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscallblock inconsistent bp ", hex(bp), " ", hex(gp.sched.bp), " ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	if trace.ok() {
		systemstack(func() {
			traceRelease(trace)
		})
	}

	// Resave for traceback during blocked call.
	save(sys.GetCallerPC(), sys.GetCallerSP(), getcallerfp())

	gp.m.locks--
}

// exitsyscall is called when the current goroutine has exited its system
// call. It arranges for the goroutine to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
//
// Write barriers are not allowed because our P may have been stolen.
//
// This is exported via linkname to assembly in the syscall package.
//
// exitsyscall should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	// The caller's SP must not be above the SP recorded at syscall entry.
	if sys.GetCallerSP() > gp.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}
	gp.waitsince = 0

	if sched.stopwait == freezeStopWait {
		// Wedge ourselves if there's an outstanding freezetheworld.
		// If we transition to running, we might end up with our traceback
		// being taken twice.
		//
		// Taking the deadlock lock twice is deliberate: the second lock
		// can never succeed, which is what wedges this thread.
		systemstack(func() {
			lock(&deadlock)
			lock(&deadlock)
		})
	}

	// Optimistically assume we're going to keep running, and switch to running.
	// Before this point, our P wiring is not ours. Once we get past this point,
	// we can access our P if we have it, otherwise we lost it.
	//
	// N.B. Because we're transitioning to _Grunning here, traceAcquire doesn't
	// need to be held ahead of time. We're effectively atomic with respect to
	// the tracer because we're non-preemptible and in the runtime. It can't stop
	// us to read a bad status.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Gsyscall, _Grunning) {
		casgstatus(gp, _Gsyscall, _Grunning)
	}

	// Caution: we're in a window where we may be in _Grunning without a P.
	// Either we will grab a P or call exitsyscallNoP, where we'll switch to
	// _Grunnable.
	if debugExtendGrunningNoP {
		usleep(10)
	}

	// Grab and clear our old P.
	oldp := gp.m.oldp.ptr()
	gp.m.oldp.set(nil)

	// Check if we still have a P, and if not, try to acquire an idle P.
	pp := gp.m.p.ptr()
	if pp != nil {
		// Fast path: we still have our P. Just emit a syscall exit event.
		if trace := traceAcquire(); trace.ok() {
			systemstack(func() {
				// The truth is we truly never lost the P, but syscalltick
				// is used to indicate whether the P should be treated as
				// lost anyway. For example, when syscalltick is trashed by
				// dropm.
				//
				// TODO(mknyszek): Consider a more explicit mechanism for this.
				// Then syscalltick doesn't need to be trashed, and can be used
				// exclusively by sysmon for deciding when it's time to retake.
				if pp.syscalltick == gp.m.syscalltick {
					trace.GoSysExit(false)
				} else {
					// Since we need to pretend we lost the P, but nobody ever
					// took it, we need a ProcSteal event to model the loss.
					// Then, continue with everything else we'd do if we lost
					// the P.
					trace.ProcSteal(pp)
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
				}
				traceRelease(trace)
			})
		}
	} else {
		// Slow path: we lost our P. Try to get another one.
		systemstack(func() {
			// Try to get some other P.
			if pp := exitsyscallTryGetP(oldp); pp != nil {
				// Install the P.
				acquirepNoTrace(pp)

				// We're going to start running again, so emit all the relevant events.
				if trace := traceAcquire(); trace.ok() {
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
					traceRelease(trace)
				}
			}
		})
		// The closure's pp shadows ours; reload from the M to see whether
		// a P was installed.
		pp = gp.m.p.ptr()
	}

	// If we have a P, clean up and exit.
	if pp != nil {
		if goroutineProfile.active {
			// Make sure that gp has had its stack written out to the goroutine
			// profile, exactly as it was when the goroutine profiler first
			// stopped the world.
			systemstack(func() {
				tryRecordGoroutineProfileWB(gp)
			})
		}

		// Increment the syscalltick for P, since we're exiting a syscall.
		pp.syscalltick++

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		gp.syscallsp = 0
		gp.m.locks--
		if gp.preempt {
			// Restore the preemption request in case we cleared it in newstack.
			gp.stackguard0 = stackPreempt
		} else {
			// Otherwise restore the real stackGuard, we clobbered it in entersyscall/entersyscallblock.
			gp.stackguard0 = gp.stack.lo + stackGuard
		}
		gp.throwsplit = false

		if sched.disable.user && !schedEnabled(gp) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}
		return
	}
	// Slowest path: We couldn't get a P, so call into the scheduler.
	gp.m.locks--

	// Call the scheduler.
	mcall(exitsyscallNoP)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until the scheduler returns
	// we don't know for sure that the garbage collector
	// is not running.
	gp.syscallsp = 0
	gp.m.p.ptr().syscalltick++
	gp.throwsplit = false
}

// exitsyscallTryGetP is exitsyscall's attempt to get any P, if it's missing one.
// It first tries to take back oldp (if non-nil), then falls back to an idle P.
// Returns the acquired P, or nil if none could be obtained. On success the
// nGsyscallNoP accounting is updated via decGSyscallNoP; the caller is
// responsible for installing the returned P.
//
// Must execute on the systemstack because exitsyscall is nosplit.
//
//go:systemstack
func exitsyscallTryGetP(oldp *p) *p {
	// Try to steal our old P back.
	if oldp != nil {
		if thread, ok := setBlockOnExitSyscall(oldp); ok {
			thread.takeP()
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			thread.resume()
			return oldp
		}
	}

	// Try to get an idle P.
	// sched.pidle is read without the lock as a cheap pre-check; pidleget
	// runs under sched.lock below.
	if sched.pidle != 0 {
		lock(&sched.lock)
		pp, _ := pidleget(0)
		if pp != nil && sched.sysmonwait.Load() {
			sched.sysmonwait.Store(false)
			notewakeup(&sched.sysmonnote)
		}
		unlock(&sched.lock)
		if pp != nil {
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			return pp
		}
	}
	return nil
}

// exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
//
// Called via mcall, so gp is the calling g from this M.
//
// This function does not return to its caller: every path ends in execute
// or schedule.
//
//go:nowritebarrierrec
func exitsyscallNoP(gp *g) {
	traceExitingSyscall()
	trace := traceAcquire()
	casgstatus(gp, _Grunning, _Grunnable)
	traceExitedSyscall()
	if trace.ok() {
		// Write out syscall exit eagerly.
		//
		// It's important that we write this *after* we know whether we
		// lost our P or not (determined by exitsyscallfast).
		// NOTE(review): exitsyscallfast is not defined in the visible part
		// of this file; the P-loss decision seen here is made in
		// exitsyscall. Confirm and update the reference.
		trace.GoSysExit(true)
		traceRelease(trace)
	}
	decGSyscallNoP(getg().m)
	dropg()
	lock(&sched.lock)
	var pp *p
	if schedEnabled(gp) {
		pp, _ = pidleget(0)
	}
	var locked bool
	if pp == nil {
		globrunqput(gp)

		// Below, we stoplockedm if gp is locked. globrunqput releases
		// ownership of gp, so we must check if gp is locked prior to
		// committing the release by unlocking sched.lock, otherwise we
		// could race with another M transitioning gp from unlocked to
		// locked.
		locked = gp.lockedm != 0
	} else if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	if pp != nil {
		acquirep(pp)
		execute(gp, false) // Never returns.
	}
	if locked {
		// Wait until another thread schedules gp and so m again.
		//
		// N.B. lockedm must be this M, as this g was running on this M
		// before entersyscall.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	stopm()
	schedule() // Never returns.
}

// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
// This function updates all relevant accounting.
//
// nosplit because it's called on the syscall paths.
//
//go:nosplit
func addGSyscallNoP(mp *m) {
	// It's safe to read isExtraInC here because it's only mutated
	// outside of _Gsyscall, and we know this thread is attached
	// to a goroutine in _Gsyscall and blocked from exiting.
	if !mp.isExtraInC {
		// Increment nGsyscallNoP since we're taking away a P
		// from a _Gsyscall goroutine, but only if isExtraInC
		// is not set on the M. If it is, then this thread is
		// back to being a full C thread, and will just inflate
		// the count of not-in-go goroutines. See go.dev/issue/76435.
		sched.nGsyscallNoP.Add(1)
	}
}

// decGSyscallNoP must be called whenever a goroutine in a syscall without
// a P exits the system call. This function updates all relevant accounting.
//
// nosplit because it's called from dropm.
//
//go:nosplit
func decGSyscallNoP(mp *m) {
	// Update nGsyscallNoP, but only if this is not a thread coming
	// out of C. See the comment in addGSyscallNoP. This logic must match,
	// to avoid unmatched increments and decrements.
	if !mp.isExtraInC {
		sched.nGsyscallNoP.Add(-1)
	}
}

// Called from syscall package before fork.
//
// syscall_runtime_BeforeFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
//go:nosplit
func syscall_runtime_BeforeFork() {
	gp := getg().m.curg

	// Block signals during a fork, so that the child does not run
	// a signal handler before exec if a signal is sent to the process
	// group. See issue #18600.
	//
	// The m.locks increment and the saved signal mask are undone in
	// syscall_runtime_AfterFork.
	gp.m.locks++
	sigsave(&gp.m.sigmask)
	sigblock(false)

	// This function is called before fork in syscall package.
	// Code between fork and exec must not allocate memory nor even try to grow stack.
	// Here we spoil g.stackguard0 to reliably detect any attempts to grow stack.
	// syscall_runtime_AfterFork will undo this in parent process, but not in child.
	gp.stackguard0 = stackFork
}

// Called from syscall package after fork in parent.
//
// syscall_runtime_AfterFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
//go:nosplit
func syscall_runtime_AfterFork() {
	gp := getg().m.curg

	// See the comments in syscall_runtime_BeforeFork.
	gp.stackguard0 = gp.stack.lo + stackGuard

	// Restore the signal mask saved before the fork.
	msigrestore(gp.m.sigmask)

	gp.m.locks--
}

// inForkedChild is true while manipulating signals in the child process.
// This is used to avoid calling libc functions in case we are using vfork.
var inForkedChild bool

// Called from syscall package after fork in child.
// It resets non-sigignored signals to the default handler, and
// restores the signal mask in preparation for the exec.
//
// Because this might be called during a vfork, and therefore may be
// temporarily sharing address space with the parent process, this must
// not change any global variables or call into C code that may do so.
//
// syscall_runtime_AfterForkInChild is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
//go:nosplit
//go:nowritebarrierrec
func syscall_runtime_AfterForkInChild() {
	// It's OK to change the global variable inForkedChild here
	// because we are going to change it back. There is no race here,
	// because if we are sharing address space with the parent process,
	// then the parent process can not be running concurrently.
	inForkedChild = true

	clearSignalHandlers()

	// When we are the child we are the only thread running,
	// so we know that nothing else has changed gp.m.sigmask.
	msigrestore(getg().m.sigmask)

	inForkedChild = false
}

// pendingPreemptSignals is the number of preemption signals
// that have been sent but not received. This is only used on Darwin.
// For #41702.
var pendingPreemptSignals atomic.Int32

// Called from syscall package before Exec.
//
// It takes execLock, which is held until syscall_runtime_AfterExec
// releases it.
//
//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
func syscall_runtime_BeforeExec() {
	// Prevent thread creation during exec.
	execLock.lock()

	// On Darwin, wait for all pending preemption signals to
	// be received. See issue #41702.
	if GOOS == "darwin" || GOOS == "ios" {
		for pendingPreemptSignals.Load() > 0 {
			osyield()
		}
	}
}

// Called from syscall package after Exec.
//
// It releases execLock, taken by syscall_runtime_BeforeExec.
//
//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
func syscall_runtime_AfterExec() {
	execLock.unlock()
}

// Allocate a new g, with a stack big enough for stacksize bytes.
// If stacksize is negative, the g is returned without a stack.
func malg(stacksize int32) *g {
	newg := new(g)
	if stacksize >= 0 {
		// The requested size is padded by stackSystem and passed through round2.
		stacksize = round2(stackSystem + stacksize)
		systemstack(func() {
			newg.stack = stackalloc(uint32(stacksize))
			if valgrindenabled {
				newg.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(newg.stack.lo), unsafe.Pointer(newg.stack.hi))
			}
		})
		newg.stackguard0 = newg.stack.lo + stackGuard
		newg.stackguard1 = ^uintptr(0)
		// Clear the bottom word of the stack. We record g
		// there on gsignal stack during VDSO on ARM and ARM64.
		*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
	}
	return newg
}

// Create a new g running fn.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
func newproc(fn *funcval) {
	gp := getg()
	pc := sys.GetCallerPC()
	systemstack(func() {
		newg := newproc1(fn, gp, pc, false, waitReasonZero)

		// Queue the new g on the current P's run queue.
		pp := getg().m.p.ptr()
		runqput(pp, newg, true)

		// Only try to wake another P once mainStarted is set.
		if mainStarted {
			wakep()
		}
	})
}

// Create a new g in state _Grunnable (or _Gwaiting if parked is true), starting at fn.
// callerpc is the address of the go statement that created this. The caller is responsible
// for adding the new g to the scheduler. If parked is true, waitreason must be non-zero.
//
// callergp is the goroutine creating the new g; the new g records its goid
// as parentGoid and copies several per-goroutine settings from it (see below).
// A nil fn is a fatal error.
func newproc1(fn *funcval, callergp *g, callerpc uintptr, parked bool, waitreason waitReason) *g {
	if fn == nil {
		fatal("go of nil func value")
	}

	mp := acquirem() // disable preemption because we hold M and P in local vars.
	pp := mp.p.ptr()
	// Prefer a recycled g from the free lists; allocate a fresh one otherwise.
	newg := gfget(pp)
	if newg == nil {
		newg = malg(stackMin)
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
	totalSize = alignUp(totalSize, sys.StackAlign)
	sp := newg.stack.hi - totalSize
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
	}
	if GOARCH == "arm64" {
		// caller's FP
		*(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0
	}

	// Build the initial scheduling context: the saved PC is set to just past
	// the start of goexit, and gostartcallfn then adjusts the context for fn.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp
	newg.stktopsp = sp
	newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	gostartcallfn(&newg.sched, fn)
	newg.parentGoid = callergp.goid
	newg.gopc = callerpc
	newg.ancestors = saveAncestors(callergp)
	newg.startpc = fn.fn
	newg.runningCleanups.Store(false)
	if isSystemGoroutine(newg, false) {
		sched.ngsys.Add(1)
	} else {
		// Only user goroutines inherit synctest groups and pprof labels.
		newg.bubble = callergp.bubble
		if mp.curg != nil {
			newg.labels = mp.curg.labels
		}
		if goroutineProfile.active {
			// A concurrent goroutine profile is running. It should include
			// exactly the set of goroutines that were alive when the goroutine
			// profiler first stopped the world. That does not include newg, so
			// mark it as not needing a profile before transitioning it from
			// _Gdead.
			newg.goroutineProfiled.Store(goroutineProfileSatisfied)
		}
	}
	// Track initial transition?
	newg.trackingSeq = uint8(cheaprand())
	if newg.trackingSeq%gTrackingPeriod == 0 {
		newg.tracking = true
	}
	gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))

	// Get a goid and switch to runnable. This needs to happen under traceAcquire
	// since it's a goroutine transition. See tracer invariants in trace.go.
	trace := traceAcquire()
	var status uint32 = _Grunnable
	if parked {
		status = _Gwaiting
		newg.waitreason = waitreason
	}
	if pp.goidcache == pp.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		pp.goidcache = sched.goidgen.Add(_GoidCacheBatch)
		pp.goidcache -= _GoidCacheBatch - 1
		pp.goidcacheend = pp.goidcache + _GoidCacheBatch
	}
	newg.goid = pp.goidcache
	casgstatus(newg, _Gdead, status)
	pp.goidcache++
	newg.trace.reset()
	if trace.ok() {
		trace.GoCreate(newg, newg.startpc, parked)
		traceRelease(trace)
	}

	// fips140 bubble
	newg.fipsOnlyBypass = callergp.fipsOnlyBypass

	// dit bubble
	newg.ditWanted = callergp.ditWanted

	if goexperiment.RuntimeSecret && callergp.secret > 0 {
		// while it might seem weird to have a non-zero gp.secret value
		// with no calls to secret.Do on the stack, this case is handled
		// just fine by the cleanup logic in goexit0
		// TODO: secret mode is invisible to the user if they don't ask about it via secret.Enabled
		// and can have severe performance penalties (at time of writing, wrapping the entire
		// tls handshake resulted in a 30% slowdown of the benchmarks).
		// Whether a goroutine is running in secret mode should be more visible,
		// maybe with a stack frame or some sort of bubble inspecting mechanism
		newg.secret = 1
	}

	// Set up race context.
	if raceenabled {
		newg.racectx = racegostart(callerpc)
		newg.raceignore = 0
		if newg.labels != nil {
			// See note in proflabel.go on labelSync's role in synchronizing
			// with the reads in the signal handler.
			racereleasemergeg(newg, unsafe.Pointer(&labelSync))
		}
	}
	pp.goroutinesCreated++
	releasem(mp)

	return newg
}

// saveAncestors copies previous ancestors of the given caller g and
// includes info for the current caller into a new set of tracebacks for
// a g being created.
//
// It returns nil when debug.tracebackancestors is not positive or when the
// caller is the root goroutine (goid 0). Otherwise the result has the
// caller's own info at index 0 followed by the caller's ancestors, capped
// at debug.tracebackancestors entries in total.
func saveAncestors(callergp *g) *[]ancestorInfo {
	// Copy all prior info, except for the root goroutine (goid 0).
	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
		return nil
	}
	var callerAncestors []ancestorInfo
	if callergp.ancestors != nil {
		callerAncestors = *callergp.ancestors
	}
	n := int32(len(callerAncestors)) + 1
	if n > debug.tracebackancestors {
		n = debug.tracebackancestors
	}
	ancestors := make([]ancestorInfo, n)
	// copy stops at the shorter length, so the oldest ancestors are dropped
	// when the cap above applies.
	copy(ancestors[1:], callerAncestors)

	var pcs [tracebackInnerFrames]uintptr
	npcs := gcallers(callergp, 0, pcs[:])
	ipcs := make([]uintptr, npcs)
	copy(ipcs, pcs[:])
	ancestors[0] = ancestorInfo{
		pcs:  ipcs,
		goid: callergp.goid,
		gopc: callergp.gopc,
	}

	ancestorsp := new([]ancestorInfo)
	*ancestorsp = ancestors
	return ancestorsp
}

// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
//
// gp must be in _Gdead. A stack whose size differs from startingStackSize
// is freed before gp is cached. Once the local list reaches 64 entries it
// is drained to below 32, with the removed Gs pushed onto the global
// stack/noStack lists under sched.gFree.lock.
func gfput(pp *p, gp *g) {
	if readgstatus(gp) != _Gdead {
		throw("gfput: bad status (not Gdead)")
	}

	stksize := gp.stack.hi - gp.stack.lo

	if stksize != uintptr(startingStackSize) {
		// non-standard stack size - free it.
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		gp.stackguard0 = 0
		if valgrindenabled {
			valgrindDeregisterStack(gp.valgrindStackID)
			gp.valgrindStackID = 0
		}
	}

	pp.gFree.push(gp)
	if pp.gFree.size >= 64 {
		var (
			stackQ   gQueue
			noStackQ gQueue
		)
		for pp.gFree.size >= 32 {
			gp := pp.gFree.pop()
			if gp.stack.lo == 0 {
				noStackQ.push(gp)
			} else {
				stackQ.push(gp)
			}
		}
		lock(&sched.gFree.lock)
		sched.gFree.noStack.pushAll(noStackQ)
		sched.gFree.stack.pushAll(stackQ)
		unlock(&sched.gFree.lock)
	}
}

// Get from gfree list.
// If local list is empty, grab a batch from global list.
//
// Returns nil if no free g is available. The returned g always has a stack
// of startingStackSize: a cached stack of another size is freed and a new
// one allocated.
func gfget(pp *p) *g {
retry:
	// The global lists are checked without the lock first; the refill
	// itself runs under sched.gFree.lock.
	if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
		lock(&sched.gFree.lock)
		// Move a batch of free Gs to the P.
		for pp.gFree.size < 32 {
			// Prefer Gs with stacks.
			gp := sched.gFree.stack.pop()
			if gp == nil {
				gp = sched.gFree.noStack.pop()
				if gp == nil {
					break
				}
			}
			pp.gFree.push(gp)
		}
		unlock(&sched.gFree.lock)
		goto retry
	}
	gp := pp.gFree.pop()
	if gp == nil {
		return nil
	}
	if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) {
		// Deallocate old stack. We kept it in gfput because it was the
		// right size when the goroutine was put on the free list, but
		// the right size has changed since then.
		systemstack(func() {
			stackfree(gp.stack)
			gp.stack.lo = 0
			gp.stack.hi = 0
			gp.stackguard0 = 0
			if valgrindenabled {
				valgrindDeregisterStack(gp.valgrindStackID)
				gp.valgrindStackID = 0
			}
		})
	}
	if gp.stack.lo == 0 {
		// Stack was deallocated in gfput or just above. Allocate a new one.
		systemstack(func() {
			gp.stack = stackalloc(startingStackSize)
			if valgrindenabled {
				gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(gp.stack.lo), unsafe.Pointer(gp.stack.hi))
			}
		})
		gp.stackguard0 = gp.stack.lo + stackGuard
	} else {
		// Reusing a cached stack: tell the enabled sanitizers about the
		// reused memory range.
		if raceenabled {
			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if msanenabled {
			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if asanenabled {
			asanunpoison(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
	}
	return gp
}

// Purge all cached G's from gfree list to the global list.
//
// Gs are sorted into the global stack/noStack lists depending on whether
// they still own a stack; the global lists are updated under
// sched.gFree.lock.
func gfpurge(pp *p) {
	var (
		stackQ   gQueue
		noStackQ gQueue
	)
	for !pp.gFree.empty() {
		gp := pp.gFree.pop()
		if gp.stack.lo == 0 {
			noStackQ.push(gp)
		} else {
			stackQ.push(gp)
		}
	}
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(noStackQ)
	sched.gFree.stack.pushAll(stackQ)
	unlock(&sched.gFree.lock)
}

// Breakpoint executes a breakpoint trap.
func Breakpoint() {
	breakpoint()
}

// dolockOSThread is called by LockOSThread and lockOSThread below
// after they update m.lockedExt or m.lockedInt. Do not allow preemption
// during this call, or else the m might be different in this function
// than in the caller.
//
// It wires the current g and m to each other via m.lockedg and g.lockedm.
//
//go:nosplit
func dolockOSThread() {
	if GOARCH == "wasm" {
		return // no threads on wasm yet
	}
	gp := getg()
	gp.m.lockedg.set(gp)
	gp.lockedm.set(gp.m)
}

// LockOSThread wires the calling goroutine to its current operating system thread.
// The calling goroutine will always execute in that thread,
// and no other goroutine will execute in it,
// until the calling goroutine has made as many calls to
// [UnlockOSThread] as to LockOSThread.
// If the calling goroutine exits without unlocking the thread,
// the thread will be terminated.
//
// All init functions are run on the startup thread. Calling LockOSThread
// from an init function will cause the main function to be invoked on
// that thread.
//
// A goroutine should call LockOSThread before calling OS services or
// non-Go library functions that depend on per-thread state.
//
// LockOSThread panics if the nesting count overflows.
//
//go:nosplit
func LockOSThread() {
	if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
		// If we need to start a new thread from the locked
		// thread, we need the template thread. Start it now
		// while we're in a known-good state.
		startTemplateThread()
	}
	gp := getg()
	gp.m.lockedExt++
	// A wrap to zero means the counter overflowed; undo the increment
	// before panicking.
	if gp.m.lockedExt == 0 {
		gp.m.lockedExt--
		panic("LockOSThread nesting overflow")
	}
	dolockOSThread()
}

// lockOSThread is the runtime-internal counterpart of LockOSThread: it
// counts the lock in m.lockedInt rather than m.lockedExt and performs no
// overflow check.
//
//go:nosplit
func lockOSThread() {
	getg().m.lockedInt++
	dolockOSThread()
}

// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
// after they update m.lockedExt or m.lockedInt. Do not allow preemption
// during this call, or else the m might be different in this function
// than in the caller.
//
// The g and m are only unwired once both lock counts have dropped to zero.
//
//go:nosplit
func dounlockOSThread() {
	if GOARCH == "wasm" {
		return // no threads on wasm yet
	}
	gp := getg()
	if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 {
		return
	}
	gp.m.lockedg = 0
	gp.lockedm = 0
}

// UnlockOSThread undoes an earlier call to LockOSThread.
// If this drops the number of active LockOSThread calls on the
// calling goroutine to zero, it unwires the calling goroutine from
// its fixed operating system thread.
// If there are no active LockOSThread calls, this is a no-op.
//
// Before calling UnlockOSThread, the caller must ensure that the OS
// thread is suitable for running other goroutines. If the caller made
// any permanent changes to the state of the thread that would affect
// other goroutines, it should not call this function and thus leave
// the goroutine locked to the OS thread until the goroutine (and
// hence the thread) exits.
//
//go:nosplit
func UnlockOSThread() {
	gp := getg()
	if gp.m.lockedExt == 0 {
		return
	}
	gp.m.lockedExt--
	dounlockOSThread()
}

// unlockOSThread is the runtime-internal counterpart of UnlockOSThread,
// operating on m.lockedInt. Unlike UnlockOSThread, an unmatched call is
// not a no-op: it throws via badunlockosthread.
//
//go:nosplit
func unlockOSThread() {
	gp := getg()
	if gp.m.lockedInt == 0 {
		systemstack(badunlockosthread)
	}
	gp.m.lockedInt--
	dounlockOSThread()
}

// badunlockosthread throws to report an unlockOSThread call with no
// matching lockOSThread. unlockOSThread runs it via systemstack.
func badunlockosthread() {
	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
}

// gcount returns an estimate of the number of goroutines: allglen minus
// the Gs sitting on the global and per-P free lists. If includeSys is
// false, the sched.ngsys count is subtracted as well. The result is never
// less than 1.
func gcount(includeSys bool) int32 {
	n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size
	if !includeSys {
		n -= sched.ngsys.Load()
	}
	for _, pp := range allp {
		n -= pp.gFree.size
	}

	// All these variables can be changed concurrently, so the result can be inconsistent.
	// But at least the current goroutine is running.
	if n < 1 {
		n = 1
	}
	return n
}

// goroutineleakcount returns the number of leaked goroutines last reported by
// the runtime.
//
//go:linkname goroutineleakcount runtime/pprof.runtime_goroutineleakcount
func goroutineleakcount() int {
	return work.goroutineLeak.count
}

// mcount returns sched.mnext minus sched.nmfreed.
// NOTE(review): presumably the number of Ms that currently exist (created
// minus freed); confirm against the sched field documentation.
func mcount() int32 {
	return int32(sched.mnext - sched.nmfreed)
}

// prof holds the CPU profiler rate and the lock guarding updates to it.
var prof struct {
	signalLock atomic.Uint32

	// Must hold signalLock to write. Reads may be lock-free, but
	// signalLock should be taken to synchronize with changes.
	hz atomic.Int32
}

// Placeholder functions whose PCs stand in for frames in CPU profiles;
// sigprof below uses _VDSO, _ExternalCode, _GC and _System this way when
// no real traceback is available. Each body recurses unconditionally, so
// a call to one of them would never return.
func _System()                    { _System() }
func _ExternalCode()              { _ExternalCode() }
func _LostExternalCode()          { _LostExternalCode() }
func _GC()                        { _GC() }
func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
func _LostContendedRuntimeLock()  { _LostContendedRuntimeLock() }
func _VDSO()                      { _VDSO() }

// Called if we receive a SIGPROF signal.
// Called by the signal handler, may run during STW.
//
// pc, sp and lr describe the interrupted context; gp and mp are the
// goroutine and thread the sample is attributed to. The collected stack is
// handed to cpuprof.add and traceCPUSample.
//
//go:nowritebarrierrec
func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
	if prof.hz.Load() == 0 {
		return
	}

	// If mp.profilehz is 0, then profiling is not enabled for this thread.
	// We must check this to avoid a deadlock between setcpuprofilerate
	// and the call to cpuprof.add, below.
	if mp != nil && mp.profilehz == 0 {
		return
	}

	// On mips{,le}/arm, 64bit atomics are emulated with spinlocks, in
	// internal/runtime/atomic. If SIGPROF arrives while the program is inside
	// the critical section, it creates a deadlock (when writing the sample).
	// As a workaround, create a counter of SIGPROFs while in critical section
	// to store the count, and pass it to cpuprof.add() later when SIGPROF is
	// received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc).
	if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" {
		if f := findfunc(pc); f.valid() {
			if stringslite.HasPrefix(funcname(f), "internal/runtime/atomic") {
				cpuprof.lostAtomic++
				return
			}
		}
		if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && pc&0xffff0000 == 0xffff0000 {
			// internal/runtime/atomic functions call into kernel
			// helpers on arm < 7. See
			// internal/runtime/atomic/sys_linux_arm.s.
			cpuprof.lostAtomic++
			return
		}
	}

	// Profiling runs concurrently with GC, so it must not allocate.
	// Set a trap in case the code does allocate.
	// Note that on windows, one thread takes profiles of all the
	// other threads, so mp is usually not getg().m.
	// In fact mp may not even be stopped.
	// See golang.org/issue/17165.
	getg().m.mallocing++

	var u unwinder
	var stk [maxCPUProfStack]uintptr
	n := 0
	// NOTE(review): mp is dereferenced here (and at mp.preemptoff below)
	// without a nil check, although the profilehz check above and the
	// vdsoSP branch below both guard against a nil mp. Confirm whether mp
	// can actually be nil and make the checks consistent.
	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
		cgoOff := 0
		// Check cgoCallersUse to make sure that we are not
		// interrupting other code that is fiddling with
		// cgoCallers. We are running in a signal handler
		// with all signals blocked, so we don't have to worry
		// about any other code interrupting us.
		if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
			for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 {
				cgoOff++
			}
			n += copy(stk[:], mp.cgoCallers[:cgoOff])
			mp.cgoCallers[0] = 0
		}

		// Collect Go stack that leads to the cgo call.
		u.initAt(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, unwindSilentErrors)
	} else if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
		// Libcall, i.e. runtime syscall on windows.
		// Collect Go stack that leads to the call.
		u.initAt(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), unwindSilentErrors)
	} else if mp != nil && mp.vdsoSP != 0 {
		// VDSO call, e.g. nanotime1 on Linux.
		// Collect Go stack that leads to the call.
		u.initAt(mp.vdsoPC, mp.vdsoSP, 0, gp, unwindSilentErrors|unwindJumpStack)
	} else {
		u.initAt(pc, sp, lr, gp, unwindSilentErrors|unwindTrap|unwindJumpStack)
	}
	n += tracebackPCs(&u, 0, stk[n:])

	if n <= 0 {
		// Normal traceback is impossible or has failed.
		// Account it against abstract "System" or "GC".
		n = 2
		if inVDSOPage(pc) {
			pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum
		} else if pc > firstmoduledata.etext {
			// "ExternalCode" is better than "etext".
			pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
		}
		stk[0] = pc
		if mp.preemptoff != "" {
			stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
		} else {
			stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
		}
	}

	if prof.hz.Load() != 0 {
		// Note: it can happen on Windows that we interrupted a system thread
		// with no g, so gp could be nil. The other nil checks are done out of
		// caution, but not expected to be nil in practice.
		var tagPtr *unsafe.Pointer
		if gp != nil && gp.m != nil && gp.m.curg != nil {
			tagPtr = &gp.m.curg.labels
		}
		cpuprof.add(tagPtr, stk[:n])

		gprof := gp
		// N.B. this mp shadows the mp parameter for the rest of this block;
		// it is derived from gp.m rather than from the argument.
		var mp *m
		var pp *p
		if gp != nil && gp.m != nil {
			if gp.m.curg != nil {
				gprof = gp.m.curg
			}
			mp = gp.m
			pp = gp.m.p.ptr()
		}
		traceCPUSample(gprof, mp, pp, stk[:n])
	}
	getg().m.mallocing--
}

// setcpuprofilerate sets the CPU profiling rate to hz times per second.
// If hz <= 0, setcpuprofilerate turns off CPU profiling.
func setcpuprofilerate(hz int32) {
	// Force sane arguments.
	if hz < 0 {
		hz = 0
	}

	// Disable preemption, otherwise we can be rescheduled to another thread
	// that has profiling enabled.
	gp := getg()
	gp.m.locks++

	// Stop profiler on this thread so that it is safe to lock prof.
	// if a profiling signal came in while we had prof locked,
	// it would deadlock.
5932 setThreadCPUProfiler(0) 5933 5934 for !prof.signalLock.CompareAndSwap(0, 1) { 5935 osyield() 5936 } 5937 if prof.hz.Load() != hz { 5938 setProcessCPUProfiler(hz) 5939 prof.hz.Store(hz) 5940 } 5941 prof.signalLock.Store(0) 5942 5943 lock(&sched.lock) 5944 sched.profilehz = hz 5945 unlock(&sched.lock) 5946 5947 if hz != 0 { 5948 setThreadCPUProfiler(hz) 5949 } 5950 5951 gp.m.locks-- 5952 } 5953 5954 // init initializes pp, which may be a freshly allocated p or a 5955 // previously destroyed p, and transitions it to status _Pgcstop. 5956 func (pp *p) init(id int32) { 5957 pp.id = id 5958 pp.gcw.id = id 5959 pp.status = _Pgcstop 5960 pp.sudogcache = pp.sudogbuf[:0] 5961 pp.deferpool = pp.deferpoolbuf[:0] 5962 pp.wbBuf.reset() 5963 if pp.mcache == nil { 5964 if id == 0 { 5965 if mcache0 == nil { 5966 throw("missing mcache?") 5967 } 5968 // Use the bootstrap mcache0. Only one P will get 5969 // mcache0: the one with ID 0. 5970 pp.mcache = mcache0 5971 } else { 5972 pp.mcache = allocmcache() 5973 } 5974 } 5975 if raceenabled && pp.raceprocctx == 0 { 5976 if id == 0 { 5977 pp.raceprocctx = raceprocctx0 5978 raceprocctx0 = 0 // bootstrap 5979 } else { 5980 pp.raceprocctx = raceproccreate() 5981 } 5982 } 5983 lockInit(&pp.timers.mu, lockRankTimers) 5984 5985 // This P may get timers when it starts running. Set the mask here 5986 // since the P may not go through pidleget (notably P 0 on startup). 5987 timerpMask.set(id) 5988 // Similarly, we may not go through pidleget before this P starts 5989 // running if it is P 0 on startup. 5990 idlepMask.clear(id) 5991 } 5992 5993 // destroy releases all of the resources associated with pp and 5994 // transitions it to status _Pdead. 5995 // 5996 // sched.lock must be held and the world must be stopped. 
func (pp *p) destroy() {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	// Move all runnable goroutines to the global queue
	for pp.runqhead != pp.runqtail {
		// Pop from tail of local queue
		pp.runqtail--
		gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr()
		// Push onto head of global queue
		globrunqputhead(gp)
	}
	if pp.runnext != 0 {
		globrunqputhead(pp.runnext.ptr())
		pp.runnext = 0
	}

	// Move all timers to the local P, i.e. the P of the current M.
	getg().m.p.ptr().timers.take(&pp.timers)

	// No need to flush p's write barrier buffer or span queue, as Ps
	// cannot be destroyed during the mark phase.
	if phase := gcphase; phase != _GCoff {
		println("runtime: p id", pp.id, "destroyed during GC phase", phase)
		throw("P destroyed while GC is running")
	}
	// We should free the queues though.
	pp.gcw.spanq.destroy()

	// Drop the cached sudogs, pinner and defers, resetting the caches
	// to empty slices over their backing buffers.
	clear(pp.sudogbuf[:])
	pp.sudogcache = pp.sudogbuf[:0]
	pp.pinnerCache = nil
	clear(pp.deferpoolbuf[:])
	pp.deferpool = pp.deferpoolbuf[:0]
	systemstack(func() {
		for i := 0; i < pp.mspancache.len; i++ {
			// Safe to call since the world is stopped.
			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
		}
		pp.mspancache.len = 0
		lock(&mheap_.lock)
		pp.pcache.flush(&mheap_.pages)
		unlock(&mheap_.lock)
	})
	freemcache(pp.mcache)
	pp.mcache = nil
	gfpurge(pp)
	if raceenabled {
		if pp.timers.raceCtx != 0 {
			// The race detector code uses a callback to fetch
			// the proc context, so arrange for that callback
			// to see the right thing.
			// This hack only works because we are the only
			// thread running.
			mp := getg().m
			phold := mp.p.ptr()
			mp.p.set(pp)

			racectxend(pp.timers.raceCtx)
			pp.timers.raceCtx = 0

			mp.p.set(phold)
		}
		raceprocdestroy(pp.raceprocctx)
		pp.raceprocctx = 0
	}
	pp.gcAssistTime = 0
	// Fold this P's local counters into the global totals before it dies.
	gcCleanups.queued += pp.cleanupsQueued
	pp.cleanupsQueued = 0
	sched.goroutinesCreated.Add(int64(pp.goroutinesCreated))
	pp.goroutinesCreated = 0
	pp.xRegs.free()
	pp.status = _Pdead
}

// Change number of processors.
//
// sched.lock must be held, and the world must be stopped.
//
// gcworkbufs must not be being modified by either the GC or the write barrier
// code, so the GC must not be running if the number of Ps actually changes.
//
// Returns list of Ps with local work, they need to be scheduled by the caller.
// The list is linked through p.link.
func procresize(nprocs int32) *p {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	old := gomaxprocs
	if old < 0 || nprocs <= 0 {
		throw("procresize: invalid arg")
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.Gomaxprocs(nprocs)
		traceRelease(trace)
	}

	// update statistics
	now := nanotime()
	if sched.procresizetime != 0 {
		sched.totaltime += int64(old) * (now - sched.procresizetime)
	}
	sched.procresizetime = now

	// Grow allp if necessary.
	if nprocs > int32(len(allp)) {
		// Synchronize with retake, which could be running
		// concurrently since it doesn't run on a P.
		lock(&allpLock)
		if nprocs <= int32(cap(allp)) {
			allp = allp[:nprocs]
		} else {
			nallp := make([]*p, nprocs)
			// Copy everything up to allp's cap so we
			// never lose old allocated Ps.
			copy(nallp, allp[:cap(allp)])
			allp = nallp
		}

		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// initialize new P's
	for i := old; i < nprocs; i++ {
		pp := allp[i]
		if pp == nil {
			pp = new(p)
		}
		pp.init(i)
		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
	}

	gp := getg()
	if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
		// continue to use the current P
		gp.m.p.ptr().status = _Prunning
		gp.m.p.ptr().mcache.prepareForSweep()
	} else {
		// release the current P and acquire allp[0].
		//
		// We must do this before destroying our current P
		// because p.destroy itself has write barriers, so we
		// need to do that from a valid P.
		if gp.m.p != 0 {
			trace := traceAcquire()
			if trace.ok() {
				// Pretend that we were descheduled
				// and then scheduled again to keep
				// the trace consistent.
				trace.GoSched()
				trace.ProcStop(gp.m.p.ptr())
				traceRelease(trace)
			}
			gp.m.p.ptr().m = 0
		}
		gp.m.p = 0
		pp := allp[0]
		pp.m = 0
		pp.status = _Pidle
		acquirep(pp)
		trace := traceAcquire()
		if trace.ok() {
			trace.GoStart()
			traceRelease(trace)
		}
	}

	// g.m.p is now set, so we no longer need mcache0 for bootstrapping.
	mcache0 = nil

	// release resources from unused P's
	for i := nprocs; i < old; i++ {
		pp := allp[i]
		pp.destroy()
		// can't free P itself because it can be referenced by an M in syscall
	}

	// Trim allp.
	if int32(len(allp)) != nprocs {
		lock(&allpLock)
		allp = allp[:nprocs]
		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// Assign Ms to Ps with runnable goroutines.
	//
	// Every P other than the current one is set to _Pidle and placed on
	// exactly one of three lists linked through p.link: idlePs (empty run
	// queue), runnablePs (has work and an M), or runnablePsNeedM (has work
	// but no M yet).
	var runnablePs *p
	var runnablePsNeedM *p
	var idlePs *p
	for i := nprocs - 1; i >= 0; i-- {
		pp := allp[i]
		if gp.m.p.ptr() == pp {
			continue
		}
		pp.status = _Pidle
		if runqempty(pp) {
			pp.link.set(idlePs)
			idlePs = pp
			continue
		}

		// Prefer to run on the most recent M if it is
		// available.
		//
		// Ps with no oldm (or for which oldm is already taken
		// by an earlier P), we delay until all oldm Ps are
		// handled. Otherwise, mget may return an M that a
		// later P has in oldm.
		var mp *m
		if oldm := pp.oldm.get(); oldm != nil {
			// Returns nil if oldm is not idle.
			mp = mgetSpecific(oldm)
		}
		if mp == nil {
			// Call mget later.
			pp.link.set(runnablePsNeedM)
			runnablePsNeedM = pp
			continue
		}
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}
	// Assign Ms to remaining runnable Ps without usable oldm. See comment
	// above.
	for runnablePsNeedM != nil {
		pp := runnablePsNeedM
		runnablePsNeedM = pp.link.ptr()

		mp := mget()
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}

	// Now that we've assigned Ms to Ps with runnable goroutines, assign GC
	// mark workers to remaining idle Ps, if needed.
	//
	// By assigning GC workers to Ps here, we slightly speed up starting
	// the world, as we will start enough Ps to run all of the user
	// goroutines and GC mark workers all at once, rather than using a
	// sequence of wakep calls as each P's findRunnable realizes it needs
	// to run a mark worker instead of a user goroutine.
	//
	// By assigning GC workers to Ps only _after_ previously-running Ps are
	// assigned Ms, we ensure that goroutines previously running on a P
	// continue to run on the same P, with GC mark workers preferring
	// previously-idle Ps. This helps prevent goroutines from shuffling
	// around too much across STW.
	//
	// N.B., if there aren't enough Ps left in idlePs for all of the GC
	// mark workers, then findRunnable will still choose to run mark
	// workers on Ps assigned above.
	//
	// N.B., we do this during any STW in the mark phase, not just the
	// sweep termination STW that starts the mark phase. gcBgMarkWorker
	// always preempts by removing itself from the P, so even unrelated
	// STWs during the mark require that Ps reselect mark workers upon
	// restart.
	if gcBlackenEnabled != 0 {
		for idlePs != nil {
			pp := idlePs

			ok, _ := gcController.assignWaitingGCWorker(pp, now)
			if !ok {
				// No more mark workers needed.
				break
			}

			// Got a worker, P is now runnable.
			//
			// mget may return nil if there aren't enough Ms, in
			// which case startTheWorldWithSema will start one.
			//
			// N.B. findRunnableGCWorker will make the worker G
			// itself runnable.
			idlePs = pp.link.ptr()
			mp := mget()
			pp.m.set(mp)
			pp.link.set(runnablePs)
			runnablePs = pp
		}
	}

	// Finally, any remaining Ps are truly idle.
	for idlePs != nil {
		pp := idlePs
		idlePs = pp.link.ptr()
		pidleput(pp, now)
	}

	stealOrder.reset(uint32(nprocs))
	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
	if old != nprocs {
		// Notify the limiter that the amount of procs has changed.
		gcCPULimiter.resetCapacity(now, nprocs)
	}
	return runnablePs
}

// Associate p and the current m.
//
// This function is allowed to have write barriers even if the caller
// isn't because it immediately acquires pp.
//
//go:yeswritebarrierrec
func acquirep(pp *p) {
	// Do the work.
	acquirepNoTrace(pp)

	// Emit the event.
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStart()
		traceRelease(trace)
	}
}

// Internals of acquirep, just skipping the trace events.
//
//go:yeswritebarrierrec
func acquirepNoTrace(pp *p) {
	// Do the part that isn't allowed to have write barriers.
	wirep(pp)

	// Have p; write barriers now allowed.

	// The M we're associating with will be the old M after the next
	// releasep. We must set this here because write barriers are not
	// allowed in releasep.
	pp.oldm = pp.m.ptr().self

	// Perform deferred mcache flush before this P can allocate
	// from a potentially stale mcache.
	pp.mcache.prepareForSweep()
}

// wirep is the first step of acquirep, which actually associates the
// current M to pp. This is broken out so we can disallow write
// barriers for this part, since we don't yet have a P.
//
// It throws if the current M already has a P, or if pp already has an M
// or is not in _Pidle.
//
//go:nowritebarrierrec
//go:nosplit
func wirep(pp *p) {
	gp := getg()

	if gp.m.p != 0 {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			throw("wirep: already in go")
		})
	}
	if pp.m != 0 || pp.status != _Pidle {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			id := int64(0)
			if pp.m != 0 {
				id = pp.m.ptr().id
			}
			print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n")
			throw("wirep: invalid p state")
		})
	}
	gp.m.p.set(pp)
	pp.m.set(gp.m)
	pp.status = _Prunning
}

// Disassociate p and the current m.
//
// The trace event is emitted before the P is actually released.
func releasep() *p {
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStop(getg().m.p.ptr())
		traceRelease(trace)
	}
	return releasepNoTrace()
}

// Disassociate p and the current m without tracing an event.
//
// It throws if the current M has no P, or if the P is not wired to this M
// in _Prunning. The released P is returned in _Pidle.
func releasepNoTrace() *p {
	gp := getg()

	if gp.m.p == 0 {
		throw("releasep: invalid arg")
	}
	pp := gp.m.p.ptr()
	if pp.m.ptr() != gp.m || pp.status != _Prunning {
		print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
		throw("releasep: invalid p state")
	}

	// P must clear its nextGCMarkWorker if it stops.
	gcController.releaseNextGCMarkWorker(pp)

	gp.m.p = 0
	pp.m = 0
	pp.status = _Pidle
	return pp
}

// incidlelocked adds v to sched.nmidlelocked under sched.lock and, when v
// is positive, runs the deadlock check.
func incidlelocked(v int32) {
	lock(&sched.lock)
	sched.nmidlelocked += v
	if v > 0 {
		checkdead()
	}
	unlock(&sched.lock)
}

// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
// sched.lock must be held.
func checkdead() {
	assertLockHeld(&sched.lock)

	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
	// there are no running goroutines. The calling program is
	// assumed to be running.
	// One exception is Wasm, which is single-threaded. If we are
	// in Go and all goroutines are blocked, it deadlocks.
	if (islibrary || isarchive) && GOARCH != "wasm" {
		return
	}

	// If we are dying because of a signal caught on an already idle thread,
	// freezetheworld will cause all running threads to block.
	// And runtime will essentially enter into deadlock state,
	// except that there is a thread that will call exit soon.
	if panicking.Load() > 0 {
		return
	}

	// If we are not running under cgo, but we have an extra M then account
	// for it. (It is possible to have an extra M on Windows without cgo to
	// accommodate callbacks created by syscall.NewCallback. See issue #6751
	// for details.)
	var run0 int32
	if !iscgo && cgoHasExtraM && extraMLength.Load() > 0 {
		run0 = 1
	}

	// run is the number of Ms that are neither idle, idle-locked, nor
	// counted in sched.nmsys.
	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
	if run > run0 {
		return
	}
	if run < 0 {
		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
		unlock(&sched.lock)
		throw("checkdead: inconsistent counts")
	}

	// Despite its name, grunning counts the non-system goroutines that are
	// blocked (_Gwaiting or _Gpreempted). Finding one that is runnable,
	// running or in a syscall contradicts the M counts above and is fatal.
	grunning := 0
	forEachG(func(gp *g) {
		if isSystemGoroutine(gp, false) {
			return
		}
		s := readgstatus(gp)
		switch s &^ _Gscan {
		case _Gwaiting,
			_Gpreempted:
			grunning++
		case _Grunnable,
			_Grunning,
			_Gsyscall:
			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
			unlock(&sched.lock)
			throw("checkdead: runnable g")
		}
	})
	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
		unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
		fatal("no goroutines (main called runtime.Goexit) - deadlock!")
	}

	// Maybe jump time forward for playground.
	if faketime != 0 {
		if when := timeSleepUntil(); when < maxWhen {
			faketime = when

			// Start an M to steal the timer.
			pp, _ := pidleget(faketime)
			if pp == nil {
				// There should always be a free P since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no p for timer")
			}
			mp := mget()
			if mp == nil {
				// There should always be a free M since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no m for timer")
			}
			// M must be spinning to steal. We set this to be
			// explicit, but since this is the only M it would
			// become spinning on its own anyways.
			sched.nmspinning.Add(1)
			mp.spinning = true
			mp.nextp.set(pp)
			notewakeup(&mp.park)
			return
		}
	}

	// There are no goroutines running, so we can look at the P's.
	// A pending timer on any P means we are not deadlocked.
	for _, pp := range allp {
		if len(pp.timers.heap) > 0 {
			return
		}
	}

	unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
	fatal("all goroutines are asleep - deadlock!")
}

// forcegcperiod is the maximum time in nanoseconds between garbage
// collections. If we go this long without a garbage collection, one
// is forced to run.
//
// This is a variable for testing purposes. It normally doesn't change.
var forcegcperiod int64 = 2 * 60 * 1e9

// haveSysmon indicates whether there is sysmon thread support.
//
// No threads on wasm yet, so no sysmon.
const haveSysmon = GOARCH != "wasm"

// sysmon is the system monitor loop. It never returns. On each iteration it
// sleeps, then polls the network if it has not been polled recently, retakes
// Ps and preempts long-running Gs (retake), forces a GC when the time
// trigger fires, and prints scheduler traces when enabled.
//
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func sysmon() {
	lock(&sched.lock)
	sched.nmsys++
	checkdead()
	unlock(&sched.lock)

	lastgomaxprocs := int64(0)
	lasttrace := int64(0)
	idle := 0 // how many cycles in succession we had not woken up somebody
	delay := uint32(0)

	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		usleep(delay)

		// sysmon should not enter deep sleep if schedtrace is enabled so that
		// it can print that information at the right time.
		//
		// It should also not enter deep sleep if there are any active P's so
		// that it can retake P's from syscalls, preempt long running G's, and
		// poll the network if all P's are busy for long stretches.
		//
		// It should wakeup from deep sleep if any P's become active either due
		// to exiting a syscall or waking up due to a timer expiring so that it
		// can resume performing those duties. If it wakes from a syscall it
		// resets idle and delay as a bet that since it had retaken a P from a
		// syscall before, it may need to do it again shortly after the
		// application starts work again. It does not reset idle when waking
		// from a timer to avoid adding system load to applications that spend
		// most of their time sleeping.
		now := nanotime()
		if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) {
			lock(&sched.lock)
			// Re-check the condition now that sched.lock is held.
			if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs {
				syscallWake := false
				next := timeSleepUntil()
				if next > now {
					sched.sysmonwait.Store(true)
					unlock(&sched.lock)
					// Make wake-up period small enough
					// for the sampling to be correct.
					sleep := forcegcperiod / 2
					if next-now < sleep {
						sleep = next - now
					}
					shouldRelax := sleep >= osRelaxMinNS
					if shouldRelax {
						osRelax(true)
					}
					syscallWake = notetsleep(&sched.sysmonnote, sleep)
					if shouldRelax {
						osRelax(false)
					}
					lock(&sched.lock)
					sched.sysmonwait.Store(false)
					noteclear(&sched.sysmonnote)
				}
				if syscallWake {
					idle = 0
					delay = 20
				}
			}
			unlock(&sched.lock)
		}

		lock(&sched.sysmonlock)
		// Update now in case we blocked on sysmonnote or spent a long time
		// blocked on schedlock or sysmonlock above.
		now = nanotime()

		// trigger libc interceptors if needed
		if *cgo_yield != nil {
			asmcgocall(*cgo_yield, nil)
		}
		// poll network if not polled for more than 10ms
		lastpoll := sched.lastpoll.Load()
		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
			sched.lastpoll.CompareAndSwap(lastpoll, now)
			list, delta := netpoll(0) // non-blocking - returns list of goroutines
			if !list.empty() {
				// Need to decrement number of idle locked M's
				// (pretending that one more is running) before injectglist.
				// Otherwise it can lead to the following situation:
				// injectglist grabs all P's but before it starts M's to run the P's,
				// another M returns from syscall, finishes running its G,
				// observes that there is no work to do and no other running M's
				// and reports deadlock.
				incidlelocked(-1)
				injectglist(&list)
				incidlelocked(1)
				netpollAdjustWaiters(delta)
			}
		}
		// Check if we need to update GOMAXPROCS at most once per second.
		if debug.updatemaxprocs != 0 && lastgomaxprocs+1e9 <= now {
			sysmonUpdateGOMAXPROCS()
			lastgomaxprocs = now
		}
		if scavenger.sysmonWake.Load() != 0 {
			// Kick the scavenger awake if someone requested it.
			scavenger.wake()
		}
		// retake P's blocked in syscalls
		// and preempt long running G's
		if retake(now) != 0 {
			idle = 0
		} else {
			idle++
		}
		// check if we need to force a GC
		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() {
			lock(&forcegc.lock)
			forcegc.idle.Store(false)
			var list gList
			list.push(forcegc.g)
			injectglist(&list)
			unlock(&forcegc.lock)
		}
		// debug.schedtrace is a period in milliseconds.
		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
			lasttrace = now
			schedtrace(debug.scheddetail > 0)
		}
		unlock(&sched.sysmonlock)
	}
}

// sysmontick records, for one P, the schedtick and syscalltick values last
// observed by retake and the times at which they were observed.
type sysmontick struct {
	schedtick   uint32
	syscalltick uint32
	schedwhen   int64
	syscallwhen int64
}

// forcePreemptNS is the time slice given to a G before it is
// preempted.
const forcePreemptNS = 10 * 1000 * 1000 // 10ms

// retake walks all Ps in _Prunning. It asks for preemption of a P whose
// schedtick has not changed for at least forcePreemptNS, and takes the P
// away from a thread blocked in a syscall, handing it off to another thread.
// now is the current nanotime. It returns the number of Ps retaken.
func retake(now int64) uint32 {
	n := 0
	// Prevent allp slice changes. This lock will be completely
	// uncontended unless we're already stopping the world.
	lock(&allpLock)
	// We can't use a range loop over allp because we may
	// temporarily drop the allpLock. Hence, we need to re-fetch
	// allp each time around the loop.
	for i := 0; i < len(allp); i++ {
		// Quickly filter out non-running Ps. Running Ps are either
		// in a syscall or are actually executing. Idle Ps don't
		// need to be retaken.
		//
		// This is best-effort, so it's OK that it's racy. Our target
		// is to retake Ps that have been running or in a syscall for
		// a long time (milliseconds), so the state has plenty of time
		// to stabilize.
		pp := allp[i]
		if pp == nil || atomic.Load(&pp.status) != _Prunning {
			// pp can be nil if procresize has grown
			// allp but not yet created new Ps.
			continue
		}
		pd := &pp.sysmontick
		sysretake := false

		// Preempt G if it's running on the same schedtick for
		// too long. This could be from a single long-running
		// goroutine or a sequence of goroutines run via
		// runnext, which share a single schedtick time slice.
		schedt := int64(pp.schedtick)
		if int64(pd.schedtick) != schedt {
			pd.schedtick = uint32(schedt)
			pd.schedwhen = now
		} else if pd.schedwhen+forcePreemptNS <= now {
			preemptone(pp)
			// If pp is in a syscall, preemptone doesn't work.
			// Neither the goroutine nor the thread can respond to a
			// preemption request because they're not in Go code,
			// so we need to take the P ourselves.
			sysretake = true
		}

		// Drop allpLock so we can take sched.lock.
		unlock(&allpLock)

		// Need to decrement number of idle locked M's (pretending that
		// one more is running) before we take the P and resume.
		// Otherwise the M from which we retake can exit the syscall,
		// increment nmidle and report deadlock.
		//
		// Can't call incidlelocked once we setBlockOnExitSyscall, due
		// to a lock ordering violation between sched.lock and _Gscan.
		incidlelocked(-1)

		// Try to prevent the P from continuing in the syscall, if it's in one at all.
		thread, ok := setBlockOnExitSyscall(pp)
		if !ok {
			// Not in a syscall, or something changed out from under us.
			goto done
		}

		// Retake the P if it's there for more than 1 sysmon tick (at least 20us).
		if syst := int64(pp.syscalltick); !sysretake && int64(pd.syscalltick) != syst {
			pd.syscalltick = uint32(syst)
			pd.syscallwhen = now
			thread.resume()
			goto done
		}

		// On the one hand we don't want to retake Ps if there is no other work to do,
		// but on the other hand we want to retake them eventually
		// because they can prevent the sysmon thread from deep sleep.
		if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
			thread.resume()
			goto done
		}

		// Take the P. Note: because we have the scan bit, the goroutine
		// is at worst stuck spinning in exitsyscall.
		thread.takeP()
		thread.resume()
		n++

		// Handoff the P for some other thread to run it.
		handoffp(pp)

		// The P has been handed off to another thread, so risk of a false
		// deadlock report while we hold onto it is gone.
	done:
		incidlelocked(1)
		lock(&allpLock)
	}
	unlock(&allpLock)
	return uint32(n)
}

// syscallingThread represents a thread in a system call that temporarily
// cannot advance out of the system call.
type syscallingThread struct {
	gp     *g
	mp     *m
	pp     *p
	status uint32 // gp's status without the _Gscan bit, as observed by setBlockOnExitSyscall
}

// setBlockOnExitSyscall prevents pp's thread from advancing out of
// exitsyscall. On success, returns the g/m/p state of the thread
// and true. At that point, the caller owns the g/m/p links referenced,
// the goroutine is in _Gsyscall, and prevented from transitioning out
// of it. On failure, it returns false, and none of these guarantees are
// made.
//
// Callers must call resume on the resulting thread state once
// they're done with thread, otherwise it will remain blocked forever.
//
// This function races with state changes on pp, and thus may fail
// if pp is not in a system call, or exits a system call concurrently
// with this function. However, this function is safe to call without
// any additional synchronization.
func setBlockOnExitSyscall(pp *p) (syscallingThread, bool) {
	if pp.status != _Prunning {
		return syscallingThread{}, false
	}
	// Be very careful here, these reads are intentionally racy.
	// Once we notice the G is in _Gsyscall, acquire its scan bit,
	// and validate that it's still connected to the *same* M and P,
	// we can actually get to work. Holding the scan bit will prevent
	// the G from exiting the syscall.
	//
	// Our goal here is to interrupt long syscalls. If it turns out
	// that we're wrong and the G switched to another syscall while
	// we were trying to do this, that's completely fine. It's
	// probably making more frequent syscalls and the typical
	// preemption paths should be effective.
	mp := pp.m.ptr()
	if mp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	gp := mp.curg
	if gp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	status := readgstatus(gp) &^ _Gscan

	// A goroutine is considered in a syscall, and may have a corresponding
	// P, if it's in _Gsyscall *or* _Gdeadextra. In the latter case, it's an
	// extra M goroutine.
	if status != _Gsyscall && status != _Gdeadextra {
		// Not in a syscall, nothing to do.
		return syscallingThread{}, false
	}
	if !castogscanstatus(gp, status, status|_Gscan) {
		// Not in _Gsyscall or _Gdeadextra anymore. Nothing to do.
		return syscallingThread{}, false
	}
	if gp.m != mp || gp.m.p.ptr() != pp {
		// This is not what we originally observed. Nothing to do.
		// Drop the scan bit we just acquired before bailing out.
		casfrom_Gscanstatus(gp, status|_Gscan, status)
		return syscallingThread{}, false
	}
	return syscallingThread{gp, mp, pp, status}, true
}

// gcstopP unwires the P attached to the syscalling thread
// and moves it into the _Pgcstop state.
//
// The caller must be stopping the world.
6850 func (s syscallingThread) gcstopP() { 6851 assertLockHeld(&sched.lock) 6852 6853 s.releaseP(_Pgcstop) 6854 s.pp.gcStopTime = nanotime() 6855 sched.stopwait-- 6856 } 6857 6858 // takeP unwires the P attached to the syscalling thread 6859 // and moves it into the _Pidle state. 6860 func (s syscallingThread) takeP() { 6861 s.releaseP(_Pidle) 6862 } 6863 6864 // releaseP unwires the P from the syscalling thread, moving 6865 // it to the provided state. Callers should prefer to use 6866 // takeP and gcstopP. 6867 func (s syscallingThread) releaseP(state uint32) { 6868 if state != _Pidle && state != _Pgcstop { 6869 throw("attempted to release P into a bad state") 6870 } 6871 trace := traceAcquire() 6872 s.pp.m = 0 6873 s.mp.p = 0 6874 atomic.Store(&s.pp.status, state) 6875 if trace.ok() { 6876 trace.ProcSteal(s.pp) 6877 traceRelease(trace) 6878 } 6879 addGSyscallNoP(s.mp) 6880 s.pp.syscalltick++ 6881 } 6882 6883 // resume allows a syscalling thread to advance beyond exitsyscall. 6884 func (s syscallingThread) resume() { 6885 casfrom_Gscanstatus(s.gp, s.status|_Gscan, s.status) 6886 } 6887 6888 // Tell all goroutines that they have been preempted and they should stop. 6889 // This function is purely best-effort. It can fail to inform a goroutine if a 6890 // processor just started running it. 6891 // No locks need to be held. 6892 // Returns true if preemption request was issued to at least one goroutine. 6893 func preemptall() bool { 6894 res := false 6895 for _, pp := range allp { 6896 if pp.status != _Prunning { 6897 continue 6898 } 6899 if preemptone(pp) { 6900 res = true 6901 } 6902 } 6903 return res 6904 } 6905 6906 // Tell the goroutine running on processor P to stop. 6907 // This function is purely best-effort. It can incorrectly fail to inform the 6908 // goroutine. It can inform the wrong goroutine. Even if it informs the 6909 // correct goroutine, that goroutine might ignore the request if it is 6910 // simultaneously executing newstack. 
6911 // No lock needs to be held. 6912 // Returns true if preemption request was issued. 6913 // The actual preemption will happen at some point in the future 6914 // and will be indicated by the gp->status no longer being 6915 // Grunning 6916 func preemptone(pp *p) bool { 6917 mp := pp.m.ptr() 6918 if mp == nil || mp == getg().m { 6919 return false 6920 } 6921 gp := mp.curg 6922 if gp == nil || gp == mp.g0 { 6923 return false 6924 } 6925 if readgstatus(gp)&^_Gscan == _Gsyscall { 6926 // Don't bother trying to preempt a goroutine in a syscall. 6927 return false 6928 } 6929 6930 gp.preempt = true 6931 6932 // Every call in a goroutine checks for stack overflow by 6933 // comparing the current stack pointer to gp->stackguard0. 6934 // Setting gp->stackguard0 to StackPreempt folds 6935 // preemption into the normal stack overflow check. 6936 gp.stackguard0 = stackPreempt 6937 6938 // Request an async preemption of this P. 6939 if preemptMSupported && debug.asyncpreemptoff == 0 { 6940 pp.preempt = true 6941 preemptM(mp) 6942 } 6943 6944 return true 6945 } 6946 6947 var starttime int64 6948 6949 func schedtrace(detailed bool) { 6950 now := nanotime() 6951 if starttime == 0 { 6952 starttime = now 6953 } 6954 6955 lock(&sched.lock) 6956 print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runq.size) 6957 if detailed { 6958 print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n") 6959 } 6960 // We must be careful while reading data from P's, M's and G's. 6961 // Even if we hold schedlock, most data can be changed concurrently. 6962 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 
6963 for i, pp := range allp { 6964 h := atomic.Load(&pp.runqhead) 6965 t := atomic.Load(&pp.runqtail) 6966 if detailed { 6967 print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=") 6968 mp := pp.m.ptr() 6969 if mp != nil { 6970 print(mp.id) 6971 } else { 6972 print("nil") 6973 } 6974 print(" runqsize=", t-h, " gfreecnt=", pp.gFree.size, " timerslen=", len(pp.timers.heap), "\n") 6975 } else { 6976 // In non-detailed mode format lengths of per-P run queues as: 6977 // [ len1 len2 len3 len4 ] 6978 print(" ") 6979 if i == 0 { 6980 print("[ ") 6981 } 6982 print(t - h) 6983 if i == len(allp)-1 { 6984 print(" ]") 6985 } 6986 } 6987 } 6988 6989 if !detailed { 6990 // Format per-P schedticks as: schedticks=[ tick1 tick2 tick3 tick4 ]. 6991 print(" schedticks=[ ") 6992 for _, pp := range allp { 6993 print(pp.schedtick) 6994 print(" ") 6995 } 6996 print("]\n") 6997 } 6998 6999 if !detailed { 7000 unlock(&sched.lock) 7001 return 7002 } 7003 7004 for mp := allm; mp != nil; mp = mp.alllink { 7005 pp := mp.p.ptr() 7006 print(" M", mp.id, ": p=") 7007 if pp != nil { 7008 print(pp.id) 7009 } else { 7010 print("nil") 7011 } 7012 print(" curg=") 7013 if mp.curg != nil { 7014 print(mp.curg.goid) 7015 } else { 7016 print("nil") 7017 } 7018 print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=") 7019 if lockedg := mp.lockedg.ptr(); lockedg != nil { 7020 print(lockedg.goid) 7021 } else { 7022 print("nil") 7023 } 7024 print("\n") 7025 } 7026 7027 forEachG(func(gp *g) { 7028 print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=") 7029 if gp.m != nil { 7030 print(gp.m.id) 7031 } else { 7032 print("nil") 7033 } 7034 print(" lockedm=") 7035 if lockedm := gp.lockedm.ptr(); lockedm != nil { 7036 print(lockedm.id) 7037 } else { 7038 print("nil") 7039 } 7040 print("\n") 
7041 }) 7042 unlock(&sched.lock) 7043 } 7044 7045 type updateMaxProcsGState struct { 7046 lock mutex 7047 g *g 7048 idle atomic.Bool 7049 7050 // Readable when idle == false, writable when idle == true. 7051 procs int32 // new GOMAXPROCS value 7052 } 7053 7054 var ( 7055 // GOMAXPROCS update godebug metric. Incremented if automatic 7056 // GOMAXPROCS updates actually change the value of GOMAXPROCS. 7057 updatemaxprocs = &godebugInc{name: "updatemaxprocs"} 7058 7059 // Synchronization and state between updateMaxProcsGoroutine and 7060 // sysmon. 7061 updateMaxProcsG updateMaxProcsGState 7062 7063 // Synchronization between GOMAXPROCS and sysmon. 7064 // 7065 // Setting GOMAXPROCS via a call to GOMAXPROCS disables automatic 7066 // GOMAXPROCS updates. 7067 // 7068 // We want to make two guarantees to callers of GOMAXPROCS. After 7069 // GOMAXPROCS returns: 7070 // 7071 // 1. The runtime will not make any automatic changes to GOMAXPROCS. 7072 // 7073 // 2. The runtime will not perform any of the system calls used to 7074 // determine the appropriate value of GOMAXPROCS (i.e., it won't 7075 // call defaultGOMAXPROCS). 7076 // 7077 // (1) is the baseline guarantee that everyone needs. The GOMAXPROCS 7078 // API isn't useful to anyone if automatic updates may occur after it 7079 // returns. This is easily achieved by double-checking the state under 7080 // STW before committing an automatic GOMAXPROCS update. 7081 // 7082 // (2) doesn't matter to most users, as it is isn't observable as long 7083 // as (1) holds. However, it can be important to users sandboxing Go. 7084 // They want disable these system calls and need some way to know when 7085 // they are guaranteed the calls will stop. 7086 // 7087 // This would be simple to achieve if we simply called 7088 // defaultGOMAXPROCS under STW in updateMaxProcsGoroutine below. 7089 // However, we would like to avoid scheduling this goroutine every 7090 // second when it will almost never do anything. 
Instead, sysmon calls 7091 // defaultGOMAXPROCS to decide whether to schedule 7092 // updateMaxProcsGoroutine. Thus we need to synchronize between sysmon 7093 // and GOMAXPROCS calls. 7094 // 7095 // GOMAXPROCS can't hold a runtime mutex across STW. It could hold a 7096 // semaphore, but sysmon cannot take semaphores. Instead, we have a 7097 // more complex scheme: 7098 // 7099 // * sysmon holds computeMaxProcsLock while calling defaultGOMAXPROCS. 7100 // * sysmon skips the current update if sched.customGOMAXPROCS is 7101 // set. 7102 // * GOMAXPROCS sets sched.customGOMAXPROCS once it is committed to 7103 // changing GOMAXPROCS. 7104 // * GOMAXPROCS takes computeMaxProcsLock to wait for outstanding 7105 // defaultGOMAXPROCS calls to complete. 7106 // 7107 // N.B. computeMaxProcsLock could simply be sched.lock, but we want to 7108 // avoid holding that lock during the potentially slow 7109 // defaultGOMAXPROCS. 7110 computeMaxProcsLock mutex 7111 ) 7112 7113 // Start GOMAXPROCS update helper goroutine. 7114 // 7115 // This is based on forcegchelper. 7116 func defaultGOMAXPROCSUpdateEnable() { 7117 if debug.updatemaxprocs == 0 { 7118 // Unconditionally increment the metric when updates are disabled. 7119 // 7120 // It would be more descriptive if we did a dry run of the 7121 // complete update, determining the appropriate value of 7122 // GOMAXPROCS and the bailing out and just incrementing the 7123 // metric if a change would occur. 7124 // 7125 // Not only is that a lot of ongoing work for a disabled 7126 // feature, but some users need to be able to completely 7127 // disable the update system calls (such as sandboxes). 7128 // Currently, updatemaxprocs=0 serves that purpose. 
7129 updatemaxprocs.IncNonDefault() 7130 return 7131 } 7132 7133 go updateMaxProcsGoroutine() 7134 } 7135 7136 func updateMaxProcsGoroutine() { 7137 updateMaxProcsG.g = getg() 7138 lockInit(&updateMaxProcsG.lock, lockRankUpdateMaxProcsG) 7139 for { 7140 lock(&updateMaxProcsG.lock) 7141 if updateMaxProcsG.idle.Load() { 7142 throw("updateMaxProcsGoroutine: phase error") 7143 } 7144 updateMaxProcsG.idle.Store(true) 7145 goparkunlock(&updateMaxProcsG.lock, waitReasonUpdateGOMAXPROCSIdle, traceBlockSystemGoroutine, 1) 7146 // This goroutine is explicitly resumed by sysmon. 7147 7148 stw := stopTheWorldGC(stwGOMAXPROCS) 7149 7150 // Still OK to update? 7151 lock(&sched.lock) 7152 custom := sched.customGOMAXPROCS 7153 unlock(&sched.lock) 7154 if custom { 7155 startTheWorldGC(stw) 7156 return 7157 } 7158 7159 // newprocs will be processed by startTheWorld 7160 // 7161 // TODO(prattmic): this could use a nicer API. Perhaps add it to the 7162 // stw parameter? 7163 newprocs = updateMaxProcsG.procs 7164 lock(&sched.lock) 7165 sched.customGOMAXPROCS = false 7166 unlock(&sched.lock) 7167 7168 startTheWorldGC(stw) 7169 } 7170 } 7171 7172 func sysmonUpdateGOMAXPROCS() { 7173 // Synchronize with GOMAXPROCS. See comment on computeMaxProcsLock. 7174 lock(&computeMaxProcsLock) 7175 7176 // No update if GOMAXPROCS was set manually. 7177 lock(&sched.lock) 7178 custom := sched.customGOMAXPROCS 7179 curr := gomaxprocs 7180 unlock(&sched.lock) 7181 if custom { 7182 unlock(&computeMaxProcsLock) 7183 return 7184 } 7185 7186 // Don't hold sched.lock while we read the filesystem. 7187 procs := defaultGOMAXPROCS(0) 7188 unlock(&computeMaxProcsLock) 7189 if procs == curr { 7190 // Nothing to do. 7191 return 7192 } 7193 7194 // Sysmon can't directly stop the world. Run the helper to do so on our 7195 // behalf. If updateGOMAXPROCS.idle is false, then a previous update is 7196 // still pending. 
7197 if updateMaxProcsG.idle.Load() { 7198 lock(&updateMaxProcsG.lock) 7199 updateMaxProcsG.procs = procs 7200 updateMaxProcsG.idle.Store(false) 7201 var list gList 7202 list.push(updateMaxProcsG.g) 7203 injectglist(&list) 7204 unlock(&updateMaxProcsG.lock) 7205 } 7206 } 7207 7208 // schedEnableUser enables or disables the scheduling of user 7209 // goroutines. 7210 // 7211 // This does not stop already running user goroutines, so the caller 7212 // should first stop the world when disabling user goroutines. 7213 func schedEnableUser(enable bool) { 7214 lock(&sched.lock) 7215 if sched.disable.user == !enable { 7216 unlock(&sched.lock) 7217 return 7218 } 7219 sched.disable.user = !enable 7220 if enable { 7221 n := sched.disable.runnable.size 7222 globrunqputbatch(&sched.disable.runnable) 7223 unlock(&sched.lock) 7224 for ; n != 0 && sched.npidle.Load() != 0; n-- { 7225 startm(nil, false, false) 7226 } 7227 } else { 7228 unlock(&sched.lock) 7229 } 7230 } 7231 7232 // schedEnabled reports whether gp should be scheduled. It returns 7233 // false is scheduling of gp is disabled. 7234 // 7235 // sched.lock must be held. 7236 func schedEnabled(gp *g) bool { 7237 assertLockHeld(&sched.lock) 7238 7239 if sched.disable.user { 7240 return isSystemGoroutine(gp, true) 7241 } 7242 return true 7243 } 7244 7245 // Put mp on midle list. 7246 // sched.lock must be held. 7247 // May run during STW, so write barriers are not allowed. 7248 // 7249 //go:nowritebarrierrec 7250 func mput(mp *m) { 7251 assertLockHeld(&sched.lock) 7252 7253 sched.midle.push(unsafe.Pointer(mp)) 7254 sched.nmidle++ 7255 checkdead() 7256 } 7257 7258 // Try to get an m from midle list. 7259 // sched.lock must be held. 7260 // May run during STW, so write barriers are not allowed. 
7261 // 7262 //go:nowritebarrierrec 7263 func mget() *m { 7264 assertLockHeld(&sched.lock) 7265 7266 mp := (*m)(sched.midle.pop()) 7267 if mp != nil { 7268 sched.nmidle-- 7269 } 7270 return mp 7271 } 7272 7273 // Try to get a specific m from midle list. Returns nil if it isn't on the 7274 // midle list. 7275 // 7276 // sched.lock must be held. 7277 // May run during STW, so write barriers are not allowed. 7278 // 7279 //go:nowritebarrierrec 7280 func mgetSpecific(mp *m) *m { 7281 assertLockHeld(&sched.lock) 7282 7283 if mp.idleNode.prev == 0 && mp.idleNode.next == 0 { 7284 // Not on the list. 7285 return nil 7286 } 7287 7288 sched.midle.remove(unsafe.Pointer(mp)) 7289 sched.nmidle-- 7290 7291 return mp 7292 } 7293 7294 // Put gp on the global runnable queue. 7295 // sched.lock must be held. 7296 // May run during STW, so write barriers are not allowed. 7297 // 7298 //go:nowritebarrierrec 7299 func globrunqput(gp *g) { 7300 assertLockHeld(&sched.lock) 7301 7302 sched.runq.pushBack(gp) 7303 } 7304 7305 // Put gp at the head of the global runnable queue. 7306 // sched.lock must be held. 7307 // May run during STW, so write barriers are not allowed. 7308 // 7309 //go:nowritebarrierrec 7310 func globrunqputhead(gp *g) { 7311 assertLockHeld(&sched.lock) 7312 7313 sched.runq.push(gp) 7314 } 7315 7316 // Put a batch of runnable goroutines on the global runnable queue. 7317 // This clears *batch. 7318 // sched.lock must be held. 7319 // May run during STW, so write barriers are not allowed. 7320 // 7321 //go:nowritebarrierrec 7322 func globrunqputbatch(batch *gQueue) { 7323 assertLockHeld(&sched.lock) 7324 7325 sched.runq.pushBackAll(*batch) 7326 *batch = gQueue{} 7327 } 7328 7329 // Try get a single G from the global runnable queue. 7330 // sched.lock must be held. 
7331 func globrunqget() *g { 7332 assertLockHeld(&sched.lock) 7333 7334 if sched.runq.size == 0 { 7335 return nil 7336 } 7337 7338 return sched.runq.pop() 7339 } 7340 7341 // Try get a batch of G's from the global runnable queue. 7342 // sched.lock must be held. 7343 func globrunqgetbatch(n int32) (gp *g, q gQueue) { 7344 assertLockHeld(&sched.lock) 7345 7346 if sched.runq.size == 0 { 7347 return 7348 } 7349 7350 n = min(n, sched.runq.size, sched.runq.size/gomaxprocs+1) 7351 7352 gp = sched.runq.pop() 7353 n-- 7354 7355 for ; n > 0; n-- { 7356 gp1 := sched.runq.pop() 7357 q.pushBack(gp1) 7358 } 7359 return 7360 } 7361 7362 // pMask is an atomic bitstring with one bit per P. 7363 type pMask []uint32 7364 7365 // read returns true if P id's bit is set. 7366 func (p pMask) read(id uint32) bool { 7367 word := id / 32 7368 mask := uint32(1) << (id % 32) 7369 return (atomic.Load(&p[word]) & mask) != 0 7370 } 7371 7372 // set sets P id's bit. 7373 func (p pMask) set(id int32) { 7374 word := id / 32 7375 mask := uint32(1) << (id % 32) 7376 atomic.Or(&p[word], mask) 7377 } 7378 7379 // clear clears P id's bit. 7380 func (p pMask) clear(id int32) { 7381 word := id / 32 7382 mask := uint32(1) << (id % 32) 7383 atomic.And(&p[word], ^mask) 7384 } 7385 7386 // any returns true if any bit in p is set. 7387 func (p pMask) any() bool { 7388 for i := range p { 7389 if atomic.Load(&p[i]) != 0 { 7390 return true 7391 } 7392 } 7393 return false 7394 } 7395 7396 // resize resizes the pMask and returns a new one. 7397 // 7398 // The result may alias p, so callers are encouraged to 7399 // discard p. Not safe for concurrent use. 7400 func (p pMask) resize(nprocs int32) pMask { 7401 maskWords := (nprocs + 31) / 32 7402 7403 if maskWords <= int32(cap(p)) { 7404 return p[:maskWords] 7405 } 7406 newMask := make([]uint32, maskWords) 7407 // No need to copy beyond len, old Ps are irrelevant. 7408 copy(newMask, p) 7409 return newMask 7410 } 7411 7412 // pidleput puts p on the _Pidle list. 
now must be a relatively recent call 7413 // to nanotime or zero. Returns now or the current time if now was zero. 7414 // 7415 // This releases ownership of p. Once sched.lock is released it is no longer 7416 // safe to use p. 7417 // 7418 // sched.lock must be held. 7419 // 7420 // May run during STW, so write barriers are not allowed. 7421 // 7422 //go:nowritebarrierrec 7423 func pidleput(pp *p, now int64) int64 { 7424 assertLockHeld(&sched.lock) 7425 7426 if !runqempty(pp) { 7427 throw("pidleput: P has non-empty run queue") 7428 } 7429 if now == 0 { 7430 now = nanotime() 7431 } 7432 if pp.timers.len.Load() == 0 { 7433 timerpMask.clear(pp.id) 7434 } 7435 idlepMask.set(pp.id) 7436 pp.link = sched.pidle 7437 sched.pidle.set(pp) 7438 sched.npidle.Add(1) 7439 if !pp.limiterEvent.start(limiterEventIdle, now) { 7440 throw("must be able to track idle limiter event") 7441 } 7442 return now 7443 } 7444 7445 // pidleget tries to get a p from the _Pidle list, acquiring ownership. 7446 // 7447 // sched.lock must be held. 7448 // 7449 // May run during STW, so write barriers are not allowed. 7450 // 7451 //go:nowritebarrierrec 7452 func pidleget(now int64) (*p, int64) { 7453 assertLockHeld(&sched.lock) 7454 7455 pp := sched.pidle.ptr() 7456 if pp != nil { 7457 // Timer may get added at any time now. 7458 if now == 0 { 7459 now = nanotime() 7460 } 7461 timerpMask.set(pp.id) 7462 idlepMask.clear(pp.id) 7463 sched.pidle = pp.link 7464 sched.npidle.Add(-1) 7465 pp.limiterEvent.stop(limiterEventIdle, now) 7466 } 7467 return pp, now 7468 } 7469 7470 // pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership. 7471 // This is called by spinning Ms (or callers than need a spinning M) that have 7472 // found work. If no P is available, this must synchronized with non-spinning 7473 // Ms that may be preparing to drop their P without discovering this work. 7474 // 7475 // sched.lock must be held. 7476 // 7477 // May run during STW, so write barriers are not allowed. 
7478 // 7479 //go:nowritebarrierrec 7480 func pidlegetSpinning(now int64) (*p, int64) { 7481 assertLockHeld(&sched.lock) 7482 7483 pp, now := pidleget(now) 7484 if pp == nil { 7485 // See "Delicate dance" comment in findRunnable. We found work 7486 // that we cannot take, we must synchronize with non-spinning 7487 // Ms that may be preparing to drop their P. 7488 sched.needspinning.Store(1) 7489 return nil, now 7490 } 7491 7492 return pp, now 7493 } 7494 7495 // runqempty reports whether pp has no Gs on its local run queue. 7496 // It never returns true spuriously. 7497 func runqempty(pp *p) bool { 7498 // Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail, 7499 // 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext. 7500 // Simply observing that runqhead == runqtail and then observing that runqnext == nil 7501 // does not mean the queue is empty. 7502 for { 7503 head := atomic.Load(&pp.runqhead) 7504 tail := atomic.Load(&pp.runqtail) 7505 runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext))) 7506 if tail == atomic.Load(&pp.runqtail) { 7507 return head == tail && runnext == 0 7508 } 7509 } 7510 } 7511 7512 // To shake out latent assumptions about scheduling order, 7513 // we introduce some randomness into scheduling decisions 7514 // when running with the race detector. 7515 // The need for this was made obvious by changing the 7516 // (deterministic) scheduling order in Go 1.5 and breaking 7517 // many poorly-written tests. 7518 // With the randomness here, as long as the tests pass 7519 // consistently with -race, they shouldn't have latent scheduling 7520 // assumptions. 7521 const randomizeScheduler = raceenabled 7522 7523 // runqput tries to put g on the local runnable queue. 7524 // If next is false, runqput adds g to the tail of the runnable queue. 7525 // If next is true, runqput puts g in the pp.runnext slot. 7526 // If the run queue is full, runnext puts g on the global queue. 
7527 // Executed only by the owner P. 7528 func runqput(pp *p, gp *g, next bool) { 7529 if !haveSysmon && next { 7530 // A runnext goroutine shares the same time slice as the 7531 // current goroutine (inheritTime from runqget). To prevent a 7532 // ping-pong pair of goroutines from starving all others, we 7533 // depend on sysmon to preempt "long-running goroutines". That 7534 // is, any set of goroutines sharing the same time slice. 7535 // 7536 // If there is no sysmon, we must avoid runnext entirely or 7537 // risk starvation. 7538 next = false 7539 } 7540 if randomizeScheduler && next && randn(2) == 0 { 7541 next = false 7542 } 7543 7544 if next { 7545 retryNext: 7546 oldnext := pp.runnext 7547 if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) { 7548 goto retryNext 7549 } 7550 if oldnext == 0 { 7551 return 7552 } 7553 // Kick the old runnext out to the regular run queue. 7554 gp = oldnext.ptr() 7555 } 7556 7557 retry: 7558 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 7559 t := pp.runqtail 7560 if t-h < uint32(len(pp.runq)) { 7561 pp.runq[t%uint32(len(pp.runq))].set(gp) 7562 atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption 7563 return 7564 } 7565 if runqputslow(pp, gp, h, t) { 7566 return 7567 } 7568 // the queue is not full, now the put above must succeed 7569 goto retry 7570 } 7571 7572 // Put g and a batch of work from local runnable queue on global queue. 7573 // Executed only by the owner P. 7574 func runqputslow(pp *p, gp *g, h, t uint32) bool { 7575 var batch [len(pp.runq)/2 + 1]*g 7576 7577 // First, grab a batch from local queue. 
7578 n := t - h 7579 n = n / 2 7580 if n != uint32(len(pp.runq)/2) { 7581 throw("runqputslow: queue is not full") 7582 } 7583 for i := uint32(0); i < n; i++ { 7584 batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 7585 } 7586 if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 7587 return false 7588 } 7589 batch[n] = gp 7590 7591 if randomizeScheduler { 7592 for i := uint32(1); i <= n; i++ { 7593 j := cheaprandn(i + 1) 7594 batch[i], batch[j] = batch[j], batch[i] 7595 } 7596 } 7597 7598 // Link the goroutines. 7599 for i := uint32(0); i < n; i++ { 7600 batch[i].schedlink.set(batch[i+1]) 7601 } 7602 7603 q := gQueue{batch[0].guintptr(), batch[n].guintptr(), int32(n + 1)} 7604 7605 // Now put the batch on global queue. 7606 lock(&sched.lock) 7607 globrunqputbatch(&q) 7608 unlock(&sched.lock) 7609 return true 7610 } 7611 7612 // runqputbatch tries to put all the G's on q on the local runnable queue. 7613 // If the local runq is full the input queue still contains unqueued Gs. 7614 // Executed only by the owner P. 7615 func runqputbatch(pp *p, q *gQueue) { 7616 if q.empty() { 7617 return 7618 } 7619 h := atomic.LoadAcq(&pp.runqhead) 7620 t := pp.runqtail 7621 n := uint32(0) 7622 for !q.empty() && t-h < uint32(len(pp.runq)) { 7623 gp := q.pop() 7624 pp.runq[t%uint32(len(pp.runq))].set(gp) 7625 t++ 7626 n++ 7627 } 7628 7629 if randomizeScheduler { 7630 off := func(o uint32) uint32 { 7631 return (pp.runqtail + o) % uint32(len(pp.runq)) 7632 } 7633 for i := uint32(1); i < n; i++ { 7634 j := cheaprandn(i + 1) 7635 pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)] 7636 } 7637 } 7638 7639 atomic.StoreRel(&pp.runqtail, t) 7640 7641 return 7642 } 7643 7644 // Get g from local runnable queue. 7645 // If inheritTime is true, gp should inherit the remaining time in the 7646 // current time slice. Otherwise, it should start a new time slice. 7647 // Executed only by the owner P. 
7648 func runqget(pp *p) (gp *g, inheritTime bool) { 7649 // If there's a runnext, it's the next G to run. 7650 next := pp.runnext 7651 // If the runnext is non-0 and the CAS fails, it could only have been stolen by another P, 7652 // because other Ps can race to set runnext to 0, but only the current P can set it to non-0. 7653 // Hence, there's no need to retry this CAS if it fails. 7654 if next != 0 && pp.runnext.cas(next, 0) { 7655 return next.ptr(), true 7656 } 7657 7658 for { 7659 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 7660 t := pp.runqtail 7661 if t == h { 7662 return nil, false 7663 } 7664 gp := pp.runq[h%uint32(len(pp.runq))].ptr() 7665 if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume 7666 return gp, false 7667 } 7668 } 7669 } 7670 7671 // runqdrain drains the local runnable queue of pp and returns all goroutines in it. 7672 // Executed only by the owner P. 7673 func runqdrain(pp *p) (drainQ gQueue) { 7674 oldNext := pp.runnext 7675 if oldNext != 0 && pp.runnext.cas(oldNext, 0) { 7676 drainQ.pushBack(oldNext.ptr()) 7677 } 7678 7679 retry: 7680 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 7681 t := pp.runqtail 7682 qn := t - h 7683 if qn == 0 { 7684 return 7685 } 7686 if qn > uint32(len(pp.runq)) { // read inconsistent h and t 7687 goto retry 7688 } 7689 7690 if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume 7691 goto retry 7692 } 7693 7694 // We've inverted the order in which it gets G's from the local P's runnable queue 7695 // and then advances the head pointer because we don't want to mess up the statuses of G's 7696 // while runqdrain() and runqsteal() are running in parallel. 
7697 // Thus we should advance the head pointer before draining the local P into a gQueue, 7698 // so that we can update any gp.schedlink only after we take the full ownership of G, 7699 // meanwhile, other P's can't access to all G's in local P's runnable queue and steal them. 7700 // See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details. 7701 for i := uint32(0); i < qn; i++ { 7702 gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 7703 drainQ.pushBack(gp) 7704 } 7705 return 7706 } 7707 7708 // Grabs a batch of goroutines from pp's runnable queue into batch. 7709 // Batch is a ring buffer starting at batchHead. 7710 // Returns number of grabbed goroutines. 7711 // Can be executed by any P. 7712 func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { 7713 for { 7714 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 7715 t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer 7716 n := t - h 7717 n = n - n/2 7718 if n == 0 { 7719 if stealRunNextG { 7720 // Try to steal from pp.runnext. 7721 if next := pp.runnext; next != 0 { 7722 if pp.status == _Prunning { 7723 if mp := pp.m.ptr(); mp != nil { 7724 if gp := mp.curg; gp == nil || readgstatus(gp)&^_Gscan != _Gsyscall { 7725 // Sleep to ensure that pp isn't about to run the g 7726 // we are about to steal. 7727 // The important use case here is when the g running 7728 // on pp ready()s another g and then almost 7729 // immediately blocks. Instead of stealing runnext 7730 // in this window, back off to give pp a chance to 7731 // schedule runnext. This will avoid thrashing gs 7732 // between different Ps. 7733 // A sync chan send/recv takes ~50ns as of time of 7734 // writing, so 3us gives ~50x overshoot. 7735 // If curg is nil, we assume that the P is likely 7736 // to be in the scheduler. If curg isn't nil and isn't 7737 // in a syscall, then it's either running, waiting, or 7738 // runnable. 
In this case we want to sleep because the 7739 // P might either call into the scheduler soon (running), 7740 // or already is (since we found a waiting or runnable 7741 // goroutine hanging off of a running P, suggesting it 7742 // either recently transitioned out of running, or will 7743 // transition to running shortly). 7744 if !osHasLowResTimer { 7745 usleep(3) 7746 } else { 7747 // On some platforms system timer granularity is 7748 // 1-15ms, which is way too much for this 7749 // optimization. So just yield. 7750 osyield() 7751 } 7752 } 7753 } 7754 } 7755 if !pp.runnext.cas(next, 0) { 7756 continue 7757 } 7758 batch[batchHead%uint32(len(batch))] = next 7759 return 1 7760 } 7761 } 7762 return 0 7763 } 7764 if n > uint32(len(pp.runq)/2) { // read inconsistent h and t 7765 continue 7766 } 7767 for i := uint32(0); i < n; i++ { 7768 g := pp.runq[(h+i)%uint32(len(pp.runq))] 7769 batch[(batchHead+i)%uint32(len(batch))] = g 7770 } 7771 if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 7772 return n 7773 } 7774 } 7775 } 7776 7777 // Steal half of elements from local runnable queue of p2 7778 // and put onto local runnable queue of p. 7779 // Returns one of the stolen elements (or nil if failed). 7780 func runqsteal(pp, p2 *p, stealRunNextG bool) *g { 7781 t := pp.runqtail 7782 n := runqgrab(p2, &pp.runq, t, stealRunNextG) 7783 if n == 0 { 7784 return nil 7785 } 7786 n-- 7787 gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr() 7788 if n == 0 { 7789 return gp 7790 } 7791 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 7792 if t-h+n >= uint32(len(pp.runq)) { 7793 throw("runqsteal: runq overflow") 7794 } 7795 atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption 7796 return gp 7797 } 7798 7799 // A gQueue is a dequeue of Gs linked through g.schedlink. A G can only 7800 // be on one gQueue or gList at a time. 
7801 type gQueue struct { 7802 head guintptr 7803 tail guintptr 7804 size int32 7805 } 7806 7807 // empty reports whether q is empty. 7808 func (q *gQueue) empty() bool { 7809 return q.head == 0 7810 } 7811 7812 // push adds gp to the head of q. 7813 func (q *gQueue) push(gp *g) { 7814 gp.schedlink = q.head 7815 q.head.set(gp) 7816 if q.tail == 0 { 7817 q.tail.set(gp) 7818 } 7819 q.size++ 7820 } 7821 7822 // pushBack adds gp to the tail of q. 7823 func (q *gQueue) pushBack(gp *g) { 7824 gp.schedlink = 0 7825 if q.tail != 0 { 7826 q.tail.ptr().schedlink.set(gp) 7827 } else { 7828 q.head.set(gp) 7829 } 7830 q.tail.set(gp) 7831 q.size++ 7832 } 7833 7834 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must 7835 // not be used. 7836 func (q *gQueue) pushBackAll(q2 gQueue) { 7837 if q2.tail == 0 { 7838 return 7839 } 7840 q2.tail.ptr().schedlink = 0 7841 if q.tail != 0 { 7842 q.tail.ptr().schedlink = q2.head 7843 } else { 7844 q.head = q2.head 7845 } 7846 q.tail = q2.tail 7847 q.size += q2.size 7848 } 7849 7850 // pop removes and returns the head of queue q. It returns nil if 7851 // q is empty. 7852 func (q *gQueue) pop() *g { 7853 gp := q.head.ptr() 7854 if gp != nil { 7855 q.head = gp.schedlink 7856 if q.head == 0 { 7857 q.tail = 0 7858 } 7859 q.size-- 7860 } 7861 return gp 7862 } 7863 7864 // popList takes all Gs in q and returns them as a gList. 7865 func (q *gQueue) popList() gList { 7866 stack := gList{q.head, q.size} 7867 *q = gQueue{} 7868 return stack 7869 } 7870 7871 // A gList is a list of Gs linked through g.schedlink. A G can only be 7872 // on one gQueue or gList at a time. 7873 type gList struct { 7874 head guintptr 7875 size int32 7876 } 7877 7878 // empty reports whether l is empty. 7879 func (l *gList) empty() bool { 7880 return l.head == 0 7881 } 7882 7883 // push adds gp to the head of l. 
7884 func (l *gList) push(gp *g) { 7885 gp.schedlink = l.head 7886 l.head.set(gp) 7887 l.size++ 7888 } 7889 7890 // pushAll prepends all Gs in q to l. After this q must not be used. 7891 func (l *gList) pushAll(q gQueue) { 7892 if !q.empty() { 7893 q.tail.ptr().schedlink = l.head 7894 l.head = q.head 7895 l.size += q.size 7896 } 7897 } 7898 7899 // pop removes and returns the head of l. If l is empty, it returns nil. 7900 func (l *gList) pop() *g { 7901 gp := l.head.ptr() 7902 if gp != nil { 7903 l.head = gp.schedlink 7904 l.size-- 7905 } 7906 return gp 7907 } 7908 7909 //go:linkname setMaxThreads runtime/debug.setMaxThreads 7910 func setMaxThreads(in int) (out int) { 7911 lock(&sched.lock) 7912 out = int(sched.maxmcount) 7913 if in > 0x7fffffff { // MaxInt32 7914 sched.maxmcount = 0x7fffffff 7915 } else { 7916 sched.maxmcount = int32(in) 7917 } 7918 checkmcount() 7919 unlock(&sched.lock) 7920 return 7921 } 7922 7923 // procPin should be an internal detail, 7924 // but widely used packages access it using linkname. 7925 // Notable members of the hall of shame include: 7926 // - github.com/bytedance/gopkg 7927 // - github.com/choleraehyq/pid 7928 // - github.com/songzhibin97/gkit 7929 // 7930 // Do not remove or change the type signature. 7931 // See go.dev/issue/67401. 7932 // 7933 //go:linkname procPin 7934 //go:nosplit 7935 func procPin() int { 7936 gp := getg() 7937 mp := gp.m 7938 7939 mp.locks++ 7940 return int(mp.p.ptr().id) 7941 } 7942 7943 // procUnpin should be an internal detail, 7944 // but widely used packages access it using linkname. 7945 // Notable members of the hall of shame include: 7946 // - github.com/bytedance/gopkg 7947 // - github.com/choleraehyq/pid 7948 // - github.com/songzhibin97/gkit 7949 // 7950 // Do not remove or change the type signature. 7951 // See go.dev/issue/67401. 
7952 // 7953 //go:linkname procUnpin 7954 //go:nosplit 7955 func procUnpin() { 7956 gp := getg() 7957 gp.m.locks-- 7958 } 7959 7960 //go:linkname sync_runtime_procPin sync.runtime_procPin 7961 //go:nosplit 7962 func sync_runtime_procPin() int { 7963 return procPin() 7964 } 7965 7966 //go:linkname sync_runtime_procUnpin sync.runtime_procUnpin 7967 //go:nosplit 7968 func sync_runtime_procUnpin() { 7969 procUnpin() 7970 } 7971 7972 //go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin 7973 //go:nosplit 7974 func sync_atomic_runtime_procPin() int { 7975 return procPin() 7976 } 7977 7978 //go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin 7979 //go:nosplit 7980 func sync_atomic_runtime_procUnpin() { 7981 procUnpin() 7982 } 7983 7984 // Active spinning for sync.Mutex. 7985 // 7986 //go:linkname internal_sync_runtime_canSpin internal/sync.runtime_canSpin 7987 //go:nosplit 7988 func internal_sync_runtime_canSpin(i int) bool { 7989 // sync.Mutex is cooperative, so we are conservative with spinning. 7990 // Spin only few times and only if running on a multicore machine and 7991 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. 7992 // As opposed to runtime mutex we don't do passive spinning here, 7993 // because there can be work on global runq or on other Ps. 7994 if i >= active_spin || numCPUStartup <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 { 7995 return false 7996 } 7997 if p := getg().m.p.ptr(); !runqempty(p) { 7998 return false 7999 } 8000 return true 8001 } 8002 8003 //go:linkname internal_sync_runtime_doSpin internal/sync.runtime_doSpin 8004 //go:nosplit 8005 func internal_sync_runtime_doSpin() { 8006 procyield(active_spin_cnt) 8007 } 8008 8009 // Active spinning for sync.Mutex. 8010 // 8011 // sync_runtime_canSpin should be an internal detail, 8012 // but widely used packages access it using linkname. 
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
// It simply forwards to internal_sync_runtime_canSpin.
//
//go:linkname sync_runtime_canSpin sync.runtime_canSpin
//go:nosplit
func sync_runtime_canSpin(i int) bool {
	return internal_sync_runtime_canSpin(i)
}

// sync_runtime_doSpin should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
// It simply forwards to internal_sync_runtime_doSpin.
//
//go:linkname sync_runtime_doSpin sync.runtime_doSpin
//go:nosplit
func sync_runtime_doSpin() {
	internal_sync_runtime_doSpin()
}

// stealOrder is the package-level randomOrder instance.
// NOTE(review): presumably the order used when picking Ps to steal from;
// its users are outside this part of the file — confirm.
var stealOrder randomOrder

// randomOrder/randomEnum are helper types for randomized work stealing.
// They allow to enumerate all Ps in different pseudo-random orders without repetitions.
// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS
// are coprime, then a sequence of (i + X) % GOMAXPROCS gives the required enumeration.
type randomOrder struct {
	count    uint32
	coprimes []uint32
}

// randomEnum is the cursor of a single enumeration handed out by
// randomOrder.start: i counts the steps taken so far, pos is the
// current element, and inc is the stride added (mod count) per step.
type randomEnum struct {
	i     uint32
	count uint32
	pos   uint32
	inc   uint32
}

// reset configures ord for enumerations over count elements. It
// recomputes, in increasing order, every value in [1, count] that is
// coprime with count, reusing the existing coprimes storage.
func (ord *randomOrder) reset(count uint32) {
	ord.count = count
	ord.coprimes = ord.coprimes[:0]
	for c := uint32(1); c <= count; c++ {
		if gcd(c, count) != 1 {
			continue
		}
		ord.coprimes = append(ord.coprimes, c)
	}
}

// start derives a fresh enumeration from the seed i: the starting
// position is i modulo count, and the stride is the coprime selected
// by i/count (wrapping around the coprimes list).
func (ord *randomOrder) start(i uint32) randomEnum {
	n := ord.count
	first := i % n
	stride := ord.coprimes[i/n%uint32(len(ord.coprimes))]
	return randomEnum{
		count: n,
		pos:   first,
		inc:   stride,
	}
}

// done reports whether count steps have been taken.
func (enum *randomEnum) done() bool {
	return enum.count == enum.i
}

// next advances the enumeration by one step.
func (enum *randomEnum) next() {
	enum.i++
	advanced := enum.pos + enum.inc
	enum.pos = advanced % enum.count
}

// position returns the element the enumeration currently points at.
func (enum *randomEnum) position() uint32 {
	return enum.pos
}

// gcd returns the greatest common divisor of a and b, computed with
// the Euclidean algorithm. gcd(a, 0) is a.
func gcd(a, b uint32) uint32 {
	for b != 0 {
		rem := a % b
		a = b
		b = rem
	}
	return a
}

// An initTask represents the set of initializations that need to be done for a package.
// Keep in sync with ../../test/noinit.go:initTask
type initTask struct {
	state uint32 // 0 = uninitialized, 1 = in progress, 2 = done
	nfns  uint32
	// followed by nfns pcs, uintptr sized, one per init function to run
}

// inittrace stores statistics for init functions which are
// updated by malloc and newproc when active is true.
var inittrace tracestat

// tracestat is the set of counters kept for init tracing; doInit1
// snapshots it before and after running a task's init functions and
// prints the difference.
type tracestat struct {
	active bool   // init tracing activation status
	id     uint64 // init goroutine id
	allocs uint64 // heap allocations
	bytes  uint64 // heap allocated bytes
}

// doInit runs doInit1 on every task in ts, in slice order.
func doInit(ts []*initTask) {
	for _, t := range ts {
		doInit1(t)
	}
}

// doInit1 runs the init functions recorded in t, unless t is already
// done. It throws if t is found in the "in progress" state or if t has
// no functions. When inittrace.active is set, it also prints one line
// per task with the start offset, the elapsed wall time, and the bytes
// and allocation counts accumulated in inittrace while the task ran.
func doInit1(t *initTask) {
	switch t.state {
	case 2: // fully initialized
		return
	case 1: // initialization in progress
		throw("recursive call during initialization - linker skew")
	default: // not initialized yet
		t.state = 1 // initialization in progress

		var (
			start  int64
			before tracestat
		)

		if inittrace.active {
			start = nanotime()
			// Load stats non-atomically since inittrace is updated only by this init goroutine.
			before = inittrace
		}

		if t.nfns == 0 {
			// We should have pruned all of these in the linker.
			throw("inittask with no functions")
		}

		// The table of init function PCs follows the initTask header.
		// The header is the two uint32 fields state and nfns, hence
		// the offset of 8 bytes.
		firstFunc := add(unsafe.Pointer(t), 8)
		for i := uint32(0); i < t.nfns; i++ {
			// p is the address of the i'th pointer-sized table slot.
			p := add(firstFunc, uintptr(i)*goarch.PtrSize)
			// Reinterpret the bits of p as a func value and call it.
			// This relies on a func value being a pointer to a word
			// that holds the function's entry PC.
			f := *(*func())(unsafe.Pointer(&p))
			f()
		}

		if inittrace.active {
			end := nanotime()
			// Load stats non-atomically since inittrace is updated only by this init goroutine.
			after := inittrace

			// Use the first init function of the task to look up the
			// package path printed in the trace line.
			f := *(*func())(unsafe.Pointer(&firstFunc))
			pkg := funcpkgpath(findfunc(abi.FuncPCABIInternal(f)))

			// sbuf is scratch space reused for each formatted number.
			var sbuf [24]byte
			print("init ", pkg, " @")
			print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ")
			print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ")
			print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ")
			print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs")
			print("\n")
		}

		t.state = 2 // initialization done
	}
}