Source file src/runtime/semasleep_test.go
1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build !plan9 && !windows && !js && !wasip1 6 7 package runtime_test 8 9 import ( 10 "io" 11 "os/exec" 12 "syscall" 13 "testing" 14 "time" 15 ) 16 17 // Issue #27250. Spurious wakeups to pthread_cond_timedwait_relative_np 18 // shouldn't cause semasleep to retry with the same timeout which would 19 // cause indefinite spinning. 20 func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) { 21 if *flagQuick { 22 t.Skip("-quick") 23 } 24 t.Parallel() // Waits for a program to sleep for 1s. 25 26 exe, err := buildTestProg(t, "testprog") 27 if err != nil { 28 t.Fatal(err) 29 } 30 31 cmd := exec.Command(exe, "After1") 32 stdout, err := cmd.StdoutPipe() 33 if err != nil { 34 t.Fatalf("StdoutPipe: %v", err) 35 } 36 beforeStart := time.Now() 37 if err := cmd.Start(); err != nil { 38 t.Fatalf("Failed to start command: %v", err) 39 } 40 41 waiting := false 42 doneCh := make(chan error, 1) 43 t.Cleanup(func() { 44 cmd.Process.Kill() 45 if waiting { 46 <-doneCh 47 } else { 48 cmd.Wait() 49 } 50 }) 51 52 // Wait for After1 to close its stdout so that we know the runtime's SIGIO 53 // handler is registered. 54 b, err := io.ReadAll(stdout) 55 if len(b) > 0 { 56 t.Logf("read from testprog stdout: %s", b) 57 } 58 if err != nil { 59 t.Fatalf("error reading from testprog: %v", err) 60 } 61 62 // Wait for child exit. 63 // 64 // Note that we must do this after waiting for the write/child end of 65 // stdout to close. Wait closes the read/parent end of stdout, so 66 // starting this goroutine prior to io.ReadAll introduces a race 67 // condition where ReadAll may get fs.ErrClosed if the child exits too 68 // quickly. 69 waiting = true 70 go func() { 71 doneCh <- cmd.Wait() 72 close(doneCh) 73 }() 74 75 // The subprocess begins sleeping for 1s after it writes to stdout, so measure 76 // the timeout from here (not from when we started creating the process). 77 // That should reduce noise from process startup overhead. 78 ready := time.Now() 79 80 // With the repro running, we can continuously send to it 81 // a signal that the runtime considers non-terminal, 82 // such as SIGIO, to spuriously wake up 83 // pthread_cond_timedwait_relative_np. 84 ticker := time.NewTicker(200 * time.Millisecond) 85 defer ticker.Stop() 86 87 checkDoneErr := func(err error) { 88 if err != nil { 89 t.Fatalf("The program returned but unfortunately with an error: %v", err) 90 } 91 if time.Since(beforeStart) < 1*time.Second { 92 // The program was supposed to sleep for a full (monotonic) second; 93 // it should not return before that has elapsed. 94 t.Fatalf("The program stopped too quickly.") 95 } 96 } 97 signaled := 0 98 for { 99 select { 100 // Wait for an arbitrary timeout longer than one second (we use the test binary's 101 // timeout). The subprocess itself attempts to sleep for one second, but if the 102 // machine running the test is heavily loaded that subprocess may not schedule 103 // very quickly even if the bug remains fixed. (This is fine, because if the bug 104 // really is unfixed we can keep the process hung indefinitely, as long as we 105 // signal it often enough.) 106 case <-t.Context().Done(): 107 // If we got paused for a long time for any reason (we're running in parallel 108 // with other tests after all) it could be that the subprocess did actually 109 // finish and not deadlock, we just got stuck as runnable or our wakeup was 110 // delayed. Double-check that we don't have anything from doneCh before 111 // declaring failure. 112 select { 113 case err := <-doneCh: 114 checkDoneErr(err) 115 return 116 default: 117 } 118 119 // Send SIGQUIT to get a goroutine dump. 120 // Stop sending SIGIO so that the program can clean up and actually terminate. 121 cmd.Process.Signal(syscall.SIGQUIT) 122 123 // For failure, we require that we have waited at least 2 seconds. Otherwise 124 // if another test just ran for a long time and we just happened to push the 125 // overall binary to timeout, we will report a failure here that will just 126 // muddy the waters for whoever is trying to investigate the timeout. 127 const minTime = 2 * time.Second 128 if dur := time.Now().Sub(ready); dur > minTime { 129 t.Logf("signaled program %d times", signaled) 130 t.Logf("time waited: %v", dur) 131 t.Error("program failed to return on time and has to be killed, issue #27520 still exists") 132 } else { 133 t.Skipf("test context deadline ended, but test ran for %s<%s, cutting it short", dur, minTime) 134 } 135 return 136 case <-ticker.C: 137 // Send the pesky signal that toggles spinning 138 // indefinitely if #27520 is not fixed. 139 cmd.Process.Signal(syscall.SIGIO) 140 signaled++ 141 case err := <-doneCh: 142 checkDoneErr(err) 143 return 144 } 145 } 146 } 147