Source file src/runtime/semasleep_test.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !plan9 && !windows && !js && !wasip1
     6  
     7  package runtime_test
     8  
     9  import (
    10  	"io"
    11  	"os/exec"
    12  	"syscall"
    13  	"testing"
    14  	"time"
    15  )
    16  
    17  // Issue #27250. Spurious wakeups to pthread_cond_timedwait_relative_np
    18  // shouldn't cause semasleep to retry with the same timeout which would
    19  // cause indefinite spinning.
    20  func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) {
    21  	if *flagQuick {
    22  		t.Skip("-quick")
    23  	}
    24  	t.Parallel() // Waits for a program to sleep for 1s.
    25  
    26  	exe, err := buildTestProg(t, "testprog")
    27  	if err != nil {
    28  		t.Fatal(err)
    29  	}
    30  
    31  	cmd := exec.Command(exe, "After1")
    32  	stdout, err := cmd.StdoutPipe()
    33  	if err != nil {
    34  		t.Fatalf("StdoutPipe: %v", err)
    35  	}
    36  	beforeStart := time.Now()
    37  	if err := cmd.Start(); err != nil {
    38  		t.Fatalf("Failed to start command: %v", err)
    39  	}
    40  
    41  	waiting := false
    42  	doneCh := make(chan error, 1)
    43  	t.Cleanup(func() {
    44  		cmd.Process.Kill()
    45  		if waiting {
    46  			<-doneCh
    47  		} else {
    48  			cmd.Wait()
    49  		}
    50  	})
    51  
    52  	// Wait for After1 to close its stdout so that we know the runtime's SIGIO
    53  	// handler is registered.
    54  	b, err := io.ReadAll(stdout)
    55  	if len(b) > 0 {
    56  		t.Logf("read from testprog stdout: %s", b)
    57  	}
    58  	if err != nil {
    59  		t.Fatalf("error reading from testprog: %v", err)
    60  	}
    61  
    62  	// Wait for child exit.
    63  	//
    64  	// Note that we must do this after waiting for the write/child end of
    65  	// stdout to close. Wait closes the read/parent end of stdout, so
    66  	// starting this goroutine prior to io.ReadAll introduces a race
    67  	// condition where ReadAll may get fs.ErrClosed if the child exits too
    68  	// quickly.
    69  	waiting = true
    70  	go func() {
    71  		doneCh <- cmd.Wait()
    72  		close(doneCh)
    73  	}()
    74  
    75  	// The subprocess begins sleeping for 1s after it writes to stdout, so measure
    76  	// the timeout from here (not from when we started creating the process).
    77  	// That should reduce noise from process startup overhead.
    78  	ready := time.Now()
    79  
    80  	// With the repro running, we can continuously send to it
    81  	// a signal that the runtime considers non-terminal,
    82  	// such as SIGIO, to spuriously wake up
    83  	// pthread_cond_timedwait_relative_np.
    84  	ticker := time.NewTicker(200 * time.Millisecond)
    85  	defer ticker.Stop()
    86  
    87  	checkDoneErr := func(err error) {
    88  		if err != nil {
    89  			t.Fatalf("The program returned but unfortunately with an error: %v", err)
    90  		}
    91  		if time.Since(beforeStart) < 1*time.Second {
    92  			// The program was supposed to sleep for a full (monotonic) second;
    93  			// it should not return before that has elapsed.
    94  			t.Fatalf("The program stopped too quickly.")
    95  		}
    96  	}
    97  	signaled := 0
    98  	for {
    99  		select {
   100  		// Wait for an arbitrary timeout longer than one second (we use the test binary's
   101  		// timeout). The subprocess itself attempts to sleep for one second, but if the
   102  		// machine running the test is heavily loaded that subprocess may not schedule
   103  		// very quickly even if the bug remains fixed. (This is fine, because if the bug
   104  		// really is unfixed we can keep the process hung indefinitely, as long as we
   105  		// signal it often enough.)
   106  		case <-t.Context().Done():
   107  			// If we got paused for a long time for any reason (we're running in parallel
   108  			// with other tests after all) it could be that the subprocess did actually
   109  			// finish and not deadlock, we just got stuck as runnable or our wakeup was
   110  			// delayed. Double-check that we don't have anything from doneCh before
   111  			// declaring failure.
   112  			select {
   113  			case err := <-doneCh:
   114  				checkDoneErr(err)
   115  				return
   116  			default:
   117  			}
   118  
   119  			// Send SIGQUIT to get a goroutine dump.
   120  			// Stop sending SIGIO so that the program can clean up and actually terminate.
   121  			cmd.Process.Signal(syscall.SIGQUIT)
   122  
   123  			// For failure, we require that we have waited at least 2 seconds. Otherwise
   124  			// if another test just ran for a long time and we just happened to push the
   125  			// overall binary to timeout, we will report a failure here that will just
   126  			// muddy the waters for whoever is trying to investigate the timeout.
   127  			const minTime = 2 * time.Second
   128  			if dur := time.Now().Sub(ready); dur > minTime {
   129  				t.Logf("signaled program %d times", signaled)
   130  				t.Logf("time waited: %v", dur)
   131  				t.Error("program failed to return on time and has to be killed, issue #27520 still exists")
   132  			} else {
   133  				t.Skipf("test context deadline ended, but test ran for %s<%s, cutting it short", dur, minTime)
   134  			}
   135  			return
   136  		case <-ticker.C:
   137  			// Send the pesky signal that toggles spinning
   138  			// indefinitely if #27520 is not fixed.
   139  			cmd.Process.Signal(syscall.SIGIO)
   140  			signaled++
   141  		case err := <-doneCh:
   142  			checkDoneErr(err)
   143  			return
   144  		}
   145  	}
   146  }
   147  

View as plain text