Source file src/os/pidfd_linux.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for pidfd was added during the course of a few Linux releases:
     6  //  v5.1: pidfd_send_signal syscall;
     7  //  v5.2: CLONE_PIDFD flag for clone syscall;
     8  //  v5.3: pidfd_open syscall, clone3 syscall;
     9  //  v5.4: P_PIDFD idtype support for waitid syscall;
    10  //  v5.6: pidfd_getfd syscall.
    11  //
    12  // N.B. Alternative Linux implementations may not follow this ordering. e.g.,
    13  // QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until
    14  // 8.0.
    15  
    16  package os
    17  
    18  import (
    19  	"errors"
    20  	"internal/syscall/unix"
    21  	"runtime"
    22  	"sync"
    23  	"syscall"
    24  	_ "unsafe" // for linkname
    25  )
    26  
    27  // ensurePidfd initializes the PidFD field in sysAttr if it is not already set.
    28  // It returns the original or modified SysProcAttr struct and a flag indicating
    29  // whether the PidFD should be duplicated before using.
    30  func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
    31  	if !pidfdWorks() {
    32  		return sysAttr, false
    33  	}
    34  
    35  	var pidfd int
    36  
    37  	if sysAttr == nil {
    38  		return &syscall.SysProcAttr{
    39  			PidFD: &pidfd,
    40  		}, false
    41  	}
    42  	if sysAttr.PidFD == nil {
    43  		newSys := *sysAttr // copy
    44  		newSys.PidFD = &pidfd
    45  		return &newSys, false
    46  	}
    47  
    48  	return sysAttr, true
    49  }
    50  
    51  // getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is
    52  // set) and a flag indicating whether the value can be used.
    53  func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
    54  	if !pidfdWorks() {
    55  		return 0, false
    56  	}
    57  
    58  	h := *sysAttr.PidFD
    59  	if needDup {
    60  		dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0)
    61  		if e != nil {
    62  			return 0, false
    63  		}
    64  		h = dupH
    65  	}
    66  	return uintptr(h), true
    67  }
    68  
    69  // pidfdFind returns the process handle for pid.
    70  func pidfdFind(pid int) (uintptr, error) {
    71  	if !pidfdWorks() {
    72  		return 0, syscall.ENOSYS
    73  	}
    74  
    75  	h, err := unix.PidFDOpen(pid, 0)
    76  	if err != nil {
    77  		return 0, convertESRCH(err)
    78  	}
    79  	return h, nil
    80  }
    81  
    82  // pidfdWait waits for the process to complete,
    83  // and updates the process status to done.
    84  func (p *Process) pidfdWait() (*ProcessState, error) {
    85  	// When pidfd is used, there is no wait/kill race (described in CL 23967)
    86  	// because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID,
    87  	// is guaranteed to refer to one particular process). Thus, there is no
    88  	// need for the workaround (blockUntilWaitable + sigMu) from pidWait.
    89  	//
    90  	// We _do_ need to be careful about reuse of the pidfd FD number when
    91  	// closing the pidfd. See handle for more details.
    92  	handle, status := p.handleTransientAcquire()
    93  	switch status {
    94  	case statusDone:
    95  		// Process already completed Wait, or was not found by
    96  		// pidfdFind. Return ECHILD for consistency with what the wait
    97  		// syscall would return.
    98  		return nil, NewSyscallError("wait", syscall.ECHILD)
    99  	case statusReleased:
   100  		return nil, syscall.EINVAL
   101  	}
   102  	defer p.handleTransientRelease()
   103  
   104  	var (
   105  		info   unix.SiginfoChild
   106  		rusage syscall.Rusage
   107  	)
   108  	err := ignoringEINTR(func() error {
   109  		return unix.Waitid(unix.P_PIDFD, int(handle), &info, syscall.WEXITED, &rusage)
   110  	})
   111  	if err != nil {
   112  		return nil, NewSyscallError("waitid", err)
   113  	}
   114  
   115  	// Update the Process status to statusDone.
   116  	// This also releases a reference to the handle.
   117  	p.doRelease(statusDone)
   118  
   119  	return &ProcessState{
   120  		pid:    int(info.Pid),
   121  		status: info.WaitStatus(),
   122  		rusage: &rusage,
   123  	}, nil
   124  }
   125  
   126  // pidfdSendSignal sends a signal to the process.
   127  func (p *Process) pidfdSendSignal(s syscall.Signal) error {
   128  	handle, status := p.handleTransientAcquire()
   129  	switch status {
   130  	case statusDone:
   131  		return ErrProcessDone
   132  	case statusReleased:
   133  		return errors.New("os: process already released")
   134  	}
   135  	defer p.handleTransientRelease()
   136  
   137  	return convertESRCH(unix.PidFDSendSignal(handle, s))
   138  }
   139  
   140  // pidfdWorks returns whether we can use pidfd on this system.
   141  func pidfdWorks() bool {
   142  	return checkPidfdOnce() == nil
   143  }
   144  
// checkPidfdOnce wraps checkPidfd in sync.OnceValue, so the pidfd probe runs
// at most once; every call returns the result (nil or the error) of that
// single run.
var checkPidfdOnce = sync.OnceValue(checkPidfd)
   147  
   148  // checkPidfd checks whether all required pidfd-related syscalls work. This
   149  // consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with
   150  // idtype of P_PIDFD, and clone(CLONE_PIDFD).
   151  //
   152  // Reasons for non-working pidfd syscalls include an older kernel and an
   153  // execution environment in which the above system calls are restricted by
   154  // seccomp or a similar technology.
   155  func checkPidfd() error {
   156  	// In Android version < 12, pidfd-related system calls are not allowed
   157  	// by seccomp and trigger the SIGSYS signal. See issue #69065.
   158  	if runtime.GOOS == "android" {
   159  		ignoreSIGSYS()
   160  		defer restoreSIGSYS()
   161  	}
   162  
   163  	// Get a pidfd of the current process (opening of "/proc/self" won't
   164  	// work for waitid).
   165  	fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
   166  	if err != nil {
   167  		return NewSyscallError("pidfd_open", err)
   168  	}
   169  	defer syscall.Close(int(fd))
   170  
   171  	// Check waitid(P_PIDFD) works.
   172  	err = ignoringEINTR(func() error {
   173  		return unix.Waitid(unix.P_PIDFD, int(fd), nil, syscall.WEXITED, nil)
   174  	})
   175  	// Expect ECHILD from waitid since we're not our own parent.
   176  	if err != syscall.ECHILD {
   177  		return NewSyscallError("pidfd_wait", err)
   178  	}
   179  
   180  	// Check pidfd_send_signal works (should be able to send 0 to itself).
   181  	if err := unix.PidFDSendSignal(fd, 0); err != nil {
   182  		return NewSyscallError("pidfd_send_signal", err)
   183  	}
   184  
   185  	// Verify that clone(CLONE_PIDFD) works.
   186  	//
   187  	// This shouldn't be necessary since pidfd_open was added in Linux 5.3,
   188  	// after CLONE_PIDFD in Linux 5.2, but some alternative Linux
   189  	// implementations may not adhere to this ordering.
   190  	if err := checkClonePidfd(); err != nil {
   191  		return err
   192  	}
   193  
   194  	return nil
   195  }
   196  
// checkClonePidfd is called by checkPidfd to verify that clone(CLONE_PIDFD)
// works; a non-nil result is treated as a failed probe. It is declared here
// without a body because the implementation is provided by syscall.
//
//go:linkname checkClonePidfd
func checkClonePidfd() error
   201  
// ignoreSIGSYS is called by checkPidfd on Android before it probes the
// pidfd-related system calls, which seccomp may reject with SIGSYS there
// (see issue #69065). It is declared here without a body because the
// implementation is provided by runtime.
//
//go:linkname ignoreSIGSYS
func ignoreSIGSYS()
   206  
// restoreSIGSYS is the counterpart of ignoreSIGSYS: checkPidfd defers it
// immediately after calling ignoreSIGSYS on Android. Like ignoreSIGSYS, it is
// declared here without a body and supplied through linkname (presumably by
// runtime as well).
//
//go:linkname restoreSIGSYS
func restoreSIGSYS()
   209  

View as plain text