Source file src/runtime/tracebuf.go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Trace buffer management.

package runtime

import (
	"internal/runtime/sys"
	"internal/trace/tracev2"
	"unsafe"
)

// Maximum number of bytes required to encode a uint64 in base-128
// (each byte carries 7 bits of payload, so ceil(64/7) = 10).
const traceBytesPerNumber = 10

// traceWriter is the interface for writing all trace data.
//
// This type is passed around as a value, and all of its methods return
// a new traceWriter. This allows for chaining together calls in a fluent-style
// API. This is partly stylistic, and very slightly for performance, since
// the compiler can destructure this value and pass it between calls as
// just regular arguments. However, this style is not load-bearing, and
// we can change it if it's deemed too error-prone.
type traceWriter struct {
	traceLocker
	exp tracev2.Experiment
	*traceBuf
}

// writer returns a traceWriter that writes into the current M's stream.
//
// Once this is called, the caller must guard against stack growth until
// end is called on it. Therefore, it's highly recommended to use this
// API in a "fluent" style, for example tl.writer().event(...).end().
// Better yet, callers just looking to write events should use eventWriter
// when possible, which is a much safer wrapper around this function.
//
// nosplit to allow for safe reentrant tracing from stack growth paths.
//
//go:nosplit
func (tl traceLocker) writer() traceWriter {
	if debugTraceReentrancy {
		// Checks that the invariants of this function are being upheld.
		gp := getg()
		if gp == gp.m.curg {
			tl.mp.trace.oldthrowsplit = gp.throwsplit
			gp.throwsplit = true
		}
	}
	return traceWriter{traceLocker: tl, traceBuf: tl.mp.trace.buf[tl.gen%2][tracev2.NoExperiment]}
}
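
// Illustrative sketch (not part of the original file): a call site written in
// the fluent style recommended above keeps the traceWriter alive for as short
// a window as possible. The event type ev and arguments arg1, arg2 below are
// placeholders, and traceAcquire/traceRelease/ok refer to the tracer's usual
// locking helpers.
//
//	if tl := traceAcquire(); tl.ok() {
//		tl.writer().event(ev, arg1, arg2).end()
//		traceRelease(tl)
//	}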

// unsafeTraceWriter produces a traceWriter that doesn't lock the trace.
//
// It should only be used in contexts where either:
// - Another traceLocker is held.
// - trace.gen is prevented from advancing.
//
// This does not have the same stack growth restrictions as traceLocker.writer.
//
// buf may be nil.
func unsafeTraceWriter(gen uintptr, buf *traceBuf) traceWriter {
	return traceWriter{traceLocker: traceLocker{gen: gen}, traceBuf: buf}
}

// event writes out the bytes of an event into the event stream.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) event(ev tracev2.EventType, args ...traceArg) traceWriter {
	// N.B. Everything in this call must be nosplit to maintain
	// the stack growth related invariants for writing events.

	// Make sure we have room.
	w, _ = w.ensure(1 + (len(args)+1)*traceBytesPerNumber)

	// Compute the timestamp diff that we'll put in the trace.
	ts := traceClockNow()
	if ts <= w.traceBuf.lastTime {
		ts = w.traceBuf.lastTime + 1
	}
	tsDiff := uint64(ts - w.traceBuf.lastTime)
	w.traceBuf.lastTime = ts

	// Write out event.
	w.byte(byte(ev))
	w.varint(tsDiff)
	for _, arg := range args {
		w.varint(uint64(arg))
	}
	return w
}
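
// For reference (added for clarity; derived from event above): each event is
// laid out in the buffer as
//
//	[1 byte: event type] [varint: timestamp delta] [varint: arg] ... [varint: arg]
//
// so an event with N arguments needs at most 1+(N+1)*traceBytesPerNumber
// bytes, which is exactly what the ensure call reserves.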

// end writes the buffer back into the M.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) end() {
	if w.mp == nil {
		// Tolerate a nil mp. It makes code that creates traceWriters directly
		// less error-prone.
		return
	}
	w.mp.trace.buf[w.gen%2][w.exp] = w.traceBuf
	if debugTraceReentrancy {
		// The writer is no longer live; we can drop throwsplit (if it wasn't
		// already set upon entry).
		gp := getg()
		if gp == gp.m.curg {
			gp.throwsplit = w.mp.trace.oldthrowsplit
		}
	}
}

// ensure makes sure that at least maxSize bytes are available to write.
//
// Returns whether the buffer was flushed.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) ensure(maxSize int) (traceWriter, bool) {
	refill := w.traceBuf == nil || !w.available(maxSize)
	if refill {
		w = w.refill()
	}
	return w, refill
}

// flush puts w.traceBuf on the queue of full buffers.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) flush() traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		unlock(&trace.lock)
	})
	w.traceBuf = nil
	return w
}

// refill puts w.traceBuf on the queue of full buffers and refreshes w's buffer.
func (w traceWriter) refill() traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		if trace.empty != nil {
			w.traceBuf = trace.empty
			trace.empty = w.traceBuf.link
			unlock(&trace.lock)
		} else {
			unlock(&trace.lock)
			w.traceBuf = (*traceBuf)(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys, "trace buffer"))
			if w.traceBuf == nil {
				throw("trace: out of memory")
			}
		}
	})
	// Initialize the buffer.
	ts := traceClockNow()
	if ts <= w.traceBuf.lastTime {
		ts = w.traceBuf.lastTime + 1
	}
	w.traceBuf.lastTime = ts
	w.traceBuf.link = nil
	w.traceBuf.pos = 0

	// Tolerate a nil mp.
	mID := ^uint64(0)
	if w.mp != nil {
		mID = uint64(w.mp.procid)
	}

	// Write the buffer's header.
	if w.exp == tracev2.NoExperiment {
		w.byte(byte(tracev2.EvEventBatch))
	} else {
		w.byte(byte(tracev2.EvExperimentalBatch))
		w.byte(byte(w.exp))
	}
	w.varint(uint64(w.gen))
	w.varint(uint64(mID))
	w.varint(uint64(ts))
	w.traceBuf.lenPos = w.varintReserve()
	return w
}
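
// For reference (added for clarity; derived from refill above): every batch
// begins with the header
//
//	[1 byte: EvEventBatch, or EvExperimentalBatch followed by 1 experiment byte]
//	[varint: generation] [varint: M ID] [varint: base timestamp]
//	[traceBytesPerNumber bytes: reserved batch length, patched in by traceBufFlush]
//
// and the events written via event follow immediately after.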

// expWriter returns a traceWriter that writes into the current M's stream for
// the given experiment.
func (tl traceLocker) expWriter(exp tracev2.Experiment) traceWriter {
	return traceWriter{traceLocker: tl, traceBuf: tl.mp.trace.buf[tl.gen%2][exp], exp: exp}
}

// unsafeTraceExpWriter produces a traceWriter for experimental trace batches
// that doesn't lock the trace. Data written to experimental batches need not
// conform to the standard trace format.
//
// It should only be used in contexts where either:
// - Another traceLocker is held.
// - trace.gen is prevented from advancing.
//
// This does not have the same stack growth restrictions as traceLocker.writer.
//
// buf may be nil.
func unsafeTraceExpWriter(gen uintptr, buf *traceBuf, exp tracev2.Experiment) traceWriter {
	return traceWriter{traceLocker: traceLocker{gen: gen}, traceBuf: buf, exp: exp}
}

// traceBufQueue is a FIFO of traceBufs.
type traceBufQueue struct {
	head, tail *traceBuf
}

// push queues buf onto the queue of buffers.
func (q *traceBufQueue) push(buf *traceBuf) {
	buf.link = nil
	if q.head == nil {
		q.head = buf
	} else {
		q.tail.link = buf
	}
	q.tail = buf
}

// pop dequeues from the queue of buffers.
func (q *traceBufQueue) pop() *traceBuf {
	buf := q.head
	if buf == nil {
		return nil
	}
	q.head = buf.link
	if q.head == nil {
		q.tail = nil
	}
	buf.link = nil
	return buf
}

func (q *traceBufQueue) empty() bool {
	return q.head == nil
}

// traceBufHeader is the header of a per-M tracing buffer.
type traceBufHeader struct {
	link     *traceBuf // in trace.empty/full
	lastTime traceTime // when we wrote the last event
	pos      int       // next write offset in arr
	lenPos   int       // position of batch length value
}

// traceBuf is a per-M tracing buffer.
//
// TODO(mknyszek): Rename traceBuf to traceBatch, since they map 1:1 with event batches.
type traceBuf struct {
	_ sys.NotInHeap
	traceBufHeader
	arr [tracev2.MaxBatchSize - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for trace data
}
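
// Note (added for clarity): arr is sized so that unsafe.Sizeof(traceBuf{})
// works out to exactly tracev2.MaxBatchSize (header plus data), which keeps
// any single batch within the maximum batch size.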

// byte appends v to buf.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) byte(v byte) {
	buf.arr[buf.pos] = v
	buf.pos++
}

// varint appends v to buf in little-endian-base-128 encoding.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varint(v uint64) {
	pos := buf.pos
	arr := buf.arr[pos : pos+traceBytesPerNumber]
	for i := range arr {
		if v < 0x80 {
			pos += i + 1
			arr[i] = byte(v)
			break
		}
		arr[i] = 0x80 | byte(v)
		v >>= 7
	}
	buf.pos = pos
}
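
// Worked example (added for clarity): varint emits 7 bits per byte, least
// significant bits first, setting the top bit on every byte except the last.
// For instance, v = 300 (binary 1_0010_1100) is written as the two bytes
//
//	0xac = 0x80 | 0b010_1100 (low 7 bits, continuation bit set)
//	0x02 = 0b10              (remaining bits, continuation bit clear)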

// varintReserve reserves enough space in buf to hold any varint.
//
// Space reserved this way can be filled in with the varintAt method.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varintReserve() int {
	p := buf.pos
	buf.pos += traceBytesPerNumber
	return p
}

// stringData appends s's data directly to buf.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) stringData(s string) {
	buf.pos += copy(buf.arr[buf.pos:], s)
}

// available reports whether there are at least size bytes free in buf.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) available(size int) bool {
	return len(buf.arr)-buf.pos >= size
}

// varintAt writes varint v at byte position pos in buf. This always
// consumes traceBytesPerNumber bytes. This is intended for when the caller
// needs to reserve space for a varint but can't populate it until later.
// Use varintReserve to reserve this space.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varintAt(pos int, v uint64) {
	for i := 0; i < traceBytesPerNumber; i++ {
		if i < traceBytesPerNumber-1 {
			buf.arr[pos] = 0x80 | byte(v)
		} else {
			buf.arr[pos] = byte(v)
		}
		v >>= 7
		pos++
	}
	if v != 0 {
		throw("v could not fit in traceBytesPerNumber")
	}
}
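
// Worked example (added for clarity): unlike varint, varintAt always spends
// all traceBytesPerNumber bytes, padding with continuation bytes so the value
// can be patched in later without moving data. For instance, v = 5 is written
// as the 10 bytes
//
//	0x85 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x00
//
// which still decodes to 5 because the padding bytes contribute only zero bits.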

// traceBufFlush flushes a trace buffer.
//
// Must run on the system stack because trace.lock must be held.
//
//go:systemstack
func traceBufFlush(buf *traceBuf, gen uintptr) {
	assertLockHeld(&trace.lock)

	// Write out the non-header length of the batch in the header.
	//
	// Note: the length of the header is not included to make it easier
	// to calculate this value when deserializing and reserializing the
	// trace. Varints can have additional padding of zero bits that is
	// quite difficult to preserve, and if we include the header we
	// force serializers to do more work. Nothing else actually needs
	// padding.
	buf.varintAt(buf.lenPos, uint64(buf.pos-(buf.lenPos+traceBytesPerNumber)))
	trace.full[gen%2].push(buf)

	// Notify the scheduler that there's work available and that the trace
	// reader should be scheduled.
	if !trace.workAvailable.Load() {
		trace.workAvailable.Store(true)
	}
}
