order.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  import (
     8  	"fmt"
     9  	"slices"
    10  	"strings"
    11  
    12  	"internal/trace/tracev2"
    13  	"internal/trace/version"
    14  )
    15  
    16  // ordering emulates Go scheduler state for both validation and
    17  // for putting events in the right order.
    18  //
    19  // The interface to ordering consists of two methods: Advance
    20  // and Next. Advance is called to try and advance an event and
    21  // add completed events to the ordering. Next is used to pick
    22  // off events in the ordering.
    23  type ordering struct {
    24  	traceVer    version.Version
    25  	gStates     map[GoID]*gState
    26  	pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
    27  	mStates     map[ThreadID]*mState
    28  	activeTasks map[TaskID]taskState
    29  	gcSeq       uint64
    30  	gcState     gcState
    31  	initialGen  uint64
    32  	queue       queue[Event]
    33  }
    34  
    35  // Advance checks if it's valid to proceed with ev which came from thread m.
    36  //
    37  // It assumes the gen value passed to it is monotonically increasing across calls.
    38  //
    39  // If any error is returned, then the trace is broken and trace parsing must cease.
    40  // If it's not valid to advance with ev, but no error was encountered, the caller
    41  // should attempt to advance with other candidate events from other threads. If the
    42  // caller runs out of candidates, the trace is invalid.
    43  //
    44  // If this returns true, Next is guaranteed to return a complete event. However,
    45  // multiple events may be added to the ordering, so the caller should (but is not
    46  // required to) continue to call Next until it is exhausted.
    47  func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
    48  	if o.initialGen == 0 {
    49  		// Set the initial gen if necessary.
    50  		o.initialGen = gen
    51  	}
    52  
    53  	var curCtx, newCtx schedCtx
    54  	curCtx.M = m
    55  	newCtx.M = m
    56  
    57  	var ms *mState
    58  	if m == NoThread {
    59  		curCtx.P = NoProc
    60  		curCtx.G = NoGoroutine
    61  		newCtx = curCtx
    62  	} else {
    63  		// Pull out or create the mState for this event.
    64  		var ok bool
    65  		ms, ok = o.mStates[m]
    66  		if !ok {
    67  			ms = &mState{
    68  				g: NoGoroutine,
    69  				p: NoProc,
    70  			}
    71  			o.mStates[m] = ms
    72  		}
    73  		curCtx.P = ms.p
    74  		curCtx.G = ms.g
    75  		newCtx = curCtx
    76  	}
    77  
    78  	f := orderingDispatch[ev.typ]
    79  	if f == nil {
    80  		return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
    81  	}
    82  	newCtx, ok, err := f(o, ev, evt, m, gen, curCtx)
    83  	if err == nil && ok && ms != nil {
    84  		// Update the mState for this event.
    85  		ms.p = newCtx.P
    86  		ms.g = newCtx.G
    87  	}
    88  	return ok, err
    89  }
    90  
    91  func (o *ordering) evName(typ tracev2.EventType) string {
    92  	return o.traceVer.EventName(typ)
    93  }
    94  
    95  type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error)
    96  
    97  var orderingDispatch = [256]orderingHandleFunc{
    98  	// Procs.
    99  	tracev2.EvProcsChange: (*ordering).advanceAnnotation,
   100  	tracev2.EvProcStart:   (*ordering).advanceProcStart,
   101  	tracev2.EvProcStop:    (*ordering).advanceProcStop,
   102  	tracev2.EvProcSteal:   (*ordering).advanceProcSteal,
   103  	tracev2.EvProcStatus:  (*ordering).advanceProcStatus,
   104  
   105  	// Goroutines.
   106  	tracev2.EvGoCreate:            (*ordering).advanceGoCreate,
   107  	tracev2.EvGoCreateSyscall:     (*ordering).advanceGoCreateSyscall,
   108  	tracev2.EvGoStart:             (*ordering).advanceGoStart,
   109  	tracev2.EvGoDestroy:           (*ordering).advanceGoStopExec,
   110  	tracev2.EvGoDestroySyscall:    (*ordering).advanceGoDestroySyscall,
   111  	tracev2.EvGoStop:              (*ordering).advanceGoStopExec,
   112  	tracev2.EvGoBlock:             (*ordering).advanceGoStopExec,
   113  	tracev2.EvGoUnblock:           (*ordering).advanceGoUnblock,
   114  	tracev2.EvGoSyscallBegin:      (*ordering).advanceGoSyscallBegin,
   115  	tracev2.EvGoSyscallEnd:        (*ordering).advanceGoSyscallEnd,
   116  	tracev2.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked,
   117  	tracev2.EvGoStatus:            (*ordering).advanceGoStatus,
   118  
   119  	// STW.
   120  	tracev2.EvSTWBegin: (*ordering).advanceGoRangeBegin,
   121  	tracev2.EvSTWEnd:   (*ordering).advanceGoRangeEnd,
   122  
   123  	// GC events.
   124  	tracev2.EvGCActive:           (*ordering).advanceGCActive,
   125  	tracev2.EvGCBegin:            (*ordering).advanceGCBegin,
   126  	tracev2.EvGCEnd:              (*ordering).advanceGCEnd,
   127  	tracev2.EvGCSweepActive:      (*ordering).advanceGCSweepActive,
   128  	tracev2.EvGCSweepBegin:       (*ordering).advanceGCSweepBegin,
   129  	tracev2.EvGCSweepEnd:         (*ordering).advanceGCSweepEnd,
   130  	tracev2.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive,
   131  	tracev2.EvGCMarkAssistBegin:  (*ordering).advanceGoRangeBegin,
   132  	tracev2.EvGCMarkAssistEnd:    (*ordering).advanceGoRangeEnd,
   133  	tracev2.EvHeapAlloc:          (*ordering).advanceHeapMetric,
   134  	tracev2.EvHeapGoal:           (*ordering).advanceHeapMetric,
   135  
   136  	// Annotations.
   137  	tracev2.EvGoLabel:         (*ordering).advanceAnnotation,
   138  	tracev2.EvUserTaskBegin:   (*ordering).advanceUserTaskBegin,
   139  	tracev2.EvUserTaskEnd:     (*ordering).advanceUserTaskEnd,
   140  	tracev2.EvUserRegionBegin: (*ordering).advanceUserRegionBegin,
   141  	tracev2.EvUserRegionEnd:   (*ordering).advanceUserRegionEnd,
   142  	tracev2.EvUserLog:         (*ordering).advanceAnnotation,
   143  
   144  	// Coroutines. Added in Go 1.23.
   145  	tracev2.EvGoSwitch:        (*ordering).advanceGoSwitch,
   146  	tracev2.EvGoSwitchDestroy: (*ordering).advanceGoSwitch,
   147  	tracev2.EvGoCreateBlocked: (*ordering).advanceGoCreate,
   148  
   149  	// GoStatus event with a stack. Added in Go 1.23.
   150  	tracev2.EvGoStatusStack: (*ordering).advanceGoStatus,
   151  
   152  	// Experimental events.
   153  
   154  	// Experimental heap span events. Added in Go 1.23.
   155  	tracev2.EvSpan:      (*ordering).advanceAllocFree,
   156  	tracev2.EvSpanAlloc: (*ordering).advanceAllocFree,
   157  	tracev2.EvSpanFree:  (*ordering).advanceAllocFree,
   158  
   159  	// Experimental heap object events. Added in Go 1.23.
   160  	tracev2.EvHeapObject:      (*ordering).advanceAllocFree,
   161  	tracev2.EvHeapObjectAlloc: (*ordering).advanceAllocFree,
   162  	tracev2.EvHeapObjectFree:  (*ordering).advanceAllocFree,
   163  
   164  	// Experimental goroutine stack events. Added in Go 1.23.
   165  	tracev2.EvGoroutineStack:      (*ordering).advanceAllocFree,
   166  	tracev2.EvGoroutineStackAlloc: (*ordering).advanceAllocFree,
   167  	tracev2.EvGoroutineStackFree:  (*ordering).advanceAllocFree,
   168  }
   169  
   170  func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   171  	pid := ProcID(ev.args[0])
   172  	status := tracev2.ProcStatus(ev.args[1])
   173  	if int(status) >= len(tracev2ProcStatus2ProcState) {
   174  		return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
   175  	}
   176  	oldState := tracev2ProcStatus2ProcState[status]
   177  	if s, ok := o.pStates[pid]; ok {
   178  		if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscall {
   179  			// ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
   180  			// potential loss of information, but if we're already in ProcSyscall,
   181  			// we haven't lost the relevant information. Promote the status and advance.
   182  			oldState = ProcRunning
   183  			ev.args[1] = uint64(tracev2.ProcSyscall)
   184  		} else if status == tracev2.ProcSyscallAbandoned && s.status == tracev2.ProcSyscallAbandoned {
   185  			// If we're passing through ProcSyscallAbandoned, then there's no promotion
   186  			// to do. We've lost the M that this P is associated with. However it got there,
   187  			// it's going to appear as idle in the API, so pass through as idle.
   188  			oldState = ProcIdle
   189  			ev.args[1] = uint64(tracev2.ProcSyscallAbandoned)
   190  		} else if s.status != status {
   191  			return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
   192  		}
   193  		s.seq = makeSeq(gen, 0) // Reset seq.
   194  	} else {
   195  		o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
   196  		if gen == o.initialGen {
   197  			oldState = ProcUndetermined
   198  		} else {
   199  			oldState = ProcNotExist
   200  		}
   201  	}
   202  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
   203  
   204  	// Bind the proc to the new context, if it's running.
   205  	newCtx := curCtx
   206  	if status == tracev2.ProcRunning || status == tracev2.ProcSyscall {
   207  		newCtx.P = pid
   208  	}
   209  	// If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've
   210  	// promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this
   211  	// P is about to get stolen and its status very likely isn't being emitted by the same
   212  	// thread it was bound to. Since this status is Running -> Running and Running is binding,
   213  	// we need to make sure we emit it in the right context: the context to which it is bound.
   214  	// Find it, and set our current context to it.
   215  	if status == tracev2.ProcSyscallAbandoned && oldState == ProcRunning {
   216  		// N.B. This is slow but it should be fairly rare.
   217  		found := false
   218  		for mid, ms := range o.mStates {
   219  			if ms.p == pid {
   220  				curCtx.M = mid
   221  				curCtx.P = pid
   222  				curCtx.G = ms.g
   223  				found = true
   224  			}
   225  		}
   226  		if !found {
   227  			return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
   228  		}
   229  	}
   230  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   231  	return newCtx, true, nil
   232  }
   233  
   234  func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   235  	pid := ProcID(ev.args[0])
   236  	seq := makeSeq(gen, ev.args[1])
   237  
   238  	// Try to advance. We might fail here due to sequencing, because the P hasn't
   239  	// had a status emitted, or because we already have a P and we're in a syscall,
   240  	// and we haven't observed that it was stolen from us yet.
   241  	state, ok := o.pStates[pid]
   242  	if !ok || state.status != tracev2.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
   243  		// We can't make an inference as to whether this is bad. We could just be seeing
   244  		// a ProcStart on a different M before the proc's state was emitted, or before we
   245  		// got to the right point in the trace.
   246  		//
   247  		// Note that we also don't advance here if we have a P and we're in a syscall.
   248  		return curCtx, false, nil
   249  	}
   250  	// We can advance this P. Check some invariants.
   251  	//
   252  	// We might have a goroutine if a goroutine is exiting a syscall.
   253  	reqs := schedReqs{M: mustHave, P: mustNotHave, G: mayHave}
   254  	if err := validateCtx(curCtx, reqs); err != nil {
   255  		return curCtx, false, err
   256  	}
   257  	state.status = tracev2.ProcRunning
   258  	state.seq = seq
   259  	newCtx := curCtx
   260  	newCtx.P = pid
   261  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   262  	return newCtx, true, nil
   263  }
   264  
   265  func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   266  	// We must be able to advance this P.
   267  	//
   268  	// There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
   269  	// is stopped by the same M that started it, while ProcSteal is used when another M
   270  	// steals the P by stopping it from a distance.
   271  	//
   272  	// Since a P is bound to an M, and we're stopping on the same M we started, it must
   273  	// always be possible to advance the current M's P from a ProcStop. This is also why
   274  	// ProcStop doesn't need a sequence number.
   275  	state, ok := o.pStates[curCtx.P]
   276  	if !ok {
   277  		return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", o.evName(ev.typ), curCtx.P)
   278  	}
   279  	if state.status != tracev2.ProcRunning && state.status != tracev2.ProcSyscall {
   280  		return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", o.evName(ev.typ), tracev2.ProcRunning, tracev2.ProcSyscall)
   281  	}
   282  	reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
   283  	if err := validateCtx(curCtx, reqs); err != nil {
   284  		return curCtx, false, err
   285  	}
   286  	state.status = tracev2.ProcIdle
   287  	newCtx := curCtx
   288  	newCtx.P = NoProc
   289  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   290  	return newCtx, true, nil
   291  }
   292  
   293  func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   294  	pid := ProcID(ev.args[0])
   295  	seq := makeSeq(gen, ev.args[1])
   296  	state, ok := o.pStates[pid]
   297  	if !ok || (state.status != tracev2.ProcSyscall && state.status != tracev2.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
   298  		// We can't make an inference as to whether this is bad. We could just be seeing
   299  		// a ProcStart on a different M before the proc's state was emitted, or before we
   300  		// got to the right point in the trace.
   301  		return curCtx, false, nil
   302  	}
   303  	// We can advance this P. Check some invariants.
   304  	reqs := schedReqs{M: mustHave, P: mayHave, G: mayHave}
   305  	if err := validateCtx(curCtx, reqs); err != nil {
   306  		return curCtx, false, err
   307  	}
   308  	// Smuggle in the P state that let us advance so we can surface information to the event.
   309  	// Specifically, we need to make sure that the event is interpreted not as a transition of
   310  	// ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
   311  	//
   312  	// ProcRunning is binding, but we may be running with a P on the current M and we can't
   313  	// bind another P. This P is about to go ProcIdle anyway.
   314  	oldStatus := state.status
   315  	ev.extra(version.Go122)[0] = uint64(oldStatus)
   316  
   317  	// Update the P's status and sequence number.
   318  	state.status = tracev2.ProcIdle
   319  	state.seq = seq
   320  
   321  	// If we've lost information then don't try to do anything with the M.
   322  	// It may have moved on and we can't be sure.
   323  	if oldStatus == tracev2.ProcSyscallAbandoned {
   324  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   325  		return curCtx, true, nil
   326  	}
   327  
   328  	// Validate that the M we're stealing from is what we expect.
   329  	mid := ThreadID(ev.args[2]) // The M we're stealing from.
   330  
   331  	newCtx := curCtx
   332  	if mid == curCtx.M {
   333  		// We're stealing from ourselves. This behaves like a ProcStop.
   334  		if curCtx.P != pid {
   335  			return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
   336  		}
   337  		newCtx.P = NoProc
   338  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   339  		return newCtx, true, nil
   340  	}
   341  
   342  	// We're stealing from some other M.
   343  	mState, ok := o.mStates[mid]
   344  	if !ok {
   345  		return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
   346  	}
   347  
   348  	// Make sure we're actually stealing the right P.
   349  	if mState.p != pid {
   350  		return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
   351  	}
   352  
   353  	// Tell the M it has no P so it can proceed.
   354  	//
   355  	// This is safe because we know the P was in a syscall and
   356  	// the other M must be trying to get out of the syscall.
   357  	// GoSyscallEndBlocked cannot advance until the corresponding
   358  	// M loses its P.
   359  	mState.p = NoProc
   360  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   361  	return newCtx, true, nil
   362  }
   363  
   364  func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   365  	gid := GoID(ev.args[0])
   366  	mid := ThreadID(ev.args[1])
   367  	status := tracev2.GoStatus(ev.args[2])
   368  
   369  	if int(status) >= len(tracev2GoStatus2GoState) {
   370  		return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
   371  	}
   372  	oldState := tracev2GoStatus2GoState[status]
   373  	if s, ok := o.gStates[gid]; ok {
   374  		if s.status != status {
   375  			return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
   376  		}
   377  		s.seq = makeSeq(gen, 0) // Reset seq.
   378  	} else if gen == o.initialGen {
   379  		// Set the state.
   380  		o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
   381  		oldState = GoUndetermined
   382  	} else {
   383  		return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
   384  	}
   385  	ev.args[2] = uint64(oldState)<<32 | uint64(status) // Smuggle in the old state for StateTransition.
   386  
   387  	newCtx := curCtx
   388  	switch status {
   389  	case tracev2.GoRunning:
   390  		// Bind the goroutine to the new context, since it's running.
   391  		newCtx.G = gid
   392  	case tracev2.GoSyscall:
   393  		if mid == NoThread {
   394  			return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
   395  		}
   396  		// Is the syscall on this thread? If so, bind it to the context.
   397  		// Otherwise, we're talking about a G sitting in a syscall on an M.
   398  		// Validate the named M.
   399  		if mid == curCtx.M {
   400  			if gen != o.initialGen && curCtx.G != gid {
   401  				// If this isn't the first generation, we *must* have seen this
   402  				// binding occur already. Even if the G was blocked in a syscall
   403  				// for multiple generations since trace start, we would have seen
   404  				// a previous GoStatus event that bound the goroutine to an M.
   405  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
   406  			}
   407  			newCtx.G = gid
   408  			break
   409  		}
   410  		// Now we're talking about a thread and goroutine that have been
   411  		// blocked on a syscall for the entire generation. This case must
   412  		// not have a P; the runtime makes sure that all Ps are traced at
   413  		// the beginning of a generation, which involves taking a P back
   414  		// from every thread.
   415  		ms, ok := o.mStates[mid]
   416  		if ok {
   417  			// This M has been seen. That means we must have seen this
   418  			// goroutine go into a syscall on this thread at some point.
   419  			if ms.g != gid {
   420  				// But the G on the M doesn't match. Something's wrong.
   421  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
   422  			}
   423  			// This case is just a Syscall->Syscall event, which needs to
   424  			// appear as having the G currently bound to this M.
   425  			curCtx.G = ms.g
   426  		} else if !ok {
   427  			// The M hasn't been seen yet. That means this goroutine
   428  			// has just been sitting in a syscall on this M. Create
   429  			// a state for it.
   430  			o.mStates[mid] = &mState{g: gid, p: NoProc}
   431  			// Don't set curCtx.G in this case because this event is the
   432  			// binding event (and curCtx represents the "before" state).
   433  		}
   434  		// Update the current context to the M we're talking about.
   435  		curCtx.M = mid
   436  	}
   437  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   438  	return newCtx, true, nil
   439  }
   440  
   441  func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   442  	// Goroutines must be created on a running P, but may or may not be created
   443  	// by a running goroutine.
   444  	reqs := schedReqs{M: mustHave, P: mustHave, G: mayHave}
   445  	if err := validateCtx(curCtx, reqs); err != nil {
   446  		return curCtx, false, err
   447  	}
   448  	// If we have a goroutine, it must be running.
   449  	if state, ok := o.gStates[curCtx.G]; ok && state.status != tracev2.GoRunning {
   450  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   451  	}
   452  	// This goroutine created another. Add a state for it.
   453  	newgid := GoID(ev.args[0])
   454  	if _, ok := o.gStates[newgid]; ok {
   455  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
   456  	}
   457  	status := tracev2.GoRunnable
   458  	if ev.typ == tracev2.EvGoCreateBlocked {
   459  		status = tracev2.GoWaiting
   460  	}
   461  	o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)}
   462  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   463  	return curCtx, true, nil
   464  }
   465  
   466  func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   467  	// These are goroutine events that all require an active running
   468  	// goroutine on some thread. They must *always* be advance-able,
   469  	// since running goroutines are bound to their M.
   470  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   471  		return curCtx, false, err
   472  	}
   473  	state, ok := o.gStates[curCtx.G]
   474  	if !ok {
   475  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   476  	}
   477  	if state.status != tracev2.GoRunning {
   478  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   479  	}
   480  	// Handle each case slightly differently; we just group them together
   481  	// because they have shared preconditions.
   482  	newCtx := curCtx
   483  	switch ev.typ {
   484  	case tracev2.EvGoDestroy:
   485  		// This goroutine is exiting itself.
   486  		delete(o.gStates, curCtx.G)
   487  		newCtx.G = NoGoroutine
   488  	case tracev2.EvGoStop:
   489  		// Goroutine stopped (yielded). It's runnable but not running on this M.
   490  		state.status = tracev2.GoRunnable
   491  		newCtx.G = NoGoroutine
   492  	case tracev2.EvGoBlock:
   493  		// Goroutine blocked. It's waiting now and not running on this M.
   494  		state.status = tracev2.GoWaiting
   495  		newCtx.G = NoGoroutine
   496  	}
   497  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   498  	return newCtx, true, nil
   499  }
   500  
   501  func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   502  	gid := GoID(ev.args[0])
   503  	seq := makeSeq(gen, ev.args[1])
   504  	state, ok := o.gStates[gid]
   505  	if !ok || state.status != tracev2.GoRunnable || !seq.succeeds(state.seq) {
   506  		// We can't make an inference as to whether this is bad. We could just be seeing
   507  		// a GoStart on a different M before the goroutine was created, before it had its
   508  		// state emitted, or before we got to the right point in the trace yet.
   509  		return curCtx, false, nil
   510  	}
   511  	// We can advance this goroutine. Check some invariants.
   512  	reqs := schedReqs{M: mustHave, P: mustHave, G: mustNotHave}
   513  	if err := validateCtx(curCtx, reqs); err != nil {
   514  		return curCtx, false, err
   515  	}
   516  	state.status = tracev2.GoRunning
   517  	state.seq = seq
   518  	newCtx := curCtx
   519  	newCtx.G = gid
   520  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   521  	return newCtx, true, nil
   522  }
   523  
   524  func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   525  	// N.B. These both reference the goroutine to unblock, not the current goroutine.
   526  	gid := GoID(ev.args[0])
   527  	seq := makeSeq(gen, ev.args[1])
   528  	state, ok := o.gStates[gid]
   529  	if !ok || state.status != tracev2.GoWaiting || !seq.succeeds(state.seq) {
   530  		// We can't make an inference as to whether this is bad. We could just be seeing
   531  		// a GoUnblock on a different M before the goroutine was created and blocked itself,
   532  		// before it had its state emitted, or before we got to the right point in the trace yet.
   533  		return curCtx, false, nil
   534  	}
   535  	state.status = tracev2.GoRunnable
   536  	state.seq = seq
   537  	// N.B. No context to validate. Basically anything can unblock
   538  	// a goroutine (e.g. sysmon).
   539  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   540  	return curCtx, true, nil
   541  }
   542  
   543  func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   544  	// GoSwitch and GoSwitchDestroy represent a trio of events:
   545  	// - Unblock of the goroutine to switch to.
   546  	// - Block or destroy of the current goroutine.
   547  	// - Start executing the next goroutine.
   548  	//
   549  	// Because it acts like a GoStart for the next goroutine, we can
   550  	// only advance it if the sequence numbers line up.
   551  	//
   552  	// The current goroutine on the thread must be actively running.
   553  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   554  		return curCtx, false, err
   555  	}
   556  	curGState, ok := o.gStates[curCtx.G]
   557  	if !ok {
   558  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   559  	}
   560  	if curGState.status != tracev2.GoRunning {
   561  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   562  	}
   563  	nextg := GoID(ev.args[0])
   564  	seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G.
   565  	nextGState, ok := o.gStates[nextg]
   566  	if !ok || nextGState.status != tracev2.GoWaiting || !seq.succeeds(nextGState.seq) {
   567  		// We can't make an inference as to whether this is bad. We could just be seeing
   568  		// a GoSwitch on a different M before the goroutine was created, before it had its
   569  		// state emitted, or before we got to the right point in the trace yet.
   570  		return curCtx, false, nil
   571  	}
   572  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   573  
   574  	// Update the state of the executing goroutine and emit an event for it
   575  	// (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events
   576  	// for nextg).
   577  	switch ev.typ {
   578  	case tracev2.EvGoSwitch:
   579  		// Goroutine blocked. It's waiting now and not running on this M.
   580  		curGState.status = tracev2.GoWaiting
   581  
   582  		// Emit a GoBlock event.
   583  		// TODO(mknyszek): Emit a reason.
   584  		o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */))
   585  	case tracev2.EvGoSwitchDestroy:
   586  		// This goroutine is exiting itself.
   587  		delete(o.gStates, curCtx.G)
   588  
   589  		// Emit a GoDestroy event.
   590  		o.queue.push(makeEvent(evt, curCtx, tracev2.EvGoDestroy, ev.time))
   591  	}
   592  	// Update the state of the next goroutine.
   593  	nextGState.status = tracev2.GoRunning
   594  	nextGState.seq = seq
   595  	newCtx := curCtx
   596  	newCtx.G = nextg
   597  
   598  	// Queue an event for the next goroutine starting to run.
   599  	startCtx := curCtx
   600  	startCtx.G = NoGoroutine
   601  	o.queue.push(makeEvent(evt, startCtx, tracev2.EvGoStart, ev.time, uint64(nextg), ev.args[1]))
   602  	return newCtx, true, nil
   603  }
   604  
   605  func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   606  	// Entering a syscall requires an active running goroutine with a
   607  	// proc on some thread. It is always advancable.
   608  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   609  		return curCtx, false, err
   610  	}
   611  	state, ok := o.gStates[curCtx.G]
   612  	if !ok {
   613  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   614  	}
   615  	if state.status != tracev2.GoRunning {
   616  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   617  	}
   618  	// Goroutine entered a syscall. It's still running on this P and M.
   619  	state.status = tracev2.GoSyscall
   620  	pState, ok := o.pStates[curCtx.P]
   621  	if !ok {
   622  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
   623  	}
   624  	pState.status = tracev2.ProcSyscall
   625  	// Validate the P sequence number on the event and advance it.
   626  	//
   627  	// We have a P sequence number for what is supposed to be a goroutine event
   628  	// so that we can correctly model P stealing. Without this sequence number here,
   629  	// the syscall from which a ProcSteal event is stealing can be ambiguous in the
   630  	// face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
   631  	// more details.
   632  	//
   633  	// Note that because this sequence number only exists as a tool for disambiguation,
   634  	// we can enforce that we have the right sequence number at this point; we don't need
   635  	// to back off and see if any other events will advance. This is a running P.
   636  	pSeq := makeSeq(gen, ev.args[0])
   637  	if !pSeq.succeeds(pState.seq) {
   638  		return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", o.evName(ev.typ), pState.seq, pSeq)
   639  	}
   640  	pState.seq = pSeq
   641  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   642  	return curCtx, true, nil
   643  }
   644  
   645  func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   646  	// This event is always advance-able because it happens on the same
   647  	// thread that EvGoSyscallStart happened, and the goroutine can't leave
   648  	// that thread until its done.
   649  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   650  		return curCtx, false, err
   651  	}
   652  	state, ok := o.gStates[curCtx.G]
   653  	if !ok {
   654  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   655  	}
   656  	if state.status != tracev2.GoSyscall {
   657  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   658  	}
   659  	state.status = tracev2.GoRunning
   660  
   661  	// Transfer the P back to running from syscall.
   662  	pState, ok := o.pStates[curCtx.P]
   663  	if !ok {
   664  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
   665  	}
   666  	if pState.status != tracev2.ProcSyscall {
   667  		return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, tracev2.ProcSyscall, pState.status)
   668  	}
   669  	pState.status = tracev2.ProcRunning
   670  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   671  	return curCtx, true, nil
   672  }
   673  
   674  func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   675  	// This event becomes advanceable when its P is not in a syscall state
   676  	// (lack of a P altogether is also acceptable for advancing).
   677  	// The transfer out of ProcSyscall can happen either voluntarily via
   678  	// ProcStop or involuntarily via ProcSteal. We may also acquire a new P
   679  	// before we get here (after the transfer out) but that's OK: that new
   680  	// P won't be in the ProcSyscall state anymore.
   681  	//
   682  	// Basically: while we have a preemptible P, don't advance, because we
   683  	// *know* from the event that we're going to lose it at some point during
   684  	// the syscall. We shouldn't advance until that happens.
   685  	if curCtx.P != NoProc {
   686  		pState, ok := o.pStates[curCtx.P]
   687  		if !ok {
   688  			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, o.evName(ev.typ))
   689  		}
   690  		if pState.status == tracev2.ProcSyscall {
   691  			return curCtx, false, nil
   692  		}
   693  	}
   694  	// As mentioned above, we may have a P here if we ProcStart
   695  	// before this event.
   696  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
   697  		return curCtx, false, err
   698  	}
   699  	state, ok := o.gStates[curCtx.G]
   700  	if !ok {
   701  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   702  	}
   703  	if state.status != tracev2.GoSyscall {
   704  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", o.evName(ev.typ), GoRunning)
   705  	}
   706  	newCtx := curCtx
   707  	newCtx.G = NoGoroutine
   708  	state.status = tracev2.GoRunnable
   709  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   710  	return newCtx, true, nil
   711  }
   712  
   713  func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   714  	// This event indicates that a goroutine is effectively
   715  	// being created out of a cgo callback. Such a goroutine
   716  	// is 'created' in the syscall state.
   717  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustNotHave}); err != nil {
   718  		return curCtx, false, err
   719  	}
   720  	// This goroutine is effectively being created. Add a state for it.
   721  	newgid := GoID(ev.args[0])
   722  	if _, ok := o.gStates[newgid]; ok {
   723  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
   724  	}
   725  	o.gStates[newgid] = &gState{id: newgid, status: tracev2.GoSyscall, seq: makeSeq(gen, 0)}
   726  	// Goroutine is executing. Bind it to the context.
   727  	newCtx := curCtx
   728  	newCtx.G = newgid
   729  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   730  	return newCtx, true, nil
   731  }
   732  
   733  func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   734  	// This event indicates that a goroutine created for a
   735  	// cgo callback is disappearing, either because the callback
   736  	// ending or the C thread that called it is being destroyed.
   737  	//
   738  	// Also, treat this as if we lost our P too.
   739  	// The thread ID may be reused by the platform and we'll get
   740  	// really confused if we try to steal the P is this is running
   741  	// with later. The new M with the same ID could even try to
   742  	// steal back this P from itself!
   743  	//
   744  	// The runtime is careful to make sure that any GoCreateSyscall
   745  	// event will enter the runtime emitting events for reacquiring a P.
   746  	//
   747  	// Note: we might have a P here. The P might not be released
   748  	// eagerly by the runtime, and it might get stolen back later
   749  	// (or never again, if the program is going to exit).
   750  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mustHave}); err != nil {
   751  		return curCtx, false, err
   752  	}
   753  	// Check to make sure the goroutine exists in the right state.
   754  	state, ok := o.gStates[curCtx.G]
   755  	if !ok {
   756  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", o.evName(ev.typ), curCtx.G)
   757  	}
   758  	if state.status != tracev2.GoSyscall {
   759  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", o.evName(ev.typ), GoSyscall)
   760  	}
   761  	// This goroutine is exiting itself.
   762  	delete(o.gStates, curCtx.G)
   763  	newCtx := curCtx
   764  	newCtx.G = NoGoroutine
   765  
   766  	// If we have a proc, then we're dissociating from it now. See the comment at the top of the case.
   767  	if curCtx.P != NoProc {
   768  		pState, ok := o.pStates[curCtx.P]
   769  		if !ok {
   770  			return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, o.evName(ev.typ))
   771  		}
   772  		if pState.status != tracev2.ProcSyscall {
   773  			return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, o.evName(ev.typ))
   774  		}
   775  		// See the go122-create-syscall-reuse-thread-id test case for more details.
   776  		pState.status = tracev2.ProcSyscallAbandoned
   777  		newCtx.P = NoProc
   778  
   779  		// Queue an extra self-ProcSteal event.
   780  		extra := makeEvent(evt, curCtx, tracev2.EvProcSteal, ev.time, uint64(curCtx.P))
   781  		extra.base.extra(version.Go122)[0] = uint64(tracev2.ProcSyscall)
   782  		o.queue.push(extra)
   783  	}
   784  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   785  	return newCtx, true, nil
   786  }
   787  
   788  func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   789  	// Handle tasks. Tasks are interesting because:
   790  	// - There's no Begin event required to reference a task.
   791  	// - End for a particular task ID can appear multiple times.
   792  	// As a result, there's very little to validate. The only
   793  	// thing we have to be sure of is that a task didn't begin
   794  	// after it had already begun. Task IDs are allowed to be
   795  	// reused, so we don't care about a Begin after an End.
   796  	id := TaskID(ev.args[0])
   797  	if _, ok := o.activeTasks[id]; ok {
   798  		return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
   799  	}
   800  	// Get the parent ID, but don't validate it. There's no guarantee
   801  	// we actually have information on whether it's active.
   802  	parentID := TaskID(ev.args[1])
   803  	if parentID == BackgroundTask {
   804  		// Note: a value of 0 here actually means no parent, *not* the
   805  		// background task. Automatic background task attachment only
   806  		// applies to regions.
   807  		parentID = NoTask
   808  		ev.args[1] = uint64(NoTask)
   809  	}
   810  
   811  	// Validate the name and record it. We'll need to pass it through to
   812  	// EvUserTaskEnd.
   813  	nameID := stringID(ev.args[2])
   814  	name, ok := evt.strings.get(nameID)
   815  	if !ok {
   816  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   817  	}
   818  	o.activeTasks[id] = taskState{name: name, parentID: parentID}
   819  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   820  		return curCtx, false, err
   821  	}
   822  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   823  	return curCtx, true, nil
   824  }
   825  
   826  func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   827  	id := TaskID(ev.args[0])
   828  	if ts, ok := o.activeTasks[id]; ok {
   829  		// Smuggle the task info. This may happen in a different generation,
   830  		// which may not have the name in its string table. Add it to the extra
   831  		// strings table so we can look it up later.
   832  		ev.extra(version.Go122)[0] = uint64(ts.parentID)
   833  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
   834  		delete(o.activeTasks, id)
   835  	} else {
   836  		// Explicitly clear the task info.
   837  		ev.extra(version.Go122)[0] = uint64(NoTask)
   838  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
   839  	}
   840  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   841  		return curCtx, false, err
   842  	}
   843  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   844  	return curCtx, true, nil
   845  }
   846  
   847  func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   848  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   849  		return curCtx, false, err
   850  	}
   851  	tid := TaskID(ev.args[0])
   852  	nameID := stringID(ev.args[1])
   853  	name, ok := evt.strings.get(nameID)
   854  	if !ok {
   855  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   856  	}
   857  	gState, ok := o.gStates[curCtx.G]
   858  	if !ok {
   859  		return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
   860  	}
   861  	if err := gState.beginRegion(userRegion{tid, name}); err != nil {
   862  		return curCtx, false, err
   863  	}
   864  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   865  	return curCtx, true, nil
   866  }
   867  
   868  func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   869  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   870  		return curCtx, false, err
   871  	}
   872  	tid := TaskID(ev.args[0])
   873  	nameID := stringID(ev.args[1])
   874  	name, ok := evt.strings.get(nameID)
   875  	if !ok {
   876  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   877  	}
   878  	gState, ok := o.gStates[curCtx.G]
   879  	if !ok {
   880  		return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
   881  	}
   882  	if err := gState.endRegion(userRegion{tid, name}); err != nil {
   883  		return curCtx, false, err
   884  	}
   885  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   886  	return curCtx, true, nil
   887  }
   888  
   889  // Handle the GC mark phase.
   890  //
   891  // We have sequence numbers for both start and end because they
   892  // can happen on completely different threads. We want an explicit
   893  // partial order edge between start and end here, otherwise we're
   894  // relying entirely on timestamps to make sure we don't advance a
   895  // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
   896  func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   897  	seq := ev.args[0]
   898  	if gen == o.initialGen {
   899  		if o.gcState != gcUndetermined {
   900  			return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
   901  		}
   902  		o.gcSeq = seq
   903  		o.gcState = gcRunning
   904  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   905  		return curCtx, true, nil
   906  	}
   907  	if seq != o.gcSeq+1 {
   908  		// This is not the right GC cycle.
   909  		return curCtx, false, nil
   910  	}
   911  	if o.gcState != gcRunning {
   912  		return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
   913  	}
   914  	o.gcSeq = seq
   915  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   916  		return curCtx, false, err
   917  	}
   918  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   919  	return curCtx, true, nil
   920  }
   921  
   922  func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   923  	seq := ev.args[0]
   924  	if o.gcState == gcUndetermined {
   925  		o.gcSeq = seq
   926  		o.gcState = gcRunning
   927  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   928  		return curCtx, true, nil
   929  	}
   930  	if seq != o.gcSeq+1 {
   931  		// This is not the right GC cycle.
   932  		return curCtx, false, nil
   933  	}
   934  	if o.gcState == gcRunning {
   935  		return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
   936  	}
   937  	o.gcSeq = seq
   938  	o.gcState = gcRunning
   939  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   940  		return curCtx, false, err
   941  	}
   942  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   943  	return curCtx, true, nil
   944  }
   945  
   946  func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   947  	seq := ev.args[0]
   948  	if seq != o.gcSeq+1 {
   949  		// This is not the right GC cycle.
   950  		return curCtx, false, nil
   951  	}
   952  	if o.gcState == gcNotRunning {
   953  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
   954  	}
   955  	if o.gcState == gcUndetermined {
   956  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
   957  	}
   958  	o.gcSeq = seq
   959  	o.gcState = gcNotRunning
   960  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   961  		return curCtx, false, err
   962  	}
   963  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   964  	return curCtx, true, nil
   965  }
   966  
   967  func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   968  	// Handle simple instantaneous events that require a G.
   969  	if err := validateCtx(curCtx, userGoReqs); err != nil {
   970  		return curCtx, false, err
   971  	}
   972  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   973  	return curCtx, true, nil
   974  }
   975  
   976  func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   977  	// Handle allocation metrics, which don't require a G.
   978  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
   979  		return curCtx, false, err
   980  	}
   981  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   982  	return curCtx, true, nil
   983  }
   984  
   985  func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   986  	// Handle sweep, which is bound to a P and doesn't require a G.
   987  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
   988  		return curCtx, false, err
   989  	}
   990  	if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil {
   991  		return curCtx, false, err
   992  	}
   993  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   994  	return curCtx, true, nil
   995  }
   996  
   997  func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   998  	pid := ProcID(ev.args[0])
   999  	// N.B. In practice Ps can't block while they're sweeping, so this can only
  1000  	// ever reference curCtx.P. However, be lenient about this like we are with
  1001  	// GCMarkAssistActive; there's no reason the runtime couldn't change to block
  1002  	// in the middle of a sweep.
  1003  	pState, ok := o.pStates[pid]
  1004  	if !ok {
  1005  		return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
  1006  	}
  1007  	if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
  1008  		return curCtx, false, err
  1009  	}
  1010  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1011  	return curCtx, true, nil
  1012  }
  1013  
  1014  func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1015  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mustHave, G: mayHave}); err != nil {
  1016  		return curCtx, false, err
  1017  	}
  1018  	_, err := o.pStates[curCtx.P].endRange(ev.typ)
  1019  	if err != nil {
  1020  		return curCtx, false, err
  1021  	}
  1022  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1023  	return curCtx, true, nil
  1024  }
  1025  
  1026  func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1027  	// Handle special goroutine-bound event ranges.
  1028  	if err := validateCtx(curCtx, userGoReqs); err != nil {
  1029  		return curCtx, false, err
  1030  	}
  1031  	desc := stringID(0)
  1032  	if ev.typ == tracev2.EvSTWBegin {
  1033  		desc = stringID(ev.args[0])
  1034  	}
  1035  	gState, ok := o.gStates[curCtx.G]
  1036  	if !ok {
  1037  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1038  	}
  1039  	if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil {
  1040  		return curCtx, false, err
  1041  	}
  1042  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1043  	return curCtx, true, nil
  1044  }
  1045  
  1046  func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1047  	gid := GoID(ev.args[0])
  1048  	// N.B. Like GoStatus, this can happen at any time, because it can
  1049  	// reference a non-running goroutine. Don't check anything about the
  1050  	// current scheduler context.
  1051  	gState, ok := o.gStates[gid]
  1052  	if !ok {
  1053  		return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, o.evName(ev.typ))
  1054  	}
  1055  	if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
  1056  		return curCtx, false, err
  1057  	}
  1058  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1059  	return curCtx, true, nil
  1060  }
  1061  
  1062  func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1063  	if err := validateCtx(curCtx, userGoReqs); err != nil {
  1064  		return curCtx, false, err
  1065  	}
  1066  	gState, ok := o.gStates[curCtx.G]
  1067  	if !ok {
  1068  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1069  	}
  1070  	desc, err := gState.endRange(ev.typ)
  1071  	if err != nil {
  1072  		return curCtx, false, err
  1073  	}
  1074  	if ev.typ == tracev2.EvSTWEnd {
  1075  		// Smuggle the kind into the event.
  1076  		// Don't use ev.extra here so we have symmetry with STWBegin.
  1077  		ev.args[0] = uint64(desc)
  1078  	}
  1079  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1080  	return curCtx, true, nil
  1081  }
  1082  
  1083  func (o *ordering) advanceAllocFree(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1084  	// Handle simple instantaneous events that may or may not have a P.
  1085  	if err := validateCtx(curCtx, schedReqs{M: mustHave, P: mayHave, G: mayHave}); err != nil {
  1086  		return curCtx, false, err
  1087  	}
  1088  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1089  	return curCtx, true, nil
  1090  }
  1091  
  1092  // Next returns the next event in the ordering.
  1093  func (o *ordering) Next() (Event, bool) {
  1094  	return o.queue.pop()
  1095  }
  1096  
  1097  // schedCtx represents the scheduling resources associated with an event.
  1098  type schedCtx struct {
  1099  	G GoID
  1100  	P ProcID
  1101  	M ThreadID
  1102  }
  1103  
  1104  // validateCtx ensures that ctx conforms to some reqs, returning an error if
  1105  // it doesn't.
  1106  func validateCtx(ctx schedCtx, reqs schedReqs) error {
  1107  	// Check thread requirements.
  1108  	if reqs.M == mustHave && ctx.M == NoThread {
  1109  		return fmt.Errorf("expected a thread but didn't have one")
  1110  	} else if reqs.M == mustNotHave && ctx.M != NoThread {
  1111  		return fmt.Errorf("expected no thread but had one")
  1112  	}
  1113  
  1114  	// Check proc requirements.
  1115  	if reqs.P == mustHave && ctx.P == NoProc {
  1116  		return fmt.Errorf("expected a proc but didn't have one")
  1117  	} else if reqs.P == mustNotHave && ctx.P != NoProc {
  1118  		return fmt.Errorf("expected no proc but had one")
  1119  	}
  1120  
  1121  	// Check goroutine requirements.
  1122  	if reqs.G == mustHave && ctx.G == NoGoroutine {
  1123  		return fmt.Errorf("expected a goroutine but didn't have one")
  1124  	} else if reqs.G == mustNotHave && ctx.G != NoGoroutine {
  1125  		return fmt.Errorf("expected no goroutine but had one")
  1126  	}
  1127  	return nil
  1128  }
  1129  
  1130  // gcState is a trinary variable for the current state of the GC.
  1131  //
  1132  // The third state besides "enabled" and "disabled" is "undetermined."
  1133  type gcState uint8
  1134  
  1135  const (
  1136  	gcUndetermined gcState = iota
  1137  	gcNotRunning
  1138  	gcRunning
  1139  )
  1140  
  1141  // String returns a human-readable string for the GC state.
  1142  func (s gcState) String() string {
  1143  	switch s {
  1144  	case gcUndetermined:
  1145  		return "Undetermined"
  1146  	case gcNotRunning:
  1147  		return "NotRunning"
  1148  	case gcRunning:
  1149  		return "Running"
  1150  	}
  1151  	return "Bad"
  1152  }
  1153  
  1154  // userRegion represents a unique user region when attached to some gState.
  1155  type userRegion struct {
  1156  	// name must be a resolved string because the string ID for the same
  1157  	// string may change across generations, but we care about checking
  1158  	// the value itself.
  1159  	taskID TaskID
  1160  	name   string
  1161  }
  1162  
  1163  // rangeType is a way to classify special ranges of time.
  1164  //
  1165  // These typically correspond 1:1 with "Begin" events, but
  1166  // they may have an optional subtype that describes the range
  1167  // in more detail.
  1168  type rangeType struct {
  1169  	typ  tracev2.EventType // "Begin" event.
  1170  	desc stringID          // Optional subtype.
  1171  }
  1172  
  1173  // makeRangeType constructs a new rangeType.
  1174  func makeRangeType(typ tracev2.EventType, desc stringID) rangeType {
  1175  	if styp := tracev2.Specs()[typ].StartEv; styp != tracev2.EvNone {
  1176  		typ = styp
  1177  	}
  1178  	return rangeType{typ, desc}
  1179  }
  1180  
  1181  // gState is the state of a goroutine at a point in the trace.
  1182  type gState struct {
  1183  	id     GoID
  1184  	status tracev2.GoStatus
  1185  	seq    seqCounter
  1186  
  1187  	// regions are the active user regions for this goroutine.
  1188  	regions []userRegion
  1189  
  1190  	// rangeState is the state of special time ranges bound to this goroutine.
  1191  	rangeState
  1192  }
  1193  
  1194  // beginRegion starts a user region on the goroutine.
  1195  func (s *gState) beginRegion(r userRegion) error {
  1196  	s.regions = append(s.regions, r)
  1197  	return nil
  1198  }
  1199  
  1200  // endRegion ends a user region on the goroutine.
  1201  func (s *gState) endRegion(r userRegion) error {
  1202  	if len(s.regions) == 0 {
  1203  		// We do not know about regions that began before tracing started.
  1204  		return nil
  1205  	}
  1206  	if next := s.regions[len(s.regions)-1]; next != r {
  1207  		return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
  1208  	}
  1209  	s.regions = s.regions[:len(s.regions)-1]
  1210  	return nil
  1211  }
  1212  
  1213  // pState is the state of a proc at a point in the trace.
  1214  type pState struct {
  1215  	id     ProcID
  1216  	status tracev2.ProcStatus
  1217  	seq    seqCounter
  1218  
  1219  	// rangeState is the state of special time ranges bound to this proc.
  1220  	rangeState
  1221  }
  1222  
  1223  // mState is the state of a thread at a point in the trace.
  1224  type mState struct {
  1225  	g GoID   // Goroutine bound to this M. (The goroutine's state is Executing.)
  1226  	p ProcID // Proc bound to this M. (The proc's state is Executing.)
  1227  }
  1228  
  1229  // rangeState represents the state of special time ranges.
  1230  type rangeState struct {
  1231  	// inFlight contains the rangeTypes of any ranges bound to a resource.
  1232  	inFlight []rangeType
  1233  }
  1234  
  1235  // beginRange begins a special range in time on the goroutine.
  1236  //
  1237  // Returns an error if the range is already in progress.
  1238  func (s *rangeState) beginRange(typ rangeType) error {
  1239  	if s.hasRange(typ) {
  1240  		return fmt.Errorf("discovered event already in-flight for when starting event %v", tracev2.Specs()[typ.typ].Name)
  1241  	}
  1242  	s.inFlight = append(s.inFlight, typ)
  1243  	return nil
  1244  }
  1245  
  1246  // activeRange marks special range in time on the goroutine as active in the
  1247  // initial generation, or confirms that it is indeed active in later generations.
  1248  func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
  1249  	if isInitialGen {
  1250  		if s.hasRange(typ) {
  1251  			return fmt.Errorf("found named active range already in first gen: %v", typ)
  1252  		}
  1253  		s.inFlight = append(s.inFlight, typ)
  1254  	} else if !s.hasRange(typ) {
  1255  		return fmt.Errorf("resource is missing active range: %v %v", tracev2.Specs()[typ.typ].Name, s.inFlight)
  1256  	}
  1257  	return nil
  1258  }
  1259  
  1260  // hasRange returns true if a special time range on the goroutine as in progress.
  1261  func (s *rangeState) hasRange(typ rangeType) bool {
  1262  	return slices.Contains(s.inFlight, typ)
  1263  }
  1264  
  1265  // endRange ends a special range in time on the goroutine.
  1266  //
  1267  // This must line up with the start event type  of the range the goroutine is currently in.
  1268  func (s *rangeState) endRange(typ tracev2.EventType) (stringID, error) {
  1269  	st := tracev2.Specs()[typ].StartEv
  1270  	idx := -1
  1271  	for i, r := range s.inFlight {
  1272  		if r.typ == st {
  1273  			idx = i
  1274  			break
  1275  		}
  1276  	}
  1277  	if idx < 0 {
  1278  		return 0, fmt.Errorf("tried to end event %v, but not in-flight", tracev2.Specs()[st].Name)
  1279  	}
  1280  	// Swap remove.
  1281  	desc := s.inFlight[idx].desc
  1282  	s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
  1283  	s.inFlight = s.inFlight[:len(s.inFlight)-1]
  1284  	return desc, nil
  1285  }
  1286  
  1287  // seqCounter represents a global sequence counter for a resource.
  1288  type seqCounter struct {
  1289  	gen uint64 // The generation for the local sequence counter seq.
  1290  	seq uint64 // The sequence number local to the generation.
  1291  }
  1292  
  1293  // makeSeq creates a new seqCounter.
  1294  func makeSeq(gen, seq uint64) seqCounter {
  1295  	return seqCounter{gen: gen, seq: seq}
  1296  }
  1297  
  1298  // succeeds returns true if a is the immediate successor of b.
  1299  func (a seqCounter) succeeds(b seqCounter) bool {
  1300  	return a.gen == b.gen && a.seq == b.seq+1
  1301  }
  1302  
  1303  // String returns a debug string representation of the seqCounter.
  1304  func (c seqCounter) String() string {
  1305  	return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
  1306  }
  1307  
  1308  func dumpOrdering(order *ordering) string {
  1309  	var sb strings.Builder
  1310  	for id, state := range order.gStates {
  1311  		fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1312  	}
  1313  	fmt.Fprintln(&sb)
  1314  	for id, state := range order.pStates {
  1315  		fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1316  	}
  1317  	fmt.Fprintln(&sb)
  1318  	for id, state := range order.mStates {
  1319  		fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
  1320  	}
  1321  	fmt.Fprintln(&sb)
  1322  	fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
  1323  	return sb.String()
  1324  }
  1325  
  1326  // taskState represents an active task.
  1327  type taskState struct {
  1328  	// name is the type of the active task.
  1329  	name string
  1330  
  1331  	// parentID is the parent ID of the active task.
  1332  	parentID TaskID
  1333  }
  1334  
  1335  // queue implements a growable ring buffer with a queue API.
  1336  type queue[T any] struct {
  1337  	start, end int
  1338  	buf        []T
  1339  }
  1340  
  1341  // push adds a new event to the back of the queue.
  1342  func (q *queue[T]) push(value T) {
  1343  	if q.end-q.start == len(q.buf) {
  1344  		q.grow()
  1345  	}
  1346  	q.buf[q.end%len(q.buf)] = value
  1347  	q.end++
  1348  }
  1349  
  1350  // grow increases the size of the queue.
  1351  func (q *queue[T]) grow() {
  1352  	if len(q.buf) == 0 {
  1353  		q.buf = make([]T, 2)
  1354  		return
  1355  	}
  1356  
  1357  	// Create new buf and copy data over.
  1358  	newBuf := make([]T, len(q.buf)*2)
  1359  	pivot := q.start % len(q.buf)
  1360  	first, last := q.buf[pivot:], q.buf[:pivot]
  1361  	copy(newBuf[:len(first)], first)
  1362  	copy(newBuf[len(first):], last)
  1363  
  1364  	// Update the queue state.
  1365  	q.start = 0
  1366  	q.end = len(q.buf)
  1367  	q.buf = newBuf
  1368  }
  1369  
  1370  // pop removes an event from the front of the queue. If the
  1371  // queue is empty, it returns an EventBad event.
  1372  func (q *queue[T]) pop() (T, bool) {
  1373  	if q.end-q.start == 0 {
  1374  		return *new(T), false
  1375  	}
  1376  	elem := &q.buf[q.start%len(q.buf)]
  1377  	value := *elem
  1378  	*elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables.
  1379  	q.start++
  1380  	return value, true
  1381  }
  1382  
  1383  // makeEvent creates an Event from the provided information.
  1384  //
  1385  // It's just a convenience function; it's always OK to construct
  1386  // an Event manually if this isn't quite the right way to express
  1387  // the contents of the event.
  1388  func makeEvent(table *evTable, ctx schedCtx, typ tracev2.EventType, time Time, args ...uint64) Event {
  1389  	ev := Event{
  1390  		table: table,
  1391  		ctx:   ctx,
  1392  		base: baseEvent{
  1393  			typ:  typ,
  1394  			time: time,
  1395  		},
  1396  	}
  1397  	copy(ev.base.args[:], args)
  1398  	return ev
  1399  }
  1400  
  1401  // schedReqs is a set of constraints on what the scheduling
  1402  // context must look like.
  1403  type schedReqs struct {
  1404  	M constraint
  1405  	P constraint
  1406  	G constraint
  1407  }
  1408  
  1409  // constraint represents a various presence requirements.
  1410  type constraint uint8
  1411  
  1412  const (
  1413  	mustNotHave constraint = iota
  1414  	mayHave
  1415  	mustHave
  1416  )
  1417  
  1418  // userGoReqs is a common requirement among events that are running
  1419  // or are close to running user code.
  1420  var userGoReqs = schedReqs{M: mustHave, P: mustHave, G: mustHave}
  1421
View as plain text