Source file src/internal/trace/testtrace/validation.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package testtrace
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"internal/trace"
    11  	"slices"
    12  	"strings"
    13  )
    14  
    15  // Validator is a type used for validating a stream of trace.Events.
    16  type Validator struct {
    17  	lastTs trace.Time
    18  	gs     map[trace.GoID]*goState
    19  	ps     map[trace.ProcID]*procState
    20  	ms     map[trace.ThreadID]*schedContext
    21  	ranges map[trace.ResourceID][]string
    22  	tasks  map[trace.TaskID]string
    23  	nSync  int
    24  	Go121  bool
    25  }
    26  
    27  type schedContext struct {
    28  	M trace.ThreadID
    29  	P trace.ProcID
    30  	G trace.GoID
    31  }
    32  
    33  type goState struct {
    34  	state   trace.GoState
    35  	binding *schedContext
    36  }
    37  
    38  type procState struct {
    39  	state   trace.ProcState
    40  	binding *schedContext
    41  }
    42  
    43  // NewValidator creates a new Validator.
    44  func NewValidator() *Validator {
    45  	return &Validator{
    46  		gs:     make(map[trace.GoID]*goState),
    47  		ps:     make(map[trace.ProcID]*procState),
    48  		ms:     make(map[trace.ThreadID]*schedContext),
    49  		ranges: make(map[trace.ResourceID][]string),
    50  		tasks:  make(map[trace.TaskID]string),
    51  	}
    52  }
    53  
    54  // Event validates ev as the next event in a stream of trace.Events.
    55  //
    56  // Returns an error if validation fails.
    57  func (v *Validator) Event(ev trace.Event) error {
    58  	e := new(errAccumulator)
    59  
    60  	// Validate timestamp order.
    61  	if v.lastTs != 0 {
    62  		if ev.Time() <= v.lastTs {
    63  			e.Errorf("timestamp out-of-order (want > %v) for %+v", v.lastTs, ev)
    64  		} else {
    65  			v.lastTs = ev.Time()
    66  		}
    67  	} else {
    68  		v.lastTs = ev.Time()
    69  	}
    70  
    71  	// Validate event stack.
    72  	checkStack(e, ev.Stack())
    73  
    74  	switch ev.Kind() {
    75  	case trace.EventSync:
    76  		s := ev.Sync()
    77  		if s.N != v.nSync+1 {
    78  			e.Errorf("sync count is not sequential: expected %d, got %d", v.nSync+1, s.N)
    79  		}
    80  		v.nSync = s.N
    81  	case trace.EventMetric:
    82  		m := ev.Metric()
    83  		if !strings.Contains(m.Name, ":") {
    84  			// Should have a ":" as per runtime/metrics convention.
    85  			e.Errorf("invalid metric name %q", m.Name)
    86  		}
    87  		// Make sure the value is OK.
    88  		if m.Value.Kind() == trace.ValueBad {
    89  			e.Errorf("invalid value")
    90  		}
    91  		switch m.Value.Kind() {
    92  		case trace.ValueUint64:
    93  			// Just make sure it doesn't panic.
    94  			_ = m.Value.ToUint64()
    95  		}
    96  	case trace.EventLabel:
    97  		l := ev.Label()
    98  
    99  		// Check label.
   100  		if l.Label == "" {
   101  			e.Errorf("invalid label %q", l.Label)
   102  		}
   103  
   104  		// Check label resource.
   105  		if l.Resource.Kind == trace.ResourceNone {
   106  			e.Errorf("label resource none")
   107  		}
   108  		switch l.Resource.Kind {
   109  		case trace.ResourceGoroutine:
   110  			id := l.Resource.Goroutine()
   111  			if _, ok := v.gs[id]; !ok {
   112  				e.Errorf("label for invalid goroutine %d", id)
   113  			}
   114  		case trace.ResourceProc:
   115  			id := l.Resource.Proc()
   116  			if _, ok := v.ps[id]; !ok {
   117  				e.Errorf("label for invalid proc %d", id)
   118  			}
   119  		case trace.ResourceThread:
   120  			id := l.Resource.Thread()
   121  			if _, ok := v.ms[id]; !ok {
   122  				e.Errorf("label for invalid thread %d", id)
   123  			}
   124  		}
   125  	case trace.EventStackSample:
   126  		// Not much to check here. It's basically a sched context and a stack.
   127  		// The sched context is also not guaranteed to align with other events.
   128  		// We already checked the stack above.
   129  	case trace.EventStateTransition:
   130  		// Validate state transitions.
   131  		//
   132  		// TODO(mknyszek): A lot of logic is duplicated between goroutines and procs.
   133  		// The two are intentionally handled identically; from the perspective of the
   134  		// API, resources all have the same general properties. Consider making this
   135  		// code generic over resources and implementing validation just once.
   136  		tr := ev.StateTransition()
   137  		checkStack(e, tr.Stack)
   138  		switch tr.Resource.Kind {
   139  		case trace.ResourceGoroutine:
   140  			// Basic state transition validation.
   141  			id := tr.Resource.Goroutine()
   142  			old, new := tr.Goroutine()
   143  			if new == trace.GoUndetermined {
   144  				e.Errorf("transition to undetermined state for goroutine %d", id)
   145  			}
   146  			if v.nSync > 1 && old == trace.GoUndetermined {
   147  				e.Errorf("undetermined goroutine %d after first global sync", id)
   148  			}
   149  			if new == trace.GoNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
   150  				e.Errorf("goroutine %d died with active ranges", id)
   151  			}
   152  			state, ok := v.gs[id]
   153  			if ok {
   154  				if old != state.state {
   155  					e.Errorf("bad old state for goroutine %d: got %s, want %s", id, old, state.state)
   156  				}
   157  				state.state = new
   158  			} else {
   159  				if old != trace.GoUndetermined && old != trace.GoNotExist {
   160  					e.Errorf("bad old state for unregistered goroutine %d: %s", id, old)
   161  				}
   162  				state = &goState{state: new}
   163  				v.gs[id] = state
   164  			}
   165  			// Validate sched context.
   166  			if new.Executing() {
   167  				ctx := v.getOrCreateThread(e, ev, ev.Thread())
   168  				if ctx != nil {
   169  					if ctx.G != trace.NoGoroutine && ctx.G != id {
   170  						e.Errorf("tried to run goroutine %d when one was already executing (%d) on thread %d", id, ctx.G, ev.Thread())
   171  					}
   172  					ctx.G = id
   173  					state.binding = ctx
   174  				}
   175  			} else if old.Executing() && !new.Executing() {
   176  				if tr.Stack != ev.Stack() {
   177  					// This is a case where the transition is happening to a goroutine that is also executing, so
   178  					// these two stacks should always match.
   179  					e.Errorf("StateTransition.Stack doesn't match Event.Stack")
   180  				}
   181  				ctx := state.binding
   182  				if ctx != nil {
   183  					if ctx.G != id {
   184  						e.Errorf("tried to stop goroutine %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.G, ev.Thread())
   185  					}
   186  					ctx.G = trace.NoGoroutine
   187  					state.binding = nil
   188  				} else {
   189  					e.Errorf("stopping goroutine %d not bound to any active context", id)
   190  				}
   191  			}
   192  		case trace.ResourceProc:
   193  			// Basic state transition validation.
   194  			id := tr.Resource.Proc()
   195  			old, new := tr.Proc()
   196  			if new == trace.ProcUndetermined {
   197  				e.Errorf("transition to undetermined state for proc %d", id)
   198  			}
   199  			if v.nSync > 1 && old == trace.ProcUndetermined {
   200  				e.Errorf("undetermined proc %d after first global sync", id)
   201  			}
   202  			if new == trace.ProcNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
   203  				e.Errorf("proc %d died with active ranges", id)
   204  			}
   205  			state, ok := v.ps[id]
   206  			if ok {
   207  				if old != state.state {
   208  					e.Errorf("bad old state for proc %d: got %s, want %s", id, old, state.state)
   209  				}
   210  				state.state = new
   211  			} else {
   212  				if old != trace.ProcUndetermined && old != trace.ProcNotExist {
   213  					e.Errorf("bad old state for unregistered proc %d: %s", id, old)
   214  				}
   215  				state = &procState{state: new}
   216  				v.ps[id] = state
   217  			}
   218  			// Validate sched context.
   219  			if new.Executing() {
   220  				ctx := v.getOrCreateThread(e, ev, ev.Thread())
   221  				if ctx != nil {
   222  					if ctx.P != trace.NoProc && ctx.P != id {
   223  						e.Errorf("tried to run proc %d when one was already executing (%d) on thread %d", id, ctx.P, ev.Thread())
   224  					}
   225  					ctx.P = id
   226  					state.binding = ctx
   227  				}
   228  			} else if old.Executing() && !new.Executing() {
   229  				ctx := state.binding
   230  				if ctx != nil {
   231  					if ctx.P != id {
   232  						e.Errorf("tried to stop proc %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.P, ctx.M)
   233  					}
   234  					ctx.P = trace.NoProc
   235  					state.binding = nil
   236  				} else {
   237  					e.Errorf("stopping proc %d not bound to any active context", id)
   238  				}
   239  			}
   240  		}
   241  	case trace.EventRangeBegin, trace.EventRangeActive, trace.EventRangeEnd:
   242  		// Validate ranges.
   243  		r := ev.Range()
   244  		switch ev.Kind() {
   245  		case trace.EventRangeBegin:
   246  			if v.hasRange(r.Scope, r.Name) {
   247  				e.Errorf("already active range %q on %v begun again", r.Name, r.Scope)
   248  			}
   249  			v.addRange(r.Scope, r.Name)
   250  		case trace.EventRangeActive:
   251  			if !v.hasRange(r.Scope, r.Name) {
   252  				v.addRange(r.Scope, r.Name)
   253  			}
   254  		case trace.EventRangeEnd:
   255  			if !v.hasRange(r.Scope, r.Name) {
   256  				e.Errorf("inactive range %q on %v ended", r.Name, r.Scope)
   257  			}
   258  			v.deleteRange(r.Scope, r.Name)
   259  		}
   260  	case trace.EventTaskBegin:
   261  		// Validate task begin.
   262  		t := ev.Task()
   263  		if t.ID == trace.NoTask || t.ID == trace.BackgroundTask {
   264  			// The background task should never have an event emitted for it.
   265  			e.Errorf("found invalid task ID for task of type %s", t.Type)
   266  		}
   267  		if t.Parent == trace.BackgroundTask {
   268  			// It's not possible for a task to be a subtask of the background task.
   269  			e.Errorf("found background task as the parent for task of type %s", t.Type)
   270  		}
   271  		// N.B. Don't check the task type. Empty string is a valid task type.
   272  		v.tasks[t.ID] = t.Type
   273  	case trace.EventTaskEnd:
   274  		// Validate task end.
   275  		// We can see a task end without a begin, so ignore a task without information.
   276  		// Instead, if we've seen the task begin, just make sure the task end lines up.
   277  		t := ev.Task()
   278  		if typ, ok := v.tasks[t.ID]; ok {
   279  			if t.Type != typ {
   280  				e.Errorf("task end type %q doesn't match task start type %q for task %d", t.Type, typ, t.ID)
   281  			}
   282  			delete(v.tasks, t.ID)
   283  		}
   284  	case trace.EventLog:
   285  		// There's really not much here to check, except that we can
   286  		// generate a Log. The category and message are entirely user-created,
   287  		// so we can't make any assumptions as to what they are. We also
   288  		// can't validate the task, because proving the task's existence is very
   289  		// much best-effort.
   290  		_ = ev.Log()
   291  	}
   292  	return e.Errors()
   293  }
   294  
   295  func (v *Validator) hasRange(r trace.ResourceID, name string) bool {
   296  	ranges, ok := v.ranges[r]
   297  	return ok && slices.Contains(ranges, name)
   298  }
   299  
   300  func (v *Validator) addRange(r trace.ResourceID, name string) {
   301  	ranges, _ := v.ranges[r]
   302  	ranges = append(ranges, name)
   303  	v.ranges[r] = ranges
   304  }
   305  
   306  func (v *Validator) hasAnyRange(r trace.ResourceID) bool {
   307  	ranges, ok := v.ranges[r]
   308  	return ok && len(ranges) != 0
   309  }
   310  
   311  func (v *Validator) deleteRange(r trace.ResourceID, name string) {
   312  	ranges, ok := v.ranges[r]
   313  	if !ok {
   314  		return
   315  	}
   316  	i := slices.Index(ranges, name)
   317  	if i < 0 {
   318  		return
   319  	}
   320  	v.ranges[r] = slices.Delete(ranges, i, i+1)
   321  }
   322  
   323  func (v *Validator) getOrCreateThread(e *errAccumulator, ev trace.Event, m trace.ThreadID) *schedContext {
   324  	lenient := func() bool {
   325  		// Be lenient about GoUndetermined -> GoSyscall transitions if they
   326  		// originate from an old trace. These transitions lack thread
   327  		// information in trace formats older than 1.22.
   328  		if !v.Go121 {
   329  			return false
   330  		}
   331  		if ev.Kind() != trace.EventStateTransition {
   332  			return false
   333  		}
   334  		tr := ev.StateTransition()
   335  		if tr.Resource.Kind != trace.ResourceGoroutine {
   336  			return false
   337  		}
   338  		from, to := tr.Goroutine()
   339  		return from == trace.GoUndetermined && to == trace.GoSyscall
   340  	}
   341  	if m == trace.NoThread && !lenient() {
   342  		e.Errorf("must have thread, but thread ID is none")
   343  		return nil
   344  	}
   345  	s, ok := v.ms[m]
   346  	if !ok {
   347  		s = &schedContext{M: m, P: trace.NoProc, G: trace.NoGoroutine}
   348  		v.ms[m] = s
   349  		return s
   350  	}
   351  	return s
   352  }
   353  
   354  func checkStack(e *errAccumulator, stk trace.Stack) {
   355  	// Check for non-empty values, but we also check for crashes due to incorrect validation.
   356  	for i, f := range slices.Collect(stk.Frames()) {
   357  		if i == 0 {
   358  			// Allow for one fully zero stack.
   359  			//
   360  			// TODO(mknyszek): Investigate why that happens.
   361  			continue
   362  		}
   363  		if f.Func == "" || f.File == "" || f.PC == 0 || f.Line == 0 {
   364  			e.Errorf("invalid stack frame %#v: missing information", f)
   365  		}
   366  	}
   367  }
   368  
   369  type errAccumulator struct {
   370  	errs []error
   371  }
   372  
   373  func (e *errAccumulator) Errorf(f string, args ...any) {
   374  	e.errs = append(e.errs, fmt.Errorf(f, args...))
   375  }
   376  
   377  func (e *errAccumulator) Errors() error {
   378  	return errors.Join(e.errs...)
   379  }
   380  

View as plain text