Source file src/cmd/vendor/golang.org/x/tools/internal/fmtstr/parse.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package fmtstr defines a parser for format strings as used by [fmt.Printf].
     6  package fmtstr
     7  
     8  import (
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"unicode/utf8"
    13  )
    14  
    15  // Operation holds the parsed representation of a printf operation such as "%3.*[4]d".
    16  // It is constructed by [Parse].
    17  type Operation struct {
    18  	Text  string // full text of the operation, e.g. "%[2]*.3d"
    19  	Verb  Verb   // verb specifier, guaranteed to exist, e.g., 'd' in '%[1]d'
    20  	Range Range  // the range of Text within the overall format string
    21  	Flags string // formatting flags, e.g. "-0"
    22  	Width Size   // width specifier, e.g., '3' in '%3d'
    23  	Prec  Size   // precision specifier, e.g., '.4' in '%.4f'
    24  }
    25  
    26  // Size describes an optional width or precision in a format operation.
    27  // It may represent no value, a literal number, an asterisk, or an indexed asterisk.
    28  type Size struct {
    29  	// At most one of these two fields is non-negative.
    30  	Fixed   int // e.g. 4 from "%4d", otherwise -1
    31  	Dynamic int // index of argument providing dynamic size (e.g. %*d or %[3]*d), otherwise -1
    32  
    33  	Index int   // If the width or precision uses an indexed argument (e.g. 2 in %[2]*d), this is the index, otherwise -1
    34  	Range Range // position of the size specifier within the operation
    35  }
    36  
    37  // Verb represents the verb character of a format operation (e.g., 'd', 's', 'f').
    38  // It also includes positional information and any explicit argument indexing.
    39  type Verb struct {
    40  	Verb     rune
    41  	Range    Range // positional range of the verb in the format string
    42  	Index    int   // index of an indexed argument, (e.g. 2 in %[2]d), otherwise -1
    43  	ArgIndex int   // argument index (0-based) associated with this verb, relative to CallExpr
    44  }
    45  
    46  // byte offsets of format string
    47  type Range struct {
    48  	Start, End int
    49  }
    50  
    51  // Parse takes a format string and its index in the printf-like call,
    52  // parses out all format operations, returns a slice of parsed
    53  // [Operation] which describes flags, width, precision, verb, and argument indexing,
    54  // or an error if parsing fails.
    55  //
    56  // All error messages are in predicate form ("call has a problem")
    57  // so that they may be affixed into a subject ("log.Printf ").
    58  //
    59  // The flags will only be a subset of ['#', '0', '+', '-', ' '].
    60  // It does not perform any validation of verbs, nor the
    61  // existence of corresponding arguments (obviously it can't). The provided format string may differ
    62  // from the one in CallExpr, such as a concatenated string or a string
    63  // referred to by the argument in the CallExpr.
    64  func Parse(format string, idx int) ([]*Operation, error) {
    65  	if !strings.Contains(format, "%") {
    66  		return nil, fmt.Errorf("call has arguments but no formatting directives")
    67  	}
    68  
    69  	firstArg := idx + 1 // Arguments are immediately after format string.
    70  	argNum := firstArg
    71  	var operations []*Operation
    72  	for i, w := 0, 0; i < len(format); i += w {
    73  		w = 1
    74  		if format[i] != '%' {
    75  			continue
    76  		}
    77  		state, err := parseOperation(format[i:], firstArg, argNum)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  
    82  		state.operation.addOffset(i)
    83  		operations = append(operations, state.operation)
    84  
    85  		w = len(state.operation.Text)
    86  		// Do not waste an argument for '%'.
    87  		if state.operation.Verb.Verb != '%' {
    88  			argNum = state.argNum + 1
    89  		}
    90  	}
    91  	return operations, nil
    92  }
    93  
    94  // Internal parsing state to operation.
    95  type state struct {
    96  	operation    *Operation
    97  	firstArg     int  // index of the first argument after the format string
    98  	argNum       int  // which argument we're expecting to format now
    99  	hasIndex     bool // whether the argument is indexed
   100  	index        int  // the encountered index
   101  	indexPos     int  // the encountered index's offset
   102  	indexPending bool // whether we have an indexed argument that has not resolved
   103  	nbytes       int  // number of bytes of the format string consumed
   104  }
   105  
   106  // parseOperation parses one format operation starting at the given substring `format`,
   107  // which should begin with '%'. It returns a fully populated state or an error
   108  // if the operation is malformed. The firstArg and argNum parameters help determine how
   109  // arguments map to this operation.
   110  //
   111  // Parse sequence: '%' -> flags -> {[N]* or width} -> .{[N]* or precision} -> [N] -> verb.
   112  func parseOperation(format string, firstArg, argNum int) (*state, error) {
   113  	state := &state{
   114  		operation: &Operation{
   115  			Text: format,
   116  			Width: Size{
   117  				Fixed:   -1,
   118  				Dynamic: -1,
   119  				Index:   -1,
   120  			},
   121  			Prec: Size{
   122  				Fixed:   -1,
   123  				Dynamic: -1,
   124  				Index:   -1,
   125  			},
   126  		},
   127  		firstArg:     firstArg,
   128  		argNum:       argNum,
   129  		hasIndex:     false,
   130  		index:        0,
   131  		indexPos:     0,
   132  		indexPending: false,
   133  		nbytes:       len("%"), // There's guaranteed to be a percent sign.
   134  	}
   135  	// There may be flags.
   136  	state.parseFlags()
   137  	// There may be an index.
   138  	if err := state.parseIndex(); err != nil {
   139  		return nil, err
   140  	}
   141  	// There may be a width.
   142  	state.parseSize(Width)
   143  	// There may be a precision.
   144  	if err := state.parsePrecision(); err != nil {
   145  		return nil, err
   146  	}
   147  	// Now a verb, possibly prefixed by an index (which we may already have).
   148  	if !state.indexPending {
   149  		if err := state.parseIndex(); err != nil {
   150  			return nil, err
   151  		}
   152  	}
   153  	if state.nbytes == len(state.operation.Text) {
   154  		return nil, fmt.Errorf("format %s is missing verb at end of string", state.operation.Text)
   155  	}
   156  	verb, w := utf8.DecodeRuneInString(state.operation.Text[state.nbytes:])
   157  
   158  	// Ensure there must be a verb.
   159  	if state.indexPending {
   160  		state.operation.Verb = Verb{
   161  			Verb: verb,
   162  			Range: Range{
   163  				Start: state.indexPos,
   164  				End:   state.nbytes + w,
   165  			},
   166  			Index:    state.index,
   167  			ArgIndex: state.argNum,
   168  		}
   169  	} else {
   170  		state.operation.Verb = Verb{
   171  			Verb: verb,
   172  			Range: Range{
   173  				Start: state.nbytes,
   174  				End:   state.nbytes + w,
   175  			},
   176  			Index:    -1,
   177  			ArgIndex: state.argNum,
   178  		}
   179  	}
   180  
   181  	state.nbytes += w
   182  	state.operation.Text = state.operation.Text[:state.nbytes]
   183  	return state, nil
   184  }
   185  
   186  // addOffset adjusts the recorded positions in Verb, Width, Prec, and the
   187  // operation's overall Range to be relative to the position in the full format string.
   188  func (s *Operation) addOffset(parsedLen int) {
   189  	s.Verb.Range.Start += parsedLen
   190  	s.Verb.Range.End += parsedLen
   191  
   192  	s.Range.Start = parsedLen
   193  	s.Range.End = s.Verb.Range.End
   194  
   195  	// one of Fixed or Dynamic is non-negative means existence.
   196  	if s.Prec.Fixed != -1 || s.Prec.Dynamic != -1 {
   197  		s.Prec.Range.Start += parsedLen
   198  		s.Prec.Range.End += parsedLen
   199  	}
   200  	if s.Width.Fixed != -1 || s.Width.Dynamic != -1 {
   201  		s.Width.Range.Start += parsedLen
   202  		s.Width.Range.End += parsedLen
   203  	}
   204  }
   205  
   206  // parseFlags accepts any printf flags.
   207  func (s *state) parseFlags() {
   208  	s.operation.Flags = prefixOf(s.operation.Text[s.nbytes:], "#0+- ")
   209  	s.nbytes += len(s.operation.Flags)
   210  }
   211  
   212  // prefixOf returns the prefix of s composed only of runes from the specified set.
   213  func prefixOf(s, set string) string {
   214  	rest := strings.TrimLeft(s, set)
   215  	return s[:len(s)-len(rest)]
   216  }
   217  
   218  // parseIndex parses an argument index of the form "[n]" that can appear
   219  // in a printf operation (e.g., "%[2]d"). Returns an error if syntax is
   220  // malformed or index is invalid.
   221  func (s *state) parseIndex() error {
   222  	if s.nbytes == len(s.operation.Text) || s.operation.Text[s.nbytes] != '[' {
   223  		return nil
   224  	}
   225  	// Argument index present.
   226  	s.nbytes++ // skip '['
   227  	start := s.nbytes
   228  	if num, ok := s.scanNum(); ok {
   229  		// Later consumed/stored by a '*' or verb.
   230  		s.index = num
   231  		s.indexPos = start - 1
   232  	}
   233  
   234  	ok := true
   235  	if s.nbytes == len(s.operation.Text) || s.nbytes == start || s.operation.Text[s.nbytes] != ']' {
   236  		ok = false // syntax error is either missing "]" or invalid index.
   237  		s.nbytes = strings.Index(s.operation.Text[start:], "]")
   238  		if s.nbytes < 0 {
   239  			return fmt.Errorf("format %s is missing closing ]", s.operation.Text)
   240  		}
   241  		s.nbytes = s.nbytes + start
   242  	}
   243  	arg32, err := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32)
   244  	if err != nil || !ok || arg32 <= 0 {
   245  		return fmt.Errorf("format has invalid argument index [%s]", s.operation.Text[start:s.nbytes])
   246  	}
   247  
   248  	s.nbytes++ // skip ']'
   249  	arg := int(arg32)
   250  	arg += s.firstArg - 1 // We want to zero-index the actual arguments.
   251  	s.argNum = arg
   252  	s.hasIndex = true
   253  	s.indexPending = true
   254  	return nil
   255  }
   256  
   257  // scanNum advances through a decimal number if present, which represents a [Size] or [Index].
   258  func (s *state) scanNum() (int, bool) {
   259  	start := s.nbytes
   260  	for ; s.nbytes < len(s.operation.Text); s.nbytes++ {
   261  		c := s.operation.Text[s.nbytes]
   262  		if c < '0' || '9' < c {
   263  			if start < s.nbytes {
   264  				num, _ := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32)
   265  				return int(num), true
   266  			} else {
   267  				return 0, false
   268  			}
   269  		}
   270  	}
   271  	return 0, false
   272  }
   273  
   274  type sizeType int
   275  
   276  const (
   277  	Width sizeType = iota
   278  	Precision
   279  )
   280  
   281  // parseSize parses a width or precision specifier. It handles literal numeric
   282  // values (e.g., "%3d"), asterisk values (e.g., "%*d"), or indexed asterisk values (e.g., "%[2]*d").
   283  func (s *state) parseSize(kind sizeType) {
   284  	if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '*' {
   285  		s.nbytes++
   286  		if s.indexPending {
   287  			// Absorb it.
   288  			s.indexPending = false
   289  			size := Size{
   290  				Fixed:   -1,
   291  				Dynamic: s.argNum,
   292  				Index:   s.index,
   293  				Range: Range{
   294  					Start: s.indexPos,
   295  					End:   s.nbytes,
   296  				},
   297  			}
   298  			switch kind {
   299  			case Width:
   300  				s.operation.Width = size
   301  			case Precision:
   302  				// Include the leading '.'.
   303  				size.Range.Start -= len(".")
   304  				s.operation.Prec = size
   305  			default:
   306  				panic(kind)
   307  			}
   308  		} else {
   309  			// Non-indexed asterisk: "%*d".
   310  			size := Size{
   311  				Dynamic: s.argNum,
   312  				Index:   -1,
   313  				Fixed:   -1,
   314  				Range: Range{
   315  					Start: s.nbytes - 1,
   316  					End:   s.nbytes,
   317  				},
   318  			}
   319  			switch kind {
   320  			case Width:
   321  				s.operation.Width = size
   322  			case Precision:
   323  				// For precision, include the '.' in the range.
   324  				size.Range.Start -= 1
   325  				s.operation.Prec = size
   326  			default:
   327  				panic(kind)
   328  			}
   329  		}
   330  		s.argNum++
   331  	} else { // Literal number, e.g. "%10d"
   332  		start := s.nbytes
   333  		if num, ok := s.scanNum(); ok {
   334  			size := Size{
   335  				Fixed:   num,
   336  				Index:   -1,
   337  				Dynamic: -1,
   338  				Range: Range{
   339  					Start: start,
   340  					End:   s.nbytes,
   341  				},
   342  			}
   343  			switch kind {
   344  			case Width:
   345  				s.operation.Width = size
   346  			case Precision:
   347  				// Include the leading '.'.
   348  				size.Range.Start -= 1
   349  				s.operation.Prec = size
   350  			default:
   351  				panic(kind)
   352  			}
   353  		}
   354  	}
   355  }
   356  
   357  // parsePrecision checks if there's a precision specified after a '.' character.
   358  // If found, it may also parse an index or an asterisk. Returns an error if any index
   359  // parsing fails.
   360  func (s *state) parsePrecision() error {
   361  	// If there's a period, there may be a precision.
   362  	if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '.' {
   363  		s.nbytes++
   364  		if err := s.parseIndex(); err != nil {
   365  			return err
   366  		}
   367  		s.parseSize(Precision)
   368  	}
   369  	return nil
   370  }
   371  

View as plain text