Source file src/runtime/string.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/abi"
     9  	"internal/bytealg"
    10  	"internal/goarch"
    11  	"internal/runtime/math"
    12  	"internal/runtime/strconv"
    13  	"internal/runtime/sys"
    14  	"unsafe"
    15  )
    16  
// tmpStringBufSize is the element count of the fixed-size temporary
// buffers used in this file: tmpBuf (bytes) and the rune buffer accepted
// by stringtoslicerune.
//
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32
    20  
// tmpBuf is a fixed-size byte buffer of tmpStringBufSize bytes.
// The conversion and concatenation helpers in this file take a *tmpBuf
// and store their result in it when it is non-nil and the result fits.
type tmpBuf [tmpStringBufSize]byte
    22  
    23  // concatstrings implements a Go string concatenation x+y+z+...
    24  // The operands are passed in the slice a.
    25  // If buf != nil, the compiler has determined that the result does not
    26  // escape the calling function, so the string data can be stored in buf
    27  // if small enough.
    28  func concatstrings(buf *tmpBuf, a []string) string {
    29  	idx := 0
    30  	l := 0
    31  	count := 0
    32  	for i, x := range a {
    33  		n := len(x)
    34  		if n == 0 {
    35  			continue
    36  		}
    37  		if l+n < l {
    38  			throw("string concatenation too long")
    39  		}
    40  		l += n
    41  		count++
    42  		idx = i
    43  	}
    44  	if count == 0 {
    45  		return ""
    46  	}
    47  
    48  	// If there is just one string and either it is not on the stack
    49  	// or our result does not escape the calling frame (buf != nil),
    50  	// then we can return that string directly.
    51  	if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
    52  		return a[idx]
    53  	}
    54  	s, b := rawstringtmp(buf, l)
    55  	for _, x := range a {
    56  		n := copy(b, x)
    57  		b = b[n:]
    58  	}
    59  	return s
    60  }
    61  
    62  // concatstring2 helps make the callsite smaller (compared to concatstrings),
    63  // and we think this is currently more valuable than omitting one call in the
    64  // chain, the same goes for concatstring{3,4,5}.
    65  func concatstring2(buf *tmpBuf, a0, a1 string) string {
    66  	return concatstrings(buf, []string{a0, a1})
    67  }
    68  
    69  func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
    70  	return concatstrings(buf, []string{a0, a1, a2})
    71  }
    72  
    73  func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
    74  	return concatstrings(buf, []string{a0, a1, a2, a3})
    75  }
    76  
    77  func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
    78  	return concatstrings(buf, []string{a0, a1, a2, a3, a4})
    79  }
    80  
    81  // concatbytes implements a Go string concatenation x+y+z+... returning a slice
    82  // of bytes.
    83  // The operands are passed in the slice a.
    84  func concatbytes(buf *tmpBuf, a []string) []byte {
    85  	l := 0
    86  	for _, x := range a {
    87  		n := len(x)
    88  		if l+n < l {
    89  			throw("string concatenation too long")
    90  		}
    91  		l += n
    92  	}
    93  	if l == 0 {
    94  		// This is to match the return type of the non-optimized concatenation.
    95  		return []byte{}
    96  	}
    97  
    98  	var b []byte
    99  	if buf != nil && l <= len(buf) {
   100  		*buf = tmpBuf{}
   101  		b = buf[:l]
   102  	} else {
   103  		b = rawbyteslice(l)
   104  	}
   105  	offset := 0
   106  	for _, x := range a {
   107  		copy(b[offset:], x)
   108  		offset += len(x)
   109  	}
   110  
   111  	return b
   112  }
   113  
   114  // concatbyte2 helps make the callsite smaller (compared to concatbytes),
   115  // and we think this is currently more valuable than omitting one call in
   116  // the chain, the same goes for concatbyte{3,4,5}.
   117  func concatbyte2(buf *tmpBuf, a0, a1 string) []byte {
   118  	return concatbytes(buf, []string{a0, a1})
   119  }
   120  
   121  func concatbyte3(buf *tmpBuf, a0, a1, a2 string) []byte {
   122  	return concatbytes(buf, []string{a0, a1, a2})
   123  }
   124  
   125  func concatbyte4(buf *tmpBuf, a0, a1, a2, a3 string) []byte {
   126  	return concatbytes(buf, []string{a0, a1, a2, a3})
   127  }
   128  
   129  func concatbyte5(buf *tmpBuf, a0, a1, a2, a3, a4 string) []byte {
   130  	return concatbytes(buf, []string{a0, a1, a2, a3, a4})
   131  }
   132  
   133  // slicebytetostring converts a byte slice to a string.
   134  // It is inserted by the compiler into generated code.
   135  // ptr is a pointer to the first element of the slice;
   136  // n is the length of the slice.
   137  // Buf is a fixed-size buffer for the result,
   138  // it is not nil if the result does not escape.
   139  func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
   140  	if n == 0 {
   141  		// Turns out to be a relatively common case.
   142  		// Consider that you want to parse out data between parens in "foo()bar",
   143  		// you find the indices and convert the subslice to string.
   144  		return ""
   145  	}
   146  	if raceenabled {
   147  		racereadrangepc(unsafe.Pointer(ptr),
   148  			uintptr(n),
   149  			sys.GetCallerPC(),
   150  			abi.FuncPCABIInternal(slicebytetostring))
   151  	}
   152  	if msanenabled {
   153  		msanread(unsafe.Pointer(ptr), uintptr(n))
   154  	}
   155  	if asanenabled {
   156  		asanread(unsafe.Pointer(ptr), uintptr(n))
   157  	}
   158  	if n == 1 {
   159  		p := unsafe.Pointer(&staticuint64s[*ptr])
   160  		if goarch.BigEndian {
   161  			p = add(p, 7)
   162  		}
   163  		return unsafe.String((*byte)(p), 1)
   164  	}
   165  
   166  	var p unsafe.Pointer
   167  	if buf != nil && n <= len(buf) {
   168  		p = unsafe.Pointer(buf)
   169  	} else {
   170  		p = mallocgc(uintptr(n), nil, false)
   171  	}
   172  	memmove(p, unsafe.Pointer(ptr), uintptr(n))
   173  	return unsafe.String((*byte)(p), n)
   174  }
   175  
   176  // stringDataOnStack reports whether the string's data is
   177  // stored on the current goroutine's stack.
   178  func stringDataOnStack(s string) bool {
   179  	ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
   180  	stk := getg().stack
   181  	return stk.lo <= ptr && ptr < stk.hi
   182  }
   183  
   184  func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
   185  	if buf != nil && l <= len(buf) {
   186  		b = buf[:l]
   187  		s = slicebytetostringtmp(&b[0], len(b))
   188  	} else {
   189  		s, b = rawstring(l)
   190  	}
   191  	return
   192  }
   193  
   194  // slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
   195  //
   196  // Callers need to ensure that the returned string will not be used after
   197  // the calling goroutine modifies the original slice or synchronizes with
   198  // another goroutine.
   199  //
   200  // The function is only called when instrumenting
   201  // and otherwise intrinsified by the compiler.
   202  //
   203  // Some internal compiler optimizations use this function.
   204  //   - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
   205  //     where k is []byte, T1 to Tn is a nesting of struct and array literals.
   206  //   - Used for "<"+string(b)+">" concatenation where b is []byte.
   207  //   - Used for string(b)=="foo" comparison where b is []byte.
   208  func slicebytetostringtmp(ptr *byte, n int) string {
   209  	if raceenabled && n > 0 {
   210  		racereadrangepc(unsafe.Pointer(ptr),
   211  			uintptr(n),
   212  			sys.GetCallerPC(),
   213  			abi.FuncPCABIInternal(slicebytetostringtmp))
   214  	}
   215  	if msanenabled && n > 0 {
   216  		msanread(unsafe.Pointer(ptr), uintptr(n))
   217  	}
   218  	if asanenabled && n > 0 {
   219  		asanread(unsafe.Pointer(ptr), uintptr(n))
   220  	}
   221  	return unsafe.String(ptr, n)
   222  }
   223  
   224  func stringtoslicebyte(buf *tmpBuf, s string) []byte {
   225  	var b []byte
   226  	if buf != nil && len(s) <= len(buf) {
   227  		*buf = tmpBuf{}
   228  		b = buf[:len(s)]
   229  	} else {
   230  		b = rawbyteslice(len(s))
   231  	}
   232  	copy(b, s)
   233  	return b
   234  }
   235  
   236  func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
   237  	// two passes.
   238  	// unlike slicerunetostring, no race because strings are immutable.
   239  	n := 0
   240  	for range s {
   241  		n++
   242  	}
   243  
   244  	var a []rune
   245  	if buf != nil && n <= len(buf) {
   246  		*buf = [tmpStringBufSize]rune{}
   247  		a = buf[:n]
   248  	} else {
   249  		a = rawruneslice(n)
   250  	}
   251  
   252  	n = 0
   253  	for _, r := range s {
   254  		a[n] = r
   255  		n++
   256  	}
   257  	return a
   258  }
   259  
   260  func slicerunetostring(buf *tmpBuf, a []rune) string {
   261  	if raceenabled && len(a) > 0 {
   262  		racereadrangepc(unsafe.Pointer(&a[0]),
   263  			uintptr(len(a))*unsafe.Sizeof(a[0]),
   264  			sys.GetCallerPC(),
   265  			abi.FuncPCABIInternal(slicerunetostring))
   266  	}
   267  	if msanenabled && len(a) > 0 {
   268  		msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   269  	}
   270  	if asanenabled && len(a) > 0 {
   271  		asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   272  	}
   273  	var dum [4]byte
   274  	size1 := 0
   275  	for _, r := range a {
   276  		size1 += encoderune(dum[:], r)
   277  	}
   278  	s, b := rawstringtmp(buf, size1+3)
   279  	size2 := 0
   280  	for _, r := range a {
   281  		// check for race
   282  		if size2 >= size1 {
   283  			break
   284  		}
   285  		size2 += encoderune(b[size2:], r)
   286  	}
   287  	return s[:size2]
   288  }
   289  
// stringStruct is a pointer/length pair that code in this file
// reinterprets as a string (and vice versa) through unsafe.Pointer;
// see stringStructOf, findnull and gostringnocopy.
type stringStruct struct {
	str unsafe.Pointer // pointer to the first byte of the string data
	len int            // length of the string in bytes
}
   294  
// stringStructDWARF is a variant of stringStruct whose data pointer has
// type *byte instead of unsafe.Pointer, for DWARF debugging.
type stringStructDWARF struct {
	str *byte // pointer to the first byte of the string data
	len int   // length of the string in bytes
}
   300  
   301  func stringStructOf(sp *string) *stringStruct {
   302  	return (*stringStruct)(unsafe.Pointer(sp))
   303  }
   304  
   305  func intstring(buf *[4]byte, v int64) (s string) {
   306  	var b []byte
   307  	if buf != nil {
   308  		b = buf[:]
   309  		s = slicebytetostringtmp(&b[0], len(b))
   310  	} else {
   311  		s, b = rawstring(4)
   312  	}
   313  	if int64(rune(v)) != v {
   314  		v = runeError
   315  	}
   316  	n := encoderune(b, rune(v))
   317  	return s[:n]
   318  }
   319  
   320  // rawstring allocates storage for a new string. The returned
   321  // string and byte slice both refer to the same storage.
   322  // The storage is not zeroed. Callers should use
   323  // b to set the string contents and then drop b.
   324  func rawstring(size int) (s string, b []byte) {
   325  	p := mallocgc(uintptr(size), nil, false)
   326  	return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
   327  }
   328  
   329  // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
   330  func rawbyteslice(size int) (b []byte) {
   331  	cap := roundupsize(uintptr(size), true)
   332  	p := mallocgc(cap, nil, false)
   333  	if cap != uintptr(size) {
   334  		memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
   335  	}
   336  
   337  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
   338  	return
   339  }
   340  
   341  // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
   342  func rawruneslice(size int) (b []rune) {
   343  	if uintptr(size) > maxAlloc/4 {
   344  		throw("out of memory")
   345  	}
   346  	mem := roundupsize(uintptr(size)*4, true)
   347  	p := mallocgc(mem, nil, false)
   348  	if mem != uintptr(size)*4 {
   349  		memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
   350  	}
   351  
   352  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
   353  	return
   354  }
   355  
   356  // used by cmd/cgo
   357  func gobytes(p *byte, n int) (b []byte) {
   358  	if n == 0 {
   359  		return make([]byte, 0)
   360  	}
   361  
   362  	if n < 0 || uintptr(n) > maxAlloc {
   363  		panic(errorString("gobytes: length out of range"))
   364  	}
   365  
   366  	bp := mallocgc(uintptr(n), nil, false)
   367  	memmove(bp, unsafe.Pointer(p), uintptr(n))
   368  
   369  	*(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
   370  	return
   371  }
   372  
   373  // This is exported via linkname to assembly in syscall (for Plan9) and cgo.
   374  //
   375  //go:linkname gostring
   376  func gostring(p *byte) string {
   377  	l := findnull(p)
   378  	if l == 0 {
   379  		return ""
   380  	}
   381  	s, b := rawstring(l)
   382  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   383  	return s
   384  }
   385  
   386  // internal_syscall_gostring is a version of gostring for internal/syscall/unix.
   387  //
   388  //go:linkname internal_syscall_gostring internal/syscall/unix.gostring
   389  func internal_syscall_gostring(p *byte) string {
   390  	return gostring(p)
   391  }
   392  
   393  func gostringn(p *byte, l int) string {
   394  	if l == 0 {
   395  		return ""
   396  	}
   397  	s, b := rawstring(l)
   398  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   399  	return s
   400  }
   401  
   402  // parseByteCount parses a string that represents a count of bytes.
   403  //
   404  // s must match the following regular expression:
   405  //
   406  //	^[0-9]+(([KMGT]i)?B)?$
   407  //
   408  // In other words, an integer byte count with an optional unit
   409  // suffix. Acceptable suffixes include one of
   410  // - KiB, MiB, GiB, TiB which represent binary IEC/ISO 80000 units, or
   411  // - B, which just represents bytes.
   412  //
   413  // Returns an int64 because that's what its callers want and receive,
   414  // but the result is always non-negative.
   415  func parseByteCount(s string) (int64, bool) {
   416  	// The empty string is not valid.
   417  	if s == "" {
   418  		return 0, false
   419  	}
   420  	// Handle the easy non-suffix case.
   421  	last := s[len(s)-1]
   422  	if last >= '0' && last <= '9' {
   423  		n, ok := strconv.Atoi64(s)
   424  		if !ok || n < 0 {
   425  			return 0, false
   426  		}
   427  		return n, ok
   428  	}
   429  	// Failing a trailing digit, this must always end in 'B'.
   430  	// Also at this point there must be at least one digit before
   431  	// that B.
   432  	if last != 'B' || len(s) < 2 {
   433  		return 0, false
   434  	}
   435  	// The one before that must always be a digit or 'i'.
   436  	if c := s[len(s)-2]; c >= '0' && c <= '9' {
   437  		// Trivial 'B' suffix.
   438  		n, ok := strconv.Atoi64(s[:len(s)-1])
   439  		if !ok || n < 0 {
   440  			return 0, false
   441  		}
   442  		return n, ok
   443  	} else if c != 'i' {
   444  		return 0, false
   445  	}
   446  	// Finally, we need at least 4 characters now, for the unit
   447  	// prefix and at least one digit.
   448  	if len(s) < 4 {
   449  		return 0, false
   450  	}
   451  	power := 0
   452  	switch s[len(s)-3] {
   453  	case 'K':
   454  		power = 1
   455  	case 'M':
   456  		power = 2
   457  	case 'G':
   458  		power = 3
   459  	case 'T':
   460  		power = 4
   461  	default:
   462  		// Invalid suffix.
   463  		return 0, false
   464  	}
   465  	m := uint64(1)
   466  	for i := 0; i < power; i++ {
   467  		m *= 1024
   468  	}
   469  	n, ok := strconv.Atoi64(s[:len(s)-3])
   470  	if !ok || n < 0 {
   471  		return 0, false
   472  	}
   473  	un := uint64(n)
   474  	if un > math.MaxUint64/m {
   475  		// Overflow.
   476  		return 0, false
   477  	}
   478  	un *= m
   479  	if un > uint64(math.MaxInt64) {
   480  		// Overflow.
   481  		return 0, false
   482  	}
   483  	return int64(un), true
   484  }
   485  
// findnull returns the number of bytes that precede the first zero byte
// in the memory starting at s. It returns 0 when s is nil.
//
// There is no upper bound on the scan: the caller must guarantee that a
// zero byte is present.
//
//go:nosplit
func findnull(s *byte) int {
	if s == nil {
		return 0
	}

	// Avoid IndexByteString on Plan 9 because it uses SSE instructions
	// on x86 machines, and those are classified as floating point instructions,
	// which are illegal in a note handler.
	if GOOS == "plan9" {
		// Plain byte-at-a-time scan through a large array view of s.
		p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
		l := 0
		for p[l] != 0 {
			l++
		}
		return l
	}

	// pageSize is the unit we scan at a time looking for NULL.
	// It must be the minimum page size for any architecture Go
	// runs on. It's okay (just a minor performance loss) if the
	// actual system page size is larger than this value.
	const pageSize = 4096

	// offset counts the bytes already scanned without finding a zero.
	offset := 0
	ptr := unsafe.Pointer(s)
	// IndexByteString uses wide reads, so we need to be careful
	// with page boundaries. Call IndexByteString on
	// [ptr, endOfPage) interval.
	// The first chunk runs from s to the end of the pageSize-aligned
	// region containing it; later chunks are whole regions.
	safeLen := int(pageSize - uintptr(ptr)%pageSize)

	for {
		// View the current chunk as a string without copying it.
		t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
		// Check one page at a time.
		if i := bytealg.IndexByteString(t, 0); i != -1 {
			return offset + i
		}
		// Move to next page
		ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
		offset += safeLen
		safeLen = pageSize
	}
}
   529  
   530  func findnullw(s *uint16) int {
   531  	if s == nil {
   532  		return 0
   533  	}
   534  	p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
   535  	l := 0
   536  	for p[l] != 0 {
   537  		l++
   538  	}
   539  	return l
   540  }
   541  
   542  //go:nosplit
   543  func gostringnocopy(str *byte) string {
   544  	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
   545  	s := *(*string)(unsafe.Pointer(&ss))
   546  	return s
   547  }
   548  
   549  func gostringw(strw *uint16) string {
   550  	var buf [8]byte
   551  	str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
   552  	n1 := 0
   553  	for i := 0; str[i] != 0; i++ {
   554  		n1 += encoderune(buf[:], rune(str[i]))
   555  	}
   556  	s, b := rawstring(n1 + 4)
   557  	n2 := 0
   558  	for i := 0; str[i] != 0; i++ {
   559  		// check for race
   560  		if n2 >= n1 {
   561  			break
   562  		}
   563  		n2 += encoderune(b[n2:], rune(str[i]))
   564  	}
   565  	b[n2] = 0 // for luck
   566  	return s[:n2]
   567  }
   568  

View as plain text