Source file src/runtime/string.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/abi"
     9  	"internal/bytealg"
    10  	"internal/goarch"
    11  	"internal/runtime/sys"
    12  	"unsafe"
    13  )
    14  
// tmpStringBufSize is the element count of the fixed-size temporary
// buffers used in this file: tmpBuf (bytes) and the rune buffer taken
// by stringtoslicerune.
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32

// tmpBuf is a fixed-size byte buffer. The conversion and concatenation
// routines in this file store their result in it, instead of
// allocating, when a non-nil buffer is supplied and the result fits.
type tmpBuf [tmpStringBufSize]byte
    20  
    21  // concatstrings implements a Go string concatenation x+y+z+...
    22  // The operands are passed in the slice a.
    23  // If buf != nil, the compiler has determined that the result does not
    24  // escape the calling function, so the string data can be stored in buf
    25  // if small enough.
    26  func concatstrings(buf *tmpBuf, a []string) string {
    27  	idx := 0
    28  	l := 0
    29  	count := 0
    30  	for i, x := range a {
    31  		n := len(x)
    32  		if n == 0 {
    33  			continue
    34  		}
    35  		if l+n < l {
    36  			throw("string concatenation too long")
    37  		}
    38  		l += n
    39  		count++
    40  		idx = i
    41  	}
    42  	if count == 0 {
    43  		return ""
    44  	}
    45  
    46  	// If there is just one string and either it is not on the stack
    47  	// or our result does not escape the calling frame (buf != nil),
    48  	// then we can return that string directly.
    49  	if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
    50  		return a[idx]
    51  	}
    52  	s, b := rawstringtmp(buf, l)
    53  	for _, x := range a {
    54  		n := copy(b, x)
    55  		b = b[n:]
    56  	}
    57  	return s
    58  }
    59  
    60  func concatstring2(buf *tmpBuf, a0, a1 string) string {
    61  	return concatstrings(buf, []string{a0, a1})
    62  }
    63  
    64  func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
    65  	return concatstrings(buf, []string{a0, a1, a2})
    66  }
    67  
    68  func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
    69  	return concatstrings(buf, []string{a0, a1, a2, a3})
    70  }
    71  
    72  func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
    73  	return concatstrings(buf, []string{a0, a1, a2, a3, a4})
    74  }
    75  
    76  // concatbytes implements a Go string concatenation x+y+z+... returning a slice
    77  // of bytes.
    78  // The operands are passed in the slice a.
    79  func concatbytes(buf *tmpBuf, a []string) []byte {
    80  	l := 0
    81  	for _, x := range a {
    82  		n := len(x)
    83  		if l+n < l {
    84  			throw("string concatenation too long")
    85  		}
    86  		l += n
    87  	}
    88  	if l == 0 {
    89  		// This is to match the return type of the non-optimized concatenation.
    90  		return []byte{}
    91  	}
    92  
    93  	var b []byte
    94  	if buf != nil && l <= len(buf) {
    95  		*buf = tmpBuf{}
    96  		b = buf[:l]
    97  	} else {
    98  		b = rawbyteslice(l)
    99  	}
   100  	offset := 0
   101  	for _, x := range a {
   102  		copy(b[offset:], x)
   103  		offset += len(x)
   104  	}
   105  
   106  	return b
   107  }
   108  
   109  func concatbyte2(buf *tmpBuf, a0, a1 string) []byte {
   110  	return concatbytes(buf, []string{a0, a1})
   111  }
   112  
   113  func concatbyte3(buf *tmpBuf, a0, a1, a2 string) []byte {
   114  	return concatbytes(buf, []string{a0, a1, a2})
   115  }
   116  
   117  func concatbyte4(buf *tmpBuf, a0, a1, a2, a3 string) []byte {
   118  	return concatbytes(buf, []string{a0, a1, a2, a3})
   119  }
   120  
   121  func concatbyte5(buf *tmpBuf, a0, a1, a2, a3, a4 string) []byte {
   122  	return concatbytes(buf, []string{a0, a1, a2, a3, a4})
   123  }
   124  
// slicebytetostring converts a byte slice to a string.
// It is inserted by the compiler into generated code.
// ptr is a pointer to the first element of the slice;
// n is the length of the slice.
// Buf is a fixed-size buffer for the result,
// it is not nil if the result does not escape.
//
// The returned string never shares storage with the input slice:
// it is "", a one-byte string backed by staticuint64s, or a copy of
// the n bytes placed in buf or in newly allocated memory.
func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
	if n == 0 {
		// Turns out to be a relatively common case.
		// Consider that you want to parse out data between parens in "foo()bar",
		// you find the indices and convert the subslice to string.
		return ""
	}
	// Report the read of the source bytes to whichever sanitizers
	// are enabled in this build.
	if raceenabled {
		racereadrangepc(unsafe.Pointer(ptr),
			uintptr(n),
			sys.GetCallerPC(),
			abi.FuncPCABIInternal(slicebytetostring))
	}
	if msanenabled {
		msanread(unsafe.Pointer(ptr), uintptr(n))
	}
	if asanenabled {
		asanread(unsafe.Pointer(ptr), uintptr(n))
	}
	if n == 1 {
		// One-byte results point into the staticuint64s table, indexed
		// by the byte's value, so nothing is allocated or copied.
		// NOTE(review): presumably entry i holds the value i as a
		// uint64, which is why the wanted byte is at offset 7 on
		// big-endian machines and offset 0 otherwise; confirm against
		// the table's definition.
		p := unsafe.Pointer(&staticuint64s[*ptr])
		if goarch.BigEndian {
			p = add(p, 7)
		}
		return unsafe.String((*byte)(p), 1)
	}

	// Use the caller-provided buffer when there is one and the
	// result fits; otherwise allocate n bytes.
	var p unsafe.Pointer
	if buf != nil && n <= len(buf) {
		p = unsafe.Pointer(buf)
	} else {
		p = mallocgc(uintptr(n), nil, false)
	}
	memmove(p, unsafe.Pointer(ptr), uintptr(n))
	return unsafe.String((*byte)(p), n)
}
   167  
   168  // stringDataOnStack reports whether the string's data is
   169  // stored on the current goroutine's stack.
   170  func stringDataOnStack(s string) bool {
   171  	ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
   172  	stk := getg().stack
   173  	return stk.lo <= ptr && ptr < stk.hi
   174  }
   175  
   176  func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
   177  	if buf != nil && l <= len(buf) {
   178  		b = buf[:l]
   179  		s = slicebytetostringtmp(&b[0], len(b))
   180  	} else {
   181  		s, b = rawstring(l)
   182  	}
   183  	return
   184  }
   185  
// slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
// ptr points to the first byte and n is the number of bytes; nothing
// is copied.
//
// Callers need to ensure that the returned string will not be used after
// the calling goroutine modifies the original slice or synchronizes with
// another goroutine.
//
// The function is only called when instrumenting
// and otherwise intrinsified by the compiler.
//
// Some internal compiler optimizations use this function.
//   - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
//     where k is []byte, T1 to Tn is a nesting of struct and array literals.
//   - Used for "<"+string(b)+">" concatenation where b is []byte.
//   - Used for string(b)=="foo" comparison where b is []byte.
func slicebytetostringtmp(ptr *byte, n int) string {
	// Report the read of the source bytes to whichever sanitizers are
	// enabled; an empty range is not reported.
	if raceenabled && n > 0 {
		racereadrangepc(unsafe.Pointer(ptr),
			uintptr(n),
			sys.GetCallerPC(),
			abi.FuncPCABIInternal(slicebytetostringtmp))
	}
	if msanenabled && n > 0 {
		msanread(unsafe.Pointer(ptr), uintptr(n))
	}
	if asanenabled && n > 0 {
		asanread(unsafe.Pointer(ptr), uintptr(n))
	}
	return unsafe.String(ptr, n)
}
   215  
   216  func stringtoslicebyte(buf *tmpBuf, s string) []byte {
   217  	var b []byte
   218  	if buf != nil && len(s) <= len(buf) {
   219  		*buf = tmpBuf{}
   220  		b = buf[:len(s)]
   221  	} else {
   222  		b = rawbyteslice(len(s))
   223  	}
   224  	copy(b, s)
   225  	return b
   226  }
   227  
   228  func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
   229  	// two passes.
   230  	// unlike slicerunetostring, no race because strings are immutable.
   231  	n := 0
   232  	for range s {
   233  		n++
   234  	}
   235  
   236  	var a []rune
   237  	if buf != nil && n <= len(buf) {
   238  		*buf = [tmpStringBufSize]rune{}
   239  		a = buf[:n]
   240  	} else {
   241  		a = rawruneslice(n)
   242  	}
   243  
   244  	n = 0
   245  	for _, r := range s {
   246  		a[n] = r
   247  		n++
   248  	}
   249  	return a
   250  }
   251  
// slicerunetostring converts the rune slice a to a string by encoding
// each rune with encoderune.
//
// It makes two passes over a: the first sums the encoded sizes, the
// second encodes into storage obtained from rawstringtmp (buf is
// passed through to it).
func slicerunetostring(buf *tmpBuf, a []rune) string {
	// Report the read of the rune data to whichever sanitizers are
	// enabled; an empty slice is not reported.
	if raceenabled && len(a) > 0 {
		racereadrangepc(unsafe.Pointer(&a[0]),
			uintptr(len(a))*unsafe.Sizeof(a[0]),
			sys.GetCallerPC(),
			abi.FuncPCABIInternal(slicerunetostring))
	}
	if msanenabled && len(a) > 0 {
		msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
	}
	if asanenabled && len(a) > 0 {
		asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
	}
	// First pass: encode into a scratch array only to learn the sizes.
	var dum [4]byte
	size1 := 0
	for _, r := range a {
		size1 += encoderune(dum[:], r)
	}
	// NOTE(review): the 3 extra bytes look like slack so that an
	// encoding started just below size1 cannot run past the end of b if
	// a was modified between the passes; this assumes encoderune writes
	// at most 4 bytes (suggested by dum's size) - confirm.
	s, b := rawstringtmp(buf, size1+3)
	size2 := 0
	for _, r := range a {
		// check for race
		if size2 >= size1 {
			break
		}
		size2 += encoderune(b[size2:], r)
	}
	// Only the bytes written in the second pass are part of the result.
	return s[:size2]
}
   281  
// stringStruct is a pointer-and-length pair that this file converts to
// and from string values through unsafe.Pointer casts
// (see stringStructOf, findnull and gostringnocopy).
type stringStruct struct {
	str unsafe.Pointer // start of the string's bytes
	len int            // length in bytes
}
   286  
// Variant with *byte pointer type for DWARF debugging.
// It has the same two fields as stringStruct, except that str is
// typed *byte instead of unsafe.Pointer.
type stringStructDWARF struct {
	str *byte
	len int
}
   292  
   293  func stringStructOf(sp *string) *stringStruct {
   294  	return (*stringStruct)(unsafe.Pointer(sp))
   295  }
   296  
   297  func intstring(buf *[4]byte, v int64) (s string) {
   298  	var b []byte
   299  	if buf != nil {
   300  		b = buf[:]
   301  		s = slicebytetostringtmp(&b[0], len(b))
   302  	} else {
   303  		s, b = rawstring(4)
   304  	}
   305  	if int64(rune(v)) != v {
   306  		v = runeError
   307  	}
   308  	n := encoderune(b, rune(v))
   309  	return s[:n]
   310  }
   311  
   312  // rawstring allocates storage for a new string. The returned
   313  // string and byte slice both refer to the same storage.
   314  // The storage is not zeroed. Callers should use
   315  // b to set the string contents and then drop b.
   316  func rawstring(size int) (s string, b []byte) {
   317  	p := mallocgc(uintptr(size), nil, false)
   318  	return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
   319  }
   320  
   321  // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
   322  func rawbyteslice(size int) (b []byte) {
   323  	cap := roundupsize(uintptr(size), true)
   324  	p := mallocgc(cap, nil, false)
   325  	if cap != uintptr(size) {
   326  		memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
   327  	}
   328  
   329  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
   330  	return
   331  }
   332  
   333  // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
   334  func rawruneslice(size int) (b []rune) {
   335  	if uintptr(size) > maxAlloc/4 {
   336  		throw("out of memory")
   337  	}
   338  	mem := roundupsize(uintptr(size)*4, true)
   339  	p := mallocgc(mem, nil, false)
   340  	if mem != uintptr(size)*4 {
   341  		memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
   342  	}
   343  
   344  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
   345  	return
   346  }
   347  
   348  // used by cmd/cgo
   349  func gobytes(p *byte, n int) (b []byte) {
   350  	if n == 0 {
   351  		return make([]byte, 0)
   352  	}
   353  
   354  	if n < 0 || uintptr(n) > maxAlloc {
   355  		panic(errorString("gobytes: length out of range"))
   356  	}
   357  
   358  	bp := mallocgc(uintptr(n), nil, false)
   359  	memmove(bp, unsafe.Pointer(p), uintptr(n))
   360  
   361  	*(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
   362  	return
   363  }
   364  
   365  // This is exported via linkname to assembly in syscall (for Plan9) and cgo.
   366  //
   367  //go:linkname gostring
   368  func gostring(p *byte) string {
   369  	l := findnull(p)
   370  	if l == 0 {
   371  		return ""
   372  	}
   373  	s, b := rawstring(l)
   374  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   375  	return s
   376  }
   377  
// internal_syscall_gostring is a version of gostring for internal/syscall/unix.
// It forwards p to gostring unchanged and is made available to that
// package, under the name gostring, by the linkname directive below.
//
//go:linkname internal_syscall_gostring internal/syscall/unix.gostring
func internal_syscall_gostring(p *byte) string {
	return gostring(p)
}
   384  
   385  func gostringn(p *byte, l int) string {
   386  	if l == 0 {
   387  		return ""
   388  	}
   389  	s, b := rawstring(l)
   390  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   391  	return s
   392  }
   393  
   394  const (
   395  	maxUint64 = ^uint64(0)
   396  	maxInt64  = int64(maxUint64 >> 1)
   397  )
   398  
   399  // atoi64 parses an int64 from a string s.
   400  // The bool result reports whether s is a number
   401  // representable by a value of type int64.
   402  func atoi64(s string) (int64, bool) {
   403  	if s == "" {
   404  		return 0, false
   405  	}
   406  
   407  	neg := false
   408  	if s[0] == '-' {
   409  		neg = true
   410  		s = s[1:]
   411  	}
   412  
   413  	un := uint64(0)
   414  	for i := 0; i < len(s); i++ {
   415  		c := s[i]
   416  		if c < '0' || c > '9' {
   417  			return 0, false
   418  		}
   419  		if un > maxUint64/10 {
   420  			// overflow
   421  			return 0, false
   422  		}
   423  		un *= 10
   424  		un1 := un + uint64(c) - '0'
   425  		if un1 < un {
   426  			// overflow
   427  			return 0, false
   428  		}
   429  		un = un1
   430  	}
   431  
   432  	if !neg && un > uint64(maxInt64) {
   433  		return 0, false
   434  	}
   435  	if neg && un > uint64(maxInt64)+1 {
   436  		return 0, false
   437  	}
   438  
   439  	n := int64(un)
   440  	if neg {
   441  		n = -n
   442  	}
   443  
   444  	return n, true
   445  }
   446  
   447  // atoi is like atoi64 but for integers
   448  // that fit into an int.
   449  func atoi(s string) (int, bool) {
   450  	if n, ok := atoi64(s); n == int64(int(n)) {
   451  		return int(n), ok
   452  	}
   453  	return 0, false
   454  }
   455  
   456  // atoi32 is like atoi but for integers
   457  // that fit into an int32.
   458  func atoi32(s string) (int32, bool) {
   459  	if n, ok := atoi64(s); n == int64(int32(n)) {
   460  		return int32(n), ok
   461  	}
   462  	return 0, false
   463  }
   464  
   465  // parseByteCount parses a string that represents a count of bytes.
   466  //
   467  // s must match the following regular expression:
   468  //
   469  //	^[0-9]+(([KMGT]i)?B)?$
   470  //
   471  // In other words, an integer byte count with an optional unit
   472  // suffix. Acceptable suffixes include one of
   473  // - KiB, MiB, GiB, TiB which represent binary IEC/ISO 80000 units, or
   474  // - B, which just represents bytes.
   475  //
   476  // Returns an int64 because that's what its callers want and receive,
   477  // but the result is always non-negative.
   478  func parseByteCount(s string) (int64, bool) {
   479  	// The empty string is not valid.
   480  	if s == "" {
   481  		return 0, false
   482  	}
   483  	// Handle the easy non-suffix case.
   484  	last := s[len(s)-1]
   485  	if last >= '0' && last <= '9' {
   486  		n, ok := atoi64(s)
   487  		if !ok || n < 0 {
   488  			return 0, false
   489  		}
   490  		return n, ok
   491  	}
   492  	// Failing a trailing digit, this must always end in 'B'.
   493  	// Also at this point there must be at least one digit before
   494  	// that B.
   495  	if last != 'B' || len(s) < 2 {
   496  		return 0, false
   497  	}
   498  	// The one before that must always be a digit or 'i'.
   499  	if c := s[len(s)-2]; c >= '0' && c <= '9' {
   500  		// Trivial 'B' suffix.
   501  		n, ok := atoi64(s[:len(s)-1])
   502  		if !ok || n < 0 {
   503  			return 0, false
   504  		}
   505  		return n, ok
   506  	} else if c != 'i' {
   507  		return 0, false
   508  	}
   509  	// Finally, we need at least 4 characters now, for the unit
   510  	// prefix and at least one digit.
   511  	if len(s) < 4 {
   512  		return 0, false
   513  	}
   514  	power := 0
   515  	switch s[len(s)-3] {
   516  	case 'K':
   517  		power = 1
   518  	case 'M':
   519  		power = 2
   520  	case 'G':
   521  		power = 3
   522  	case 'T':
   523  		power = 4
   524  	default:
   525  		// Invalid suffix.
   526  		return 0, false
   527  	}
   528  	m := uint64(1)
   529  	for i := 0; i < power; i++ {
   530  		m *= 1024
   531  	}
   532  	n, ok := atoi64(s[:len(s)-3])
   533  	if !ok || n < 0 {
   534  		return 0, false
   535  	}
   536  	un := uint64(n)
   537  	if un > maxUint64/m {
   538  		// Overflow.
   539  		return 0, false
   540  	}
   541  	un *= m
   542  	if un > uint64(maxInt64) {
   543  		// Overflow.
   544  		return 0, false
   545  	}
   546  	return int64(un), true
   547  }
   548  
// findnull returns the number of bytes that precede the first zero
// byte in the memory starting at s. It returns 0 when s is nil.
// The memory is scanned until a zero byte is found; there is no
// upper bound on the search.
//
//go:nosplit
func findnull(s *byte) int {
	if s == nil {
		return 0
	}

	// Avoid IndexByteString on Plan 9 because it uses SSE instructions
	// on x86 machines, and those are classified as floating point instructions,
	// which are illegal in a note handler.
	if GOOS == "plan9" {
		p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
		l := 0
		for p[l] != 0 {
			l++
		}
		return l
	}

	// pageSize is the unit we scan at a time looking for NULL.
	// It must be the minimum page size for any architecture Go
	// runs on. It's okay (just a minor performance loss) if the
	// actual system page size is larger than this value.
	const pageSize = 4096

	// offset counts the bytes already scanned without finding a zero.
	offset := 0
	ptr := unsafe.Pointer(s)
	// IndexByteString uses wide reads, so we need to be careful
	// with page boundaries. Call IndexByteString on
	// [ptr, endOfPage) interval.
	safeLen := int(pageSize - uintptr(ptr)%pageSize)

	for {
		// Build a string header over the current interval without
		// copying, so that IndexByteString can search it.
		t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
		// Check one page at a time.
		if i := bytealg.IndexByteString(t, 0); i != -1 {
			return offset + i
		}
		// Move to next page
		ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
		offset += safeLen
		// ptr is now page-aligned, so later intervals are whole pages.
		safeLen = pageSize
	}
}
   592  
   593  func findnullw(s *uint16) int {
   594  	if s == nil {
   595  		return 0
   596  	}
   597  	p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
   598  	l := 0
   599  	for p[l] != 0 {
   600  		l++
   601  	}
   602  	return l
   603  }
   604  
// gostringnocopy returns a string whose data pointer is str and whose
// length is findnull(str). No bytes are copied, so the result refers
// directly to the memory at str.
//
//go:nosplit
func gostringnocopy(str *byte) string {
	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
	// Reinterpret the hand-built header as a string value.
	s := *(*string)(unsafe.Pointer(&ss))
	return s
}
   611  
// gostringw converts the zero-terminated sequence of uint16 values at
// strw to a string. Each uint16 is converted to a rune on its own and
// encoded with encoderune; adjacent values are never combined.
//
// It makes two passes over the input: the first sums the encoded
// sizes, the second encodes into storage obtained from rawstring.
// Unlike findnullw, it does not check strw for nil.
func gostringw(strw *uint16) string {
	var buf [8]byte
	str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
	// First pass: encode into a scratch array only to learn the sizes.
	n1 := 0
	for i := 0; str[i] != 0; i++ {
		n1 += encoderune(buf[:], rune(str[i]))
	}
	// NOTE(review): the 4 extra bytes look like slack for an encoding
	// that overshoots n1 if the input changes between the passes, plus
	// the trailing zero written below - confirm.
	s, b := rawstring(n1 + 4)
	n2 := 0
	for i := 0; str[i] != 0; i++ {
		// check for race
		if n2 >= n1 {
			break
		}
		n2 += encoderune(b[n2:], rune(str[i]))
	}
	b[n2] = 0 // for luck
	// Only the bytes written in the second pass are part of the result.
	return s[:n2]
}
   631  

View as plain text