Source file src/unicode/utf8/utf8_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package utf8_test
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  	"testing"
    11  	"unicode"
    12  	. "unicode/utf8"
    13  )
    14  
    15  // Validate the constants redefined from unicode.
    16  func TestConstants(t *testing.T) {
    17  	if MaxRune != unicode.MaxRune {
    18  		t.Errorf("utf8.MaxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
    19  	}
    20  	if RuneError != unicode.ReplacementChar {
    21  		t.Errorf("utf8.RuneError is wrong: %x should be %x", RuneError, unicode.ReplacementChar)
    22  	}
    23  }
    24  
    25  type Utf8Map struct {
    26  	r   rune
    27  	str string
    28  }
    29  
    30  var utf8map = []Utf8Map{
    31  	{0x0000, "\x00"},
    32  	{0x0001, "\x01"},
    33  	{0x007e, "\x7e"},
    34  	{0x007f, "\x7f"},
    35  	{0x0080, "\xc2\x80"},
    36  	{0x0081, "\xc2\x81"},
    37  	{0x00bf, "\xc2\xbf"},
    38  	{0x00c0, "\xc3\x80"},
    39  	{0x00c1, "\xc3\x81"},
    40  	{0x00c8, "\xc3\x88"},
    41  	{0x00d0, "\xc3\x90"},
    42  	{0x00e0, "\xc3\xa0"},
    43  	{0x00f0, "\xc3\xb0"},
    44  	{0x00f8, "\xc3\xb8"},
    45  	{0x00ff, "\xc3\xbf"},
    46  	{0x0100, "\xc4\x80"},
    47  	{0x07ff, "\xdf\xbf"},
    48  	{0x0400, "\xd0\x80"},
    49  	{0x0800, "\xe0\xa0\x80"},
    50  	{0x0801, "\xe0\xa0\x81"},
    51  	{0x1000, "\xe1\x80\x80"},
    52  	{0xd000, "\xed\x80\x80"},
    53  	{0xd7ff, "\xed\x9f\xbf"}, // last code point before surrogate half.
    54  	{0xe000, "\xee\x80\x80"}, // first code point after surrogate half.
    55  	{0xfffe, "\xef\xbf\xbe"},
    56  	{0xffff, "\xef\xbf\xbf"},
    57  	{0x10000, "\xf0\x90\x80\x80"},
    58  	{0x10001, "\xf0\x90\x80\x81"},
    59  	{0x40000, "\xf1\x80\x80\x80"},
    60  	{0x10fffe, "\xf4\x8f\xbf\xbe"},
    61  	{0x10ffff, "\xf4\x8f\xbf\xbf"},
    62  	{0xFFFD, "\xef\xbf\xbd"},
    63  }
    64  
    65  var surrogateMap = []Utf8Map{
    66  	{0xd800, "\xed\xa0\x80"}, // surrogate min decodes to (RuneError, 1)
    67  	{0xdfff, "\xed\xbf\xbf"}, // surrogate max decodes to (RuneError, 1)
    68  }
    69  
    70  var testStrings = []string{
    71  	"",
    72  	"abcd",
    73  	"☺☻☹",
    74  	"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
    75  	"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
    76  	"\x80\x80\x80\x80",
    77  }
    78  
    79  func TestFullRune(t *testing.T) {
    80  	for _, m := range utf8map {
    81  		b := []byte(m.str)
    82  		if !FullRune(b) {
    83  			t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
    84  		}
    85  		s := m.str
    86  		if !FullRuneInString(s) {
    87  			t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.r)
    88  		}
    89  		b1 := b[0 : len(b)-1]
    90  		if FullRune(b1) {
    91  			t.Errorf("FullRune(%q) = true, want false", b1)
    92  		}
    93  		s1 := string(b1)
    94  		if FullRuneInString(s1) {
    95  			t.Errorf("FullRune(%q) = true, want false", s1)
    96  		}
    97  	}
    98  	for _, s := range []string{"\xc0", "\xc1"} {
    99  		b := []byte(s)
   100  		if !FullRune(b) {
   101  			t.Errorf("FullRune(%q) = false, want true", s)
   102  		}
   103  		if !FullRuneInString(s) {
   104  			t.Errorf("FullRuneInString(%q) = false, want true", s)
   105  		}
   106  	}
   107  }
   108  
   109  func TestEncodeRune(t *testing.T) {
   110  	for _, m := range utf8map {
   111  		b := []byte(m.str)
   112  		var buf [10]byte
   113  		n := EncodeRune(buf[0:], m.r)
   114  		b1 := buf[0:n]
   115  		if !bytes.Equal(b, b1) {
   116  			t.Errorf("EncodeRune(%#04x) = %q want %q", m.r, b1, b)
   117  		}
   118  	}
   119  }
   120  
   121  func TestAppendRune(t *testing.T) {
   122  	for _, m := range utf8map {
   123  		if buf := AppendRune(nil, m.r); string(buf) != m.str {
   124  			t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
   125  		}
   126  		if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
   127  			t.Errorf("AppendRune(init, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
   128  		}
   129  	}
   130  }
   131  
   132  func TestDecodeRune(t *testing.T) {
   133  	for _, m := range utf8map {
   134  		b := []byte(m.str)
   135  		r, size := DecodeRune(b)
   136  		if r != m.r || size != len(b) {
   137  			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
   138  		}
   139  		s := m.str
   140  		r, size = DecodeRuneInString(s)
   141  		if r != m.r || size != len(b) {
   142  			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
   143  		}
   144  
   145  		// there's an extra byte that bytes left behind - make sure trailing byte works
   146  		r, size = DecodeRune(b[0:cap(b)])
   147  		if r != m.r || size != len(b) {
   148  			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
   149  		}
   150  		s = m.str + "\x00"
   151  		r, size = DecodeRuneInString(s)
   152  		if r != m.r || size != len(b) {
   153  			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
   154  		}
   155  
   156  		// make sure missing bytes fail
   157  		wantsize := 1
   158  		if wantsize >= len(b) {
   159  			wantsize = 0
   160  		}
   161  		r, size = DecodeRune(b[0 : len(b)-1])
   162  		if r != RuneError || size != wantsize {
   163  			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[:len(b)-1], r, size, RuneError, wantsize)
   164  		}
   165  		s = m.str[0 : len(m.str)-1]
   166  		r, size = DecodeRuneInString(s)
   167  		if r != RuneError || size != wantsize {
   168  			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, wantsize)
   169  		}
   170  
   171  		// make sure bad sequences fail
   172  		if len(b) == 1 {
   173  			b[0] = 0x80
   174  		} else {
   175  			b[len(b)-1] = 0x7F
   176  		}
   177  		r, size = DecodeRune(b)
   178  		if r != RuneError || size != 1 {
   179  			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, RuneError, 1)
   180  		}
   181  		s = string(b)
   182  		r, size = DecodeRuneInString(s)
   183  		if r != RuneError || size != 1 {
   184  			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, 1)
   185  		}
   186  
   187  	}
   188  }
   189  
   190  func TestDecodeSurrogateRune(t *testing.T) {
   191  	for _, m := range surrogateMap {
   192  		b := []byte(m.str)
   193  		r, size := DecodeRune(b)
   194  		if r != RuneError || size != 1 {
   195  			t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
   196  		}
   197  		s := m.str
   198  		r, size = DecodeRuneInString(s)
   199  		if r != RuneError || size != 1 {
   200  			t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
   201  		}
   202  	}
   203  }
   204  
   205  // Check that DecodeRune and DecodeLastRune correspond to
   206  // the equivalent range loop.
   207  func TestSequencing(t *testing.T) {
   208  	for _, ts := range testStrings {
   209  		for _, m := range utf8map {
   210  			for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
   211  				testSequence(t, s)
   212  			}
   213  		}
   214  	}
   215  }
   216  
// runtimeRuneCount returns the number of runes in s via len([]rune(s)).
// The expression is kept in exactly this shape on purpose: per the inline
// note, the gc compiler rewrites it into a runtime.countrunes call.
func runtimeRuneCount(s string) int {
	return len([]rune(s)) // Replaced by gc with call to runtime.countrunes(s).
}
   220  
   221  // Check that a range loop, len([]rune(string)) optimization and
   222  // []rune conversions visit the same runes.
   223  // Not really a test of this package, but the assumption is used here and
   224  // it's good to verify.
   225  func TestRuntimeConversion(t *testing.T) {
   226  	for _, ts := range testStrings {
   227  		count := RuneCountInString(ts)
   228  		if n := runtimeRuneCount(ts); n != count {
   229  			t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
   230  			break
   231  		}
   232  
   233  		runes := []rune(ts)
   234  		if n := len(runes); n != count {
   235  			t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
   236  			break
   237  		}
   238  		i := 0
   239  		for _, r := range ts {
   240  			if r != runes[i] {
   241  				t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, i, runes[i], runes[i], r, r)
   242  			}
   243  			i++
   244  		}
   245  	}
   246  }
   247  
   248  var invalidSequenceTests = []string{
   249  	"\xed\xa0\x80\x80", // surrogate min
   250  	"\xed\xbf\xbf\x80", // surrogate max
   251  
   252  	// xx
   253  	"\x91\x80\x80\x80",
   254  
   255  	// s1
   256  	"\xC2\x7F\x80\x80",
   257  	"\xC2\xC0\x80\x80",
   258  	"\xDF\x7F\x80\x80",
   259  	"\xDF\xC0\x80\x80",
   260  
   261  	// s2
   262  	"\xE0\x9F\xBF\x80",
   263  	"\xE0\xA0\x7F\x80",
   264  	"\xE0\xBF\xC0\x80",
   265  	"\xE0\xC0\x80\x80",
   266  
   267  	// s3
   268  	"\xE1\x7F\xBF\x80",
   269  	"\xE1\x80\x7F\x80",
   270  	"\xE1\xBF\xC0\x80",
   271  	"\xE1\xC0\x80\x80",
   272  
   273  	//s4
   274  	"\xED\x7F\xBF\x80",
   275  	"\xED\x80\x7F\x80",
   276  	"\xED\x9F\xC0\x80",
   277  	"\xED\xA0\x80\x80",
   278  
   279  	// s5
   280  	"\xF0\x8F\xBF\xBF",
   281  	"\xF0\x90\x7F\xBF",
   282  	"\xF0\x90\x80\x7F",
   283  	"\xF0\xBF\xBF\xC0",
   284  	"\xF0\xBF\xC0\x80",
   285  	"\xF0\xC0\x80\x80",
   286  
   287  	// s6
   288  	"\xF1\x7F\xBF\xBF",
   289  	"\xF1\x80\x7F\xBF",
   290  	"\xF1\x80\x80\x7F",
   291  	"\xF1\xBF\xBF\xC0",
   292  	"\xF1\xBF\xC0\x80",
   293  	"\xF1\xC0\x80\x80",
   294  
   295  	// s7
   296  	"\xF4\x7F\xBF\xBF",
   297  	"\xF4\x80\x7F\xBF",
   298  	"\xF4\x80\x80\x7F",
   299  	"\xF4\x8F\xBF\xC0",
   300  	"\xF4\x8F\xC0\x80",
   301  	"\xF4\x90\x80\x80",
   302  }
   303  
   304  func runtimeDecodeRune(s string) rune {
   305  	for _, r := range s {
   306  		return r
   307  	}
   308  	return -1
   309  }
   310  
   311  func TestDecodeInvalidSequence(t *testing.T) {
   312  	for _, s := range invalidSequenceTests {
   313  		r1, _ := DecodeRune([]byte(s))
   314  		if want := RuneError; r1 != want {
   315  			t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, r1, want)
   316  			return
   317  		}
   318  		r2, _ := DecodeRuneInString(s)
   319  		if want := RuneError; r2 != want {
   320  			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, r2, want)
   321  			return
   322  		}
   323  		if r1 != r2 {
   324  			t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, r1, s, r2)
   325  			return
   326  		}
   327  		r3 := runtimeDecodeRune(s)
   328  		if r2 != r3 {
   329  			t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, r2, s, r3)
   330  			return
   331  		}
   332  	}
   333  }
   334  
   335  func testSequence(t *testing.T, s string) {
   336  	type info struct {
   337  		index int
   338  		r     rune
   339  	}
   340  	index := make([]info, len(s))
   341  	b := []byte(s)
   342  	si := 0
   343  	j := 0
   344  	for i, r := range s {
   345  		if si != i {
   346  			t.Errorf("Sequence(%q) mismatched index %d, want %d", s, si, i)
   347  			return
   348  		}
   349  		index[j] = info{i, r}
   350  		j++
   351  		r1, size1 := DecodeRune(b[i:])
   352  		if r != r1 {
   353  			t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], r1, r)
   354  			return
   355  		}
   356  		r2, size2 := DecodeRuneInString(s[i:])
   357  		if r != r2 {
   358  			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], r2, r)
   359  			return
   360  		}
   361  		if size1 != size2 {
   362  			t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], size1, size2)
   363  			return
   364  		}
   365  		si += size1
   366  	}
   367  	j--
   368  	for si = len(s); si > 0; {
   369  		r1, size1 := DecodeLastRune(b[0:si])
   370  		r2, size2 := DecodeLastRuneInString(s[0:si])
   371  		if size1 != size2 {
   372  			t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, si, size1, size2)
   373  			return
   374  		}
   375  		if r1 != index[j].r {
   376  			t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, si, r1, index[j].r)
   377  			return
   378  		}
   379  		if r2 != index[j].r {
   380  			t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, si, r2, index[j].r)
   381  			return
   382  		}
   383  		si -= size1
   384  		if si != index[j].index {
   385  			t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, si, index[j].index)
   386  			return
   387  		}
   388  		j--
   389  	}
   390  	if si != 0 {
   391  		t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, si)
   392  	}
   393  }
   394  
   395  // Check that negative runes encode as U+FFFD.
   396  func TestNegativeRune(t *testing.T) {
   397  	errorbuf := make([]byte, UTFMax)
   398  	errorbuf = errorbuf[0:EncodeRune(errorbuf, RuneError)]
   399  	buf := make([]byte, UTFMax)
   400  	buf = buf[0:EncodeRune(buf, -1)]
   401  	if !bytes.Equal(buf, errorbuf) {
   402  		t.Errorf("incorrect encoding [% x] for -1; expected [% x]", buf, errorbuf)
   403  	}
   404  }
   405  
   406  type RuneCountTest struct {
   407  	in  string
   408  	out int
   409  }
   410  
   411  var runecounttests = []RuneCountTest{
   412  	{"abcd", 4},
   413  	{"☺☻☹", 3},
   414  	{"1,2,3,4", 7},
   415  	{"\xe2\x00", 2},
   416  	{"\xe2\x80", 2},
   417  	{"a\xe2\x80", 3},
   418  }
   419  
   420  func TestRuneCount(t *testing.T) {
   421  	for _, tt := range runecounttests {
   422  		if out := RuneCountInString(tt.in); out != tt.out {
   423  			t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out)
   424  		}
   425  		if out := RuneCount([]byte(tt.in)); out != tt.out {
   426  			t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out)
   427  		}
   428  	}
   429  }
   430  
   431  func TestRuneCountNonASCIIAllocation(t *testing.T) {
   432  	if n := testing.AllocsPerRun(10, func() {
   433  		s := []byte("日本語日本語日本語日")
   434  		_ = RuneCount(s)
   435  	}); n > 0 {
   436  		t.Errorf("unexpected RuneCount allocation, got %v, want 0", n)
   437  	}
   438  }
   439  
   440  type RuneLenTest struct {
   441  	r    rune
   442  	size int
   443  }
   444  
   445  var runelentests = []RuneLenTest{
   446  	{0, 1},
   447  	{'e', 1},
   448  	{'é', 2},
   449  	{'☺', 3},
   450  	{RuneError, 3},
   451  	{MaxRune, 4},
   452  	{0xD800, -1},
   453  	{0xDFFF, -1},
   454  	{MaxRune + 1, -1},
   455  	{-1, -1},
   456  }
   457  
   458  func TestRuneLen(t *testing.T) {
   459  	for _, tt := range runelentests {
   460  		if size := RuneLen(tt.r); size != tt.size {
   461  			t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, size, tt.size)
   462  		}
   463  	}
   464  }
   465  
   466  type ValidTest struct {
   467  	in  string
   468  	out bool
   469  }
   470  
   471  var validTests = []ValidTest{
   472  	{"", true},
   473  	{"a", true},
   474  	{"abc", true},
   475  	{"Ж", true},
   476  	{"ЖЖ", true},
   477  	{"брэд-ЛГТМ", true},
   478  	{"☺☻☹", true},
   479  	{"aa\xe2", false},
   480  	{string([]byte{66, 250}), false},
   481  	{string([]byte{66, 250, 67}), false},
   482  	{"a\uFFFDb", true},
   483  	{string("\xF4\x8F\xBF\xBF"), true},      // U+10FFFF
   484  	{string("\xF4\x90\x80\x80"), false},     // U+10FFFF+1; out of range
   485  	{string("\xF7\xBF\xBF\xBF"), false},     // 0x1FFFFF; out of range
   486  	{string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range
   487  	{string("\xc0\x80"), false},             // U+0000 encoded in two bytes: incorrect
   488  	{string("\xed\xa0\x80"), false},         // U+D800 high surrogate (sic)
   489  	{string("\xed\xbf\xbf"), false},         // U+DFFF low surrogate (sic)
   490  }
   491  
   492  func TestValid(t *testing.T) {
   493  	for _, tt := range validTests {
   494  		if Valid([]byte(tt.in)) != tt.out {
   495  			t.Errorf("Valid(%q) = %v; want %v", tt.in, !tt.out, tt.out)
   496  		}
   497  		if ValidString(tt.in) != tt.out {
   498  			t.Errorf("ValidString(%q) = %v; want %v", tt.in, !tt.out, tt.out)
   499  		}
   500  	}
   501  }
   502  
   503  type ValidRuneTest struct {
   504  	r  rune
   505  	ok bool
   506  }
   507  
   508  var validrunetests = []ValidRuneTest{
   509  	{0, true},
   510  	{'e', true},
   511  	{'é', true},
   512  	{'☺', true},
   513  	{RuneError, true},
   514  	{MaxRune, true},
   515  	{0xD7FF, true},
   516  	{0xD800, false},
   517  	{0xDFFF, false},
   518  	{0xE000, true},
   519  	{MaxRune + 1, false},
   520  	{-1, false},
   521  }
   522  
   523  func TestValidRune(t *testing.T) {
   524  	for _, tt := range validrunetests {
   525  		if ok := ValidRune(tt.r); ok != tt.ok {
   526  			t.Errorf("ValidRune(%#U) = %t, want %t", tt.r, ok, tt.ok)
   527  		}
   528  	}
   529  }
   530  
   531  func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
   532  	s := []byte("0123456789")
   533  	for i := 0; i < b.N; i++ {
   534  		RuneCount(s)
   535  	}
   536  }
   537  
   538  func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
   539  	s := []byte("日本語日本語日本語日")
   540  	for i := 0; i < b.N; i++ {
   541  		RuneCount(s)
   542  	}
   543  }
   544  
   545  func BenchmarkRuneCountInStringTenASCIIChars(b *testing.B) {
   546  	for i := 0; i < b.N; i++ {
   547  		RuneCountInString("0123456789")
   548  	}
   549  }
   550  
   551  func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
   552  	for i := 0; i < b.N; i++ {
   553  		RuneCountInString("日本語日本語日本語日")
   554  	}
   555  }
   556  
   557  var ascii100000 = strings.Repeat("0123456789", 10000)
   558  
   559  func BenchmarkValidTenASCIIChars(b *testing.B) {
   560  	s := []byte("0123456789")
   561  	for i := 0; i < b.N; i++ {
   562  		Valid(s)
   563  	}
   564  }
   565  
   566  func BenchmarkValid100KASCIIChars(b *testing.B) {
   567  	s := []byte(ascii100000)
   568  	for i := 0; i < b.N; i++ {
   569  		Valid(s)
   570  	}
   571  }
   572  
   573  func BenchmarkValidTenJapaneseChars(b *testing.B) {
   574  	s := []byte("日本語日本語日本語日")
   575  	for i := 0; i < b.N; i++ {
   576  		Valid(s)
   577  	}
   578  }
   579  func BenchmarkValidLongMostlyASCII(b *testing.B) {
   580  	longMostlyASCII := []byte(longStringMostlyASCII)
   581  	for i := 0; i < b.N; i++ {
   582  		Valid(longMostlyASCII)
   583  	}
   584  }
   585  
   586  func BenchmarkValidLongJapanese(b *testing.B) {
   587  	longJapanese := []byte(longStringJapanese)
   588  	for i := 0; i < b.N; i++ {
   589  		Valid(longJapanese)
   590  	}
   591  }
   592  
   593  func BenchmarkValidStringTenASCIIChars(b *testing.B) {
   594  	for i := 0; i < b.N; i++ {
   595  		ValidString("0123456789")
   596  	}
   597  }
   598  
   599  func BenchmarkValidString100KASCIIChars(b *testing.B) {
   600  	for i := 0; i < b.N; i++ {
   601  		ValidString(ascii100000)
   602  	}
   603  }
   604  
   605  func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
   606  	for i := 0; i < b.N; i++ {
   607  		ValidString("日本語日本語日本語日")
   608  	}
   609  }
   610  
   611  func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
   612  	for i := 0; i < b.N; i++ {
   613  		ValidString(longStringMostlyASCII)
   614  	}
   615  }
   616  
   617  func BenchmarkValidStringLongJapanese(b *testing.B) {
   618  	for i := 0; i < b.N; i++ {
   619  		ValidString(longStringJapanese)
   620  	}
   621  }
   622  
   623  var longStringMostlyASCII string // ~100KB, ~97% ASCII
   624  var longStringJapanese string    // ~100KB, non-ASCII
   625  
   626  func init() {
   627  	const japanese = "日本語日本語日本語日"
   628  	var b strings.Builder
   629  	for i := 0; b.Len() < 100_000; i++ {
   630  		if i%100 == 0 {
   631  			b.WriteString(japanese)
   632  		} else {
   633  			b.WriteString("0123456789")
   634  		}
   635  	}
   636  	longStringMostlyASCII = b.String()
   637  	longStringJapanese = strings.Repeat(japanese, 100_000/len(japanese))
   638  }
   639  
   640  func BenchmarkEncodeASCIIRune(b *testing.B) {
   641  	buf := make([]byte, UTFMax)
   642  	for i := 0; i < b.N; i++ {
   643  		EncodeRune(buf, 'a') // 1 byte
   644  	}
   645  }
   646  
   647  func BenchmarkEncodeSpanishRune(b *testing.B) {
   648  	buf := make([]byte, UTFMax)
   649  	for i := 0; i < b.N; i++ {
   650  		EncodeRune(buf, 'Ñ') // 2 bytes
   651  	}
   652  }
   653  
   654  func BenchmarkEncodeJapaneseRune(b *testing.B) {
   655  	buf := make([]byte, UTFMax)
   656  	for i := 0; i < b.N; i++ {
   657  		EncodeRune(buf, '本') // 3 bytes
   658  	}
   659  }
   660  
   661  func BenchmarkEncodeMaxRune(b *testing.B) {
   662  	buf := make([]byte, UTFMax)
   663  	for i := 0; i < b.N; i++ {
   664  		EncodeRune(buf, MaxRune) // 4 bytes
   665  	}
   666  }
   667  
   668  func BenchmarkEncodeInvalidRuneMaxPlusOne(b *testing.B) {
   669  	buf := make([]byte, UTFMax)
   670  	for i := 0; i < b.N; i++ {
   671  		EncodeRune(buf, MaxRune+1) // 3 bytes: RuneError
   672  	}
   673  }
   674  
   675  func BenchmarkEncodeInvalidRuneSurrogate(b *testing.B) {
   676  	buf := make([]byte, UTFMax)
   677  	for i := 0; i < b.N; i++ {
   678  		EncodeRune(buf, 0xD800) // 3 bytes: RuneError
   679  	}
   680  }
   681  
   682  func BenchmarkEncodeInvalidRuneNegative(b *testing.B) {
   683  	buf := make([]byte, UTFMax)
   684  	for i := 0; i < b.N; i++ {
   685  		EncodeRune(buf, -1) // 3 bytes: RuneError
   686  	}
   687  }
   688  
   689  func BenchmarkAppendASCIIRune(b *testing.B) {
   690  	buf := make([]byte, UTFMax)
   691  	for i := 0; i < b.N; i++ {
   692  		AppendRune(buf[:0], 'a') // 1 byte
   693  	}
   694  }
   695  
   696  func BenchmarkAppendSpanishRune(b *testing.B) {
   697  	buf := make([]byte, UTFMax)
   698  	for i := 0; i < b.N; i++ {
   699  		AppendRune(buf[:0], 'Ñ') // 2 bytes
   700  	}
   701  }
   702  
   703  func BenchmarkAppendJapaneseRune(b *testing.B) {
   704  	buf := make([]byte, UTFMax)
   705  	for i := 0; i < b.N; i++ {
   706  		AppendRune(buf[:0], '本') // 3 bytes
   707  	}
   708  }
   709  
   710  func BenchmarkAppendMaxRune(b *testing.B) {
   711  	buf := make([]byte, UTFMax)
   712  	for i := 0; i < b.N; i++ {
   713  		AppendRune(buf[:0], MaxRune) // 4 bytes
   714  	}
   715  }
   716  
   717  func BenchmarkAppendInvalidRuneMaxPlusOne(b *testing.B) {
   718  	buf := make([]byte, UTFMax)
   719  	for i := 0; i < b.N; i++ {
   720  		AppendRune(buf[:0], MaxRune+1) // 3 bytes: RuneError
   721  	}
   722  }
   723  
   724  func BenchmarkAppendInvalidRuneSurrogate(b *testing.B) {
   725  	buf := make([]byte, UTFMax)
   726  	for i := 0; i < b.N; i++ {
   727  		AppendRune(buf[:0], 0xD800) // 3 bytes: RuneError
   728  	}
   729  }
   730  
   731  func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
   732  	buf := make([]byte, UTFMax)
   733  	for i := 0; i < b.N; i++ {
   734  		AppendRune(buf[:0], -1) // 3 bytes: RuneError
   735  	}
   736  }
   737  
   738  func BenchmarkDecodeASCIIRune(b *testing.B) {
   739  	a := []byte{'a'}
   740  	for i := 0; i < b.N; i++ {
   741  		DecodeRune(a)
   742  	}
   743  }
   744  
   745  func BenchmarkDecodeJapaneseRune(b *testing.B) {
   746  	nihon := []byte("本")
   747  	for i := 0; i < b.N; i++ {
   748  		DecodeRune(nihon)
   749  	}
   750  }
   751  
// boolSink is used to reference the return value of benchmarked
// functions to avoid dead code elimination. Storing a result in this
// package-level variable keeps the benchmarked call observable.
var boolSink bool
   755  
   756  func BenchmarkFullRune(b *testing.B) {
   757  	benchmarks := []struct {
   758  		name string
   759  		data []byte
   760  	}{
   761  		{"ASCII", []byte("a")},
   762  		{"Incomplete", []byte("\xf0\x90\x80")},
   763  		{"Japanese", []byte("本")},
   764  	}
   765  	for _, bm := range benchmarks {
   766  		b.Run(bm.name, func(b *testing.B) {
   767  			for i := 0; i < b.N; i++ {
   768  				boolSink = FullRune(bm.data)
   769  			}
   770  		})
   771  	}
   772  }
   773  

View as plain text