Source file
src/testing/benchmark.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package testing

import (
	"context"
	"flag"
	"fmt"
	"internal/sysinfo"
	"io"
	"math"
	"os"
	"runtime"
	"slices"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode"
)

func initBenchmarkFlags() {
	matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
	benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
	flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
}

var (
	matchBenchmarks *string
	benchmarkMemory *bool

	benchTime = durationOrCountFlag{d: 1 * time.Second}
)

type durationOrCountFlag struct {
	d         time.Duration
	n         int
	allowZero bool
}

func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

func (f *durationOrCountFlag) Set(s string) error {
	if strings.HasSuffix(s, "x") {
		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n)}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d}
	return nil
}
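
// Illustration (not part of the original file): a rough sketch of how the two
// accepted -test.benchtime forms map onto durationOrCountFlag. The values
// below are hypothetical examples, not defaults.
//
//	var f durationOrCountFlag
//	_ = f.Set("100x") // fixed count: durationOrCountFlag{n: 100}
//	_ = f.Set("2s")   // duration:    durationOrCountFlag{d: 2 * time.Second}
//	_ = f.Set("0x")   // error unless f.allowZero is set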

// Global lock to ensure only one benchmark runs at a time.
var benchmarkLock sync.Mutex

// Used for every benchmark for measuring memory.
var memStats runtime.MemStats

// InternalBenchmark is an internal type but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string
	F    func(b *B)
}

// B is a type passed to Benchmark functions to manage benchmark timing and
// control the number of iterations.
//
// A benchmark ends when its Benchmark function returns or calls any of the
// methods FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must
// be called only from the goroutine running the Benchmark function. The other
// reporting methods, such as the variations of Log and Error, may be called
// simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution and dumped
// to standard output when done. Unlike in tests, benchmark logs are always
// printed, so as not to hide output whose existence may be affecting
// benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	bstate           *benchState
	N                int
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)
	benchTime        durationOrCountFlag
	bytes            int64
	missingBytes     bool // set if any subbenchmark does not report bytes
	timerOn          bool
	showAllocResult  bool
	result           BenchmarkResult
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64
	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric.
	extra map[string]float64

	// loop tracks the state of B.Loop.
	loop struct {
		// n is the target number of iterations of the benchmark loop.
		// It is grown as the loop scales up toward the requested benchtime.
		n uint64
		// i is the current Loop iteration. It increases monotonically
		// toward n. The high bits are reserved as "poison" flags that
		// force Loop onto its slow path (see loopPoisonTimer).
		i uint64
		// done is set once the loop has finished (Loop returned false).
		done bool
	}
}

// StartTimer starts timing a test. This function is called automatically
// before a benchmark starts, but it can also be used to resume timing after
// a call to StopTimer.
func (b *B) StartTimer() {
	if !b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = highPrecisionTimeNow()
		b.timerOn = true
		b.loop.i &^= loopPoisonTimer
	}
}

// StopTimer stops timing a test. This can be used to pause the timer while
// performing steps that you don't want to measure.
func (b *B) StopTimer() {
	if b.timerOn {
		b.duration += highPrecisionTimeSince(b.start)
		runtime.ReadMemStats(&memStats)
		b.netAllocs += memStats.Mallocs - b.startAllocs
		b.netBytes += memStats.TotalAlloc - b.startBytes
		b.timerOn = false
		// While the timer is stopped, poison B.Loop's fast path so it
		// falls back to the slow path, which reports the misuse.
		b.loop.i |= loopPoisonTimer
	}
}

// ResetTimer zeroes the elapsed benchmark time and memory allocation counters
// and deletes user-reported metrics. It does not affect whether the timer is
// running.
func (b *B) ResetTimer() {
	if b.extra == nil {
		// Allocate the extra map before reading memory stats.
		// Pre-size it to make more allocation unlikely.
		b.extra = make(map[string]float64, 16)
	} else {
		clear(b.extra)
	}
	if b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = highPrecisionTimeNow()
	}
	b.duration = 0
	b.netAllocs = 0
	b.netBytes = 0
}
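
// Illustration (not part of the original file): a sketch of how StopTimer,
// StartTimer, and ResetTimer are typically combined to keep setup work out of
// the measurement. buildInput and process are hypothetical helpers.
//
//	func BenchmarkProcess(b *testing.B) {
//		data := buildInput() // expensive setup
//		b.ResetTimer()       // discard the setup time recorded so far
//		for i := 0; i < b.N; i++ {
//			process(data)
//		}
//	}
//
// and, when each iteration needs untimed work of its own:
//
//	for i := 0; i < b.N; i++ {
//		b.StopTimer()
//		data := buildInput() // per-iteration setup, excluded from timing
//		b.StartTimer()
//		process(data)
//	}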

// SetBytes records the number of bytes processed in a single operation.
// If this is called, the benchmark will report ns/op and MB/s.
func (b *B) SetBytes(n int64) { b.bytes = n }

// ReportAllocs enables malloc statistics for this benchmark.
// It is equivalent to setting -test.benchmem, but it only affects the
// benchmark function that calls ReportAllocs.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}
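
// Illustration (not part of the original file): a sketch of a throughput
// benchmark using SetBytes and ReportAllocs. encode is a hypothetical
// function and 1<<20 is an arbitrary buffer size.
//
//	func BenchmarkEncode(b *testing.B) {
//		buf := make([]byte, 1<<20)
//		b.SetBytes(int64(len(buf))) // bytes processed per iteration -> MB/s column
//		b.ReportAllocs()            // adds B/op and allocs/op to the result line
//		for b.Loop() {
//			encode(buf)
//		}
//	}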

// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer func() {
		b.runCleanup(normalPanic)
		b.checkRaces()
	}()
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	b.resetRaces()
	b.N = n
	b.loop.n = 0
	b.loop.i = 0
	b.loop.done = false
	b.ctx = ctx
	b.cancelCtx = cancelCtx

	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration

	if b.loop.n > 0 && !b.loop.done && !b.failed {
		b.Error("benchmark function returned without B.Loop() == false (break or return in loop?)")
	}
}

// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
func (b *B) run1() bool {
	if bstate := b.bstate; bstate != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
			bstate.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if b.hasSub.Load() || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
		}
		return false
	}
	return true
}

var labelsOnce sync.Once

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
func (b *B) run() {
	labelsOnce.Do(func() {
		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
		if b.importPath != "" {
			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
		}
		if cpu := sysinfo.CPUName(); cpu != "" {
			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
		}
	})
	if b.bstate != nil {
		// Running go test --test.bench
		b.bstate.processBench(b) // Must call doBench.
	} else {
		// Running func Benchmark.
		b.doBench()
	}
}

func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// predictN computes how many iterations to run next, given a total time goal
// of goalns, a previous run of prevIters iterations that took prevns
// nanoseconds, and the iteration count last of the previous run.
func predictN(goalns int64, prevIters int64, prevns int64, last int64) int {
	if prevns == 0 {
		// Round up to avoid dividing by zero.
		prevns = 1
	}

	// Order of operations matters.
	// For very fast benchmarks, prevIters ~= prevns.
	// If you divide first, you get 0 or 1,
	// which can hide an order of magnitude in execution time.
	// So multiply first, then divide.
	n := goalns * prevIters / prevns
	// Run more iterations than we think we'll need (1.2x).
	n += n / 5
	// Don't grow too fast in case we had timing errors previously.
	n = min(n, 100*last)
	// Be sure to run at least one more than last time.
	n = max(n, last+1)
	// Don't run more than 1e9 times.
	n = min(n, 1e9)
	return int(n)
}
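
// Illustration (not part of the original file): a worked example of the
// scaling arithmetic above, with made-up numbers. Suppose the goal is 1s
// (goalns = 1e9), the previous run did 100 iterations (prevIters = 100) in
// 2ms (prevns = 2e6), and last = 100:
//
//	n = 1e9 * 100 / 2e6 = 50000   // raw estimate
//	n += n / 5                    // 60000, the 1.2x safety margin
//	n = min(n, 100*100)           // capped at 10000: don't grow too fast
//	n = max(n, 101)               // still 10000
//	n = min(n, 1e9)               // still 10000
//
// so the next run uses 10000 iterations, and the estimate is refined again
// from that run's measured duration.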

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// b.Loop does its own ramp-up, so if it has already run
	// (b.loop.n != 0), there is nothing left to scale here.
	if b.loop.n == 0 {
		// Run the benchmark for at least the specified amount of time.
		if b.benchTime.n > 0 {
			// We already ran a single iteration in run1;
			// if -benchtime=1x was requested, that result is used as is.
			if b.benchTime.n > 1 {
				b.runN(b.benchTime.n)
			}
		} else {
			d := b.benchTime.d
			for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
				last := n
				// Predict required iterations.
				goalns := d.Nanoseconds()
				prevIters := int64(b.N)
				n = int64(predictN(goalns, prevIters, b.duration.Nanoseconds(), last))
				b.runN(int(n))
			}
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}

// Elapsed returns the measured elapsed time of the benchmark.
// The duration reported by Elapsed matches the one measured by
// StartTimer, StopTimer, and ResetTimer.
func (b *B) Elapsed() time.Duration {
	d := b.duration
	if b.timerOn {
		d += highPrecisionTimeSince(b.start)
	}
	return d
}

// ReportMetric adds "n unit" to the reported benchmark results.
// If the metric is per-iteration, the caller should divide by b.N,
// and by convention units should end in "/op".
// ReportMetric overrides any previously reported value for the same unit.
// ReportMetric panics if unit is the empty string or if unit contains
// any whitespace.
// If unit is a unit normally reported by the benchmark framework itself
// (such as "allocs/op"), ReportMetric will override that metric.
// Setting "ns/op" to 0 will suppress that built-in metric.
func (b *B) ReportMetric(n float64, unit string) {
	if unit == "" {
		panic("metric unit must not be empty")
	}
	if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
		panic("metric unit must not contain whitespace")
	}
	b.extra[unit] = n
}
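
// Illustration (not part of the original file): a sketch of reporting a custom
// per-operation metric. requestAll and its misses counter are hypothetical.
//
//	func BenchmarkCache(b *testing.B) {
//		var misses int
//		for b.Loop() {
//			misses += requestAll()
//		}
//		// Per-iteration metric, so divide by b.N (valid after the loop ends).
//		b.ReportMetric(float64(misses)/float64(b.N), "misses/op")
//	}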

// stopOrScaleBLoop is the slow path of B.Loop when the benchmark runs for a
// target duration: it either grows the iteration target or reports that the
// loop should stop.
func (b *B) stopOrScaleBLoop() bool {
	t := b.Elapsed()
	if t >= b.benchTime.d {
		// We've reached the target duration; B.Loop should stop.
		return false
	}
	// Loop scaling: predict how many total iterations we need for benchTime.
	goalns := b.benchTime.d.Nanoseconds()
	prevIters := int64(b.loop.n)
	b.loop.n = uint64(predictN(goalns, prevIters, t.Nanoseconds(), prevIters))
	if b.loop.n&loopPoisonMask != 0 {
		// The iteration target should never get close to the poison bits,
		// but guard against it so the fast path stays valid.
		panic("loop iteration target overflow")
	}
	return true
}

// loopSlowPath handles the B.Loop cases that the inlined fast path cannot:
// the first call, a stopped timer, and reaching the current iteration target.
func (b *B) loopSlowPath() bool {
	// Consistency checks.
	if !b.timerOn {
		b.Fatal("B.Loop called with timer stopped")
	}
	if b.loop.i&loopPoisonMask != 0 {
		panic(fmt.Sprintf("unknown loop stop condition: %#x", b.loop.i))
	}

	if b.loop.n == 0 {
		// This is the first call to b.Loop() in the benchmark function:
		// initialize the iteration target.
		if b.benchTime.n > 0 {
			// Fixed iteration count (-benchtime=Nx).
			b.loop.n = uint64(b.benchTime.n)
		} else {
			// Start with 1 and let stopOrScaleBLoop grow the target.
			b.loop.n = 1
		}
		// Within a b.Loop loop, b.N stays 0 until the loop is done.
		b.N = 0
		b.ResetTimer()

		// Start the first iteration.
		b.loop.i++
		return true
	}

	// Reached when the fast path ran out of iterations (b.loop.i >= b.loop.n).
	var more bool
	if b.benchTime.n > 0 {
		// With a fixed iteration count, the only way to get here is after
		// running exactly that many iterations.
		if b.loop.i != uint64(b.benchTime.n) {
			// We shouldn't be able to reach the slow path otherwise.
			panic(fmt.Sprintf("iteration count %d < fixed target %d", b.loop.i, b.benchTime.n))
		}
		more = false
	} else {
		// Grow the iteration target or stop once benchTime has elapsed.
		more = b.stopOrScaleBLoop()
	}
	if !more {
		b.StopTimer()
		// Commit the iteration count so users can compute metrics from b.N.
		b.N = int(b.loop.n)
		b.loop.done = true
		return false
	}

	// Start the next iteration.
	b.loop.i++
	return true
}

// Loop returns true as long as the benchmark should continue running.
//
// A typical benchmark is structured like:
//
//	func Benchmark(b *testing.B) {
//		... setup ...
//		for b.Loop() {
//			... code to measure ...
//		}
//		... cleanup ...
//	}
//
// Loop resets the benchmark timer the first time it is called in a benchmark,
// so any setup performed prior to starting the benchmark loop does not count
// toward the benchmark measurement. Likewise, when it returns false, it stops
// the timer so cleanup code is not measured.
//
// Within the body of a "for b.Loop() { ... }" loop, arguments to and results
// from function calls in the loop are kept alive, preventing the compiler
// from optimizing away the loop body. This applies when b.Loop is called
// directly as the loop condition.
//
// After Loop returns false, b.N contains the total number of iterations that
// ran, so the benchmark may use b.N to compute other average metrics.
//
// Prior to the introduction of Loop, benchmarks were expected to contain an
// explicit loop from 0 to b.N. Benchmarks should either use Loop or contain a
// loop to b.N, but not both. Loop offers more automatic management of the
// benchmark timer and runs each benchmark function exactly once per
// measurement, whereas b.N-based benchmarks must run the benchmark function
// (and any associated setup and cleanup) several times.
func (b *B) Loop() bool {
	// This is written so that the fast path is as cheap as possible and can
	// be inlined.
	//
	// There are three ways to fall out of the fast path:
	//
	//	- On the first call, both i and n are 0, so the comparison fails and
	//	  the slow path initializes the iteration target.
	//
	//	- When i reaches n, the loop has hit its current target, so the slow
	//	  path either grows the target or finishes the benchmark.
	//
	//	- If the timer is stopped, loopPoisonTimer is set in i, which makes
	//	  the comparison fail and lets the slow path report the misuse.
	if b.loop.i < b.loop.n {
		b.loop.i++
		return true
	}
	return b.loopSlowPath()
}
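
// Illustration (not part of the original file): a sketch of a benchmark using
// the Loop pattern documented above. parseConfig and the testdata path are
// hypothetical.
//
//	func BenchmarkParseConfig(b *testing.B) {
//		data, err := os.ReadFile("testdata/config.json") // untimed setup
//		if err != nil {
//			b.Fatal(err)
//		}
//		for b.Loop() {
//			if _, err := parseConfig(data); err != nil {
//				b.Fatal(err)
//			}
//		}
//	}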

// The loopPoison* bits can be OR'd into B.loop.i to force Loop's fast path
// to fail and fall back to the slow path.
const (
	// loopPoisonTimer is set while the timer is stopped, so the slow path
	// can report the misuse.
	loopPoisonTimer = uint64(1 << (63 - iota))
	// loopPoisonMask is the set of all loop poison bits.
	// (The iota-1 skips loopPoisonMask itself in the count.)
	loopPoisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
)

// BenchmarkResult contains the results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics collected by ReportMetric.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric,
// which is calculated as r.MemAllocs / r.N.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric,
// which is calculated as r.MemBytes / r.N.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a summary of the benchmark results.
// It follows the standard benchmark result line format,
// not including the benchmark name.
// Extra metrics override built-in metrics of the same name.
// String does not include allocs/op or B/op, since those are reported
// by MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float.
	ns, ok := r.Extra["ns/op"]
	if !ok {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that aren't represented in the standard
	// metrics.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	slices.Sort(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}
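
// Illustration (not part of the original file): with hypothetical numbers,
// a BenchmarkResult of 1000000 iterations taking 1.23s with SetBytes(1024)
// and a ReportMetric(0.5, "misses/op") call formats roughly as:
//
//	 1000000	      1230 ns/op	 832.52 MB/s	         0.5000 misses/op
//
// (the benchmark name and any B/op and allocs/op columns are added by the
// callers, not by String).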

func prettyPrint(w io.Writer, x float64, unit string) {
	// Print all numbers with 10 places before the decimal point
	// and small numbers with four sig figs. Field widths are
	// chosen to fit the whole part in 10 places while aligning
	// the decimal point of all fractional formats.
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r's allocation statistics in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns full name of benchmark including procs suffix.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

type benchState struct {
	match *matcher

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}

func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	bstate := &benchState{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
				bstate.maxLen = l
			}
		}
	}
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		bstate:    bstate,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (s *benchState) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
			}
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
			if b.chatty != nil && b.chatty.json {
				b.chatty.Updatef("", "=== NAME %s\n", "")
			}
		}
	}
}

// If hideStdoutForTesting is true, Run does not print the benchName.
// This avoids a spurious print during 'go test' on package testing itself,
// which invokes b.Run in its own tests (see benchmark_test.go).
var hideStdoutForTesting = false

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	b.hasSub.Store(true)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.bstate != nil {
		benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		bstate:     b.bstate,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		sub.hasSub.Store(true)
	}

	if b.chatty != nil {
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPUName(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		if !hideStdoutForTesting {
			if b.chatty.json {
				b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
			}
			fmt.Println(benchName)
		}
	}

	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}
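
// Illustration (not part of the original file): a sketch of table-driven
// sub-benchmarks using b.Run. sortInts and the sizes are hypothetical.
//
//	func BenchmarkSort(b *testing.B) {
//		for _, size := range []int{10, 1000, 100000} {
//			b.Run(fmt.Sprintf("n=%d", size), func(b *testing.B) {
//				data := make([]int, size)
//				for b.Loop() {
//					sortInts(data)
//				}
//			})
//		}
//	}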

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
func (b *B) add(other BenchmarkResult) {
	r := &b.result
	// The aggregated BenchmarkResults resemble running all subbenchmarks as
	// in sequence in a single benchmark.
	r.N = 1
	r.T += time.Duration(other.NsPerOp())
	if other.Bytes == 0 {
		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
		// set it.
		b.missingBytes = true
		r.Bytes = 0
	}
	if !b.missingBytes {
		r.Bytes += other.Bytes
	}
	r.MemAllocs += uint64(other.AllocsPerOp())
	r.MemBytes += uint64(other.AllocedBytesPerOp())
}

// trimOutput shortens the output from a benchmark, which can be very long.
func (b *B) trimOutput() {
	// The output is likely to appear multiple times because the benchmark
	// is run multiple times, but at least it will be seen. This is not a
	// big deal because benchmarks rarely print, but just in case, we trim it.
	const maxNewlines = 10
	for nlCount, j := 0, 0; j < len(b.output); j++ {
		if b.output[j] == '\n' {
			nlCount++
			if nlCount >= maxNewlines {
				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
				break
			}
		}
	}
}

// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *atomic.Uint64 // shared between all worker goroutines iteration counter
	grain   uint64         // acquire that many iterations from globalN at once
	cache   uint64         // local cache of acquired iterations
	bN      uint64         // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		n := pb.globalN.Add(pb.grain)
		if n <= pb.bN {
			pb.cache = pb.grain
		} else if n < pb.bN+pb.grain {
			pb.cache = pb.bN + pb.grain - n
		} else {
			return false
		}
	}
	pb.cache--
	return true
}

// RunParallel runs a benchmark in parallel.
// It creates multiple goroutines and distributes b.N iterations among them.
// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
// RunParallel is usually used with the go test -cpu flag.
//
// The body function will be run in each goroutine. It should set up any
// goroutine-local state and then iterate until pb.Next returns false.
// It should not use the StartTimer, StopTimer, or ResetTimer functions,
// because they have global effect. It should also not call Run.
//
// RunParallel reports ns/op values as wall time for the benchmark as a whole,
// not the sum of wall time or CPU time over each parallel goroutine.
func (b *B) RunParallel(body func(*PB)) {
	if b.N == 0 {
		return // Nothing to do when probing.
	}
	// Calculate grain size as number of iterations that take ~100µs.
	// 100µs is enough to amortize the overhead and provide sufficient
	// dynamic load balancing.
	grain := uint64(0)
	if b.previousN > 0 && b.previousDuration > 0 {
		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
	}
	if grain < 1 {
		grain = 1
	}
	// We expect the inner loop and function call to take at least 10ns,
	// so do not do more than 100µs/10ns=1e4 iterations.
	if grain > 1e4 {
		grain = 1e4
	}

	var n atomic.Uint64
	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
	var wg sync.WaitGroup
	wg.Add(numProcs)
	for p := 0; p < numProcs; p++ {
		go func() {
			defer wg.Done()
			pb := &PB{
				globalN: &n,
				grain:   grain,
				bN:      uint64(b.N),
			}
			body(pb)
		}()
	}
	wg.Wait()
	if n.Load() <= uint64(b.N) && !b.Failed() {
		b.Fatal("RunParallel: body exited without pb.Next() == false")
	}
}
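
// Illustration (not part of the original file): a sketch of a RunParallel
// benchmark. handleRequest is a hypothetical function; the buffer built inside
// the body is per-goroutine state.
//
//	func BenchmarkHandle(b *testing.B) {
//		b.RunParallel(func(pb *testing.PB) {
//			buf := new(bytes.Buffer) // goroutine-local state
//			for pb.Next() {
//				buf.Reset()
//				handleRequest(buf)
//			}
//		})
//	}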

// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
// There is usually no need to call SetParallelism for CPU-bound benchmarks.
// If p is less than 1, this call will have no effect.
func (b *B) SetParallelism(p int) {
	if p >= 1 {
		b.parallelism = p
	}
}

// Benchmark benchmarks a single function. It is useful for creating
// custom benchmarks that do not use the "go test" command.
//
// If f depends on testing flags, then Init must be used to register
// those flags before calling Benchmark and before calling flag.Parse.
//
// If f calls Run, the result will be an estimate of running all its
// subbenchmarks that don't call Run in sequence in a single benchmark.
func Benchmark(f func(b *B)) BenchmarkResult {
	b := &B{
		common: common{
			signal: make(chan bool),
			w:      discard{},
		},
		benchFunc: f,
		benchTime: benchTime,
	}
	if b.run1() {
		b.run()
	}
	return b.result
}
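
// Illustration (not part of the original file): a sketch of calling Benchmark
// directly from an ordinary program instead of 'go test'. compress is a
// hypothetical function.
//
//	func main() {
//		r := testing.Benchmark(func(b *testing.B) {
//			for b.Loop() {
//				compress()
//			}
//		})
//		fmt.Printf("%s\t%s\n", r, r.MemString())
//	}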

type discard struct{}

func (discard) Write(b []byte) (n int, err error) { return len(b), nil }