src/testing/benchmark.go

package testing

import (
	"context"
	"flag"
	"fmt"
	"internal/sysinfo"
	"io"
	"math"
	"os"
	"runtime"
	"slices"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode"
)

func initBenchmarkFlags() {
	matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
	benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
	flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
}

var (
	matchBenchmarks *string
	benchmarkMemory *bool

	benchTime = durationOrCountFlag{d: 1 * time.Second}
)

type durationOrCountFlag struct {
	d         time.Duration
	n         int
	allowZero bool
}

func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

func (f *durationOrCountFlag) Set(s string) error {
	if strings.HasSuffix(s, "x") {
		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n)}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d}
	return nil
}
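
// For illustration (a usage note, not part of the original file): this flag
// accepts either a plain duration or a fixed iteration count, for example
//
//	go test -bench=. -benchtime=2s    // run each benchmark for roughly 2 seconds
//	go test -bench=. -benchtime=100x  // run each benchmark exactly 100 times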

// benchmarkLock ensures that only one benchmark's timed section runs at a time.
var benchmarkLock sync.Mutex

// memStats is reused for all memory measurements to avoid repeated allocation.
var memStats runtime.MemStats

// InternalBenchmark is an internal type but exported because it is
// cross-package; it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string
	F    func(b *B)
}

// B is a type passed to Benchmark functions to manage benchmark timing
// and control the number of iterations.
//
// A benchmark ends when its Benchmark function returns or calls any of the
// methods FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must
// be called only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution and dumped
// to standard output when done. Unlike in tests, benchmark logs are always
// printed, so as not to hide output whose existence may be affecting
// benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	bstate           *benchState
	N                int
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)
	benchTime        durationOrCountFlag
	bytes            int64
	missingBytes     bool // one of the subbenchmarks does not have bytes set.
	timerOn          bool
	showAllocResult  bool
	result           BenchmarkResult
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines

	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64

	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64

	// Extra metrics collected by ReportMetric.
	extra map[string]float64

	// loop tracks the state of B.Loop.
	loop struct {
		// n is the target number of iterations of the loop. It is scaled up
		// as the benchmark progresses and, once the loop finishes, is
		// committed to b.N.
		n uint64
		// i is the current loop iteration. It increases monotonically toward
		// n; its high bits are used as poison flags to detect invalid states
		// (see loopPoisonTimer and loopPoisonMask).
		i uint64
		// done is set once the loop has exited.
		done bool
	}
}

// StartTimer starts timing a test. This function is called automatically
// before a benchmark starts, but it can also be used to resume timing after
// a call to StopTimer.
func (b *B) StartTimer() {
	if !b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = highPrecisionTimeNow()
		b.timerOn = true
		b.loop.i &^= loopPoisonTimer
	}
}

// StopTimer stops timing a test. This can be used to pause the timer
// while performing steps that you don't want to measure.
func (b *B) StopTimer() {
	if b.timerOn {
		b.duration += highPrecisionTimeSince(b.start)
		runtime.ReadMemStats(&memStats)
		b.netAllocs += memStats.Mallocs - b.startAllocs
		b.netBytes += memStats.TotalAlloc - b.startBytes
		b.timerOn = false
		// Poison B.Loop's iteration counter so that calling B.Loop with the
		// timer stopped is detected (see loopSlowPath).
		b.loop.i |= loopPoisonTimer
	}
}

// ResetTimer zeroes the elapsed benchmark time and memory allocation
// counters and deletes user-reported metrics. It does not affect whether
// the timer is running.
func (b *B) ResetTimer() {
	if b.extra == nil {
		// Allocate the extra map before reading memory stats.
		// Pre-size it to make more allocation unlikely.
		b.extra = make(map[string]float64, 16)
	} else {
		clear(b.extra)
	}
	if b.timerOn {
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = highPrecisionTimeNow()
	}
	b.duration = 0
	b.netAllocs = 0
	b.netBytes = 0
}

// SetBytes records the number of bytes processed in a single operation.
// If this is called, the benchmark will report ns/op and MB/s.
func (b *B) SetBytes(n int64) { b.bytes = n }
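
// For example (illustrative; buf is a hypothetical input), a benchmark whose
// operation processes buf once per iteration can call
// b.SetBytes(int64(len(buf))) before the loop so that the reported result
// also includes throughput in MB/s.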

// ReportAllocs enables malloc statistics for this benchmark.
// It is equivalent to setting -test.benchmem, but it only affects the
// benchmark function that calls ReportAllocs.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}

// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer func() {
		b.runCleanup(normalPanic)
		b.checkRaces()
	}()
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	b.resetRaces()
	b.N = n
	b.loop.n = 0
	b.loop.i = 0
	b.loop.done = false
	b.ctx = ctx
	b.cancelCtx = cancelCtx

	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration

	if b.loop.n > 0 && !b.loop.done && !b.failed {
		b.Error("benchmark function returned without B.Loop() == false (break or return in loop?)")
	}
}

// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmark should be run.
func (b *B) run1() bool {
	if bstate := b.bstate; bstate != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
			bstate.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
		return false
	}
	// If the benchmark spawned sub-benchmarks or already finished (for
	// example via Skip), report it now; it will not be measured further.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if b.hasSub.Load() || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
		}
		return false
	}
	return true
}

var labelsOnce sync.Once

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
func (b *B) run() {
	labelsOnce.Do(func() {
		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
		if b.importPath != "" {
			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
		}
		if cpu := sysinfo.CPUName(); cpu != "" {
			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
		}
	})
	if b.bstate != nil {
		// Running go test --test.bench
		b.bstate.processBench(b) // Must call doBench.
	} else {
		// Running func Benchmark.
		b.doBench()
	}
}

func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// predictN predicts how many iterations are needed to run for goalns
// nanoseconds, given that prevIters iterations took prevns nanoseconds and
// that last was the most recent iteration count.
func predictN(goalns int64, prevIters int64, prevns int64, last int64) int {
	if prevns == 0 {
		// Round up to avoid dividing by zero.
		prevns = 1
	}

	// Order of operations matters.
	// For very fast benchmarks, prevIters ~= prevns.
	// If you divide first, you get 0 or 1,
	// which can hide an order of magnitude in execution time.
	// So multiply first, then divide.
	n := goalns * prevIters / prevns
	// Run more iterations than we think we'll need (1.2x).
	n += n / 5
	// Don't grow too fast in case we had timing errors previously.
	n = min(n, 100*last)
	// Be sure to run at least one more than last time.
	n = max(n, last+1)
	// Don't run more than 1e9 times.
	n = min(n, 1e9)
	return int(n)
}
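
// A worked example (illustrative numbers, not from the original source): with
// a 1s goal (goalns=1e9) and a previous run of 100 iterations in 1ms
// (prevIters=100, prevns=1e6, last=100), the linear estimate is 100,000
// iterations; the 1.2x safety factor raises that to 120,000, but the 100*last
// growth cap brings the prediction for the next run back down to 10,000.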

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// b.Loop does its own ramp-up logic, so if it was used we can skip ours.
	// If b.loop.n is non-zero, b.Loop has already run the benchmark to completion.
	if b.loop.n == 0 {
		// Run the benchmark for at least the specified amount of time.
		if b.benchTime.n > 0 {
			// We already ran a single iteration in run1.
			// If -test.benchtime=1x was requested, that is sufficient;
			// otherwise run the requested number of iterations.
			if b.benchTime.n > 1 {
				b.runN(b.benchTime.n)
			}
		} else {
			d := b.benchTime.d
			for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
				last := n
				// Predict required iterations.
				goalns := d.Nanoseconds()
				prevIters := int64(b.N)
				n = int64(predictN(goalns, prevIters, b.duration.Nanoseconds(), last))
				b.runN(int(n))
			}
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}

// Elapsed returns the measured elapsed time of the benchmark.
// The duration reported by Elapsed matches the one measured by
// StartTimer, StopTimer, and ResetTimer.
func (b *B) Elapsed() time.Duration {
	d := b.duration
	if b.timerOn {
		d += highPrecisionTimeSince(b.start)
	}
	return d
}

// ReportMetric adds "n unit" to the reported benchmark results.
// If the metric is per-iteration, the caller should divide by b.N,
// and by convention units should end in "/op".
// ReportMetric overrides any previously reported value for the same unit.
// ReportMetric is only valid while the benchmark function is running.
//
// If ReportMetric is called with a unit that the framework itself reports
// (such as "ns/op" or "allocs/op"), the reported value replaces the
// framework's own measurement for that unit.
func (b *B) ReportMetric(n float64, unit string) {
	if unit == "" {
		panic("metric unit must not be empty")
	}
	if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
		panic("metric unit must not contain whitespace")
	}
	b.extra[unit] = n
}
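
// For example (an illustrative sketch, not from the original file; doQuery is
// a hypothetical function under test), a benchmark can report a custom
// per-operation metric once the loop has finished:
//
//	func BenchmarkQueries(b *testing.B) {
//		rows := 0
//		for b.Loop() {
//			rows += doQuery()
//		}
//		b.ReportMetric(float64(rows)/float64(b.N), "rows/op")
//	}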

// stopOrScaleBLoop reports whether a timed B.Loop should keep running and,
// if so, grows the iteration target toward the requested benchtime.
func (b *B) stopOrScaleBLoop() bool {
	t := b.Elapsed()
	if t >= b.benchTime.d {
		// We've reached the target benchtime; stop the loop.
		return false
	}
	// Loop scaling: predict the total number of iterations needed.
	goalns := b.benchTime.d.Nanoseconds()
	prevIters := int64(b.loop.n)
	b.loop.n = uint64(predictN(goalns, prevIters, t.Nanoseconds(), prevIters))
	if b.loop.n&loopPoisonMask != 0 {
		// The iteration target would collide with the poison bits used to
		// flag invalid loop states. This should be unreachable in practice.
		panic("loop iteration target overflow")
	}
	return true
}

// loopSlowPath handles the first call to B.Loop in a benchmark and the call
// on which the loop finishes or is rescaled; B.Loop handles the rest inline.
func (b *B) loopSlowPath() bool {
	// Consistency checks.
	if !b.timerOn {
		b.Fatal("B.Loop called with timer stopped")
	}
	if b.loop.i&loopPoisonMask != 0 {
		panic(fmt.Sprintf("unknown loop stop condition: %#x", b.loop.i))
	}

	if b.loop.n == 0 {
		// This is the first call to b.Loop in the benchmark function.
		if b.benchTime.n > 0 {
			// Fixed iteration count: use it as the target directly.
			b.loop.n = uint64(b.benchTime.n)
		} else {
			// Fixed benchmark time: start with one iteration and scale up.
			b.loop.n = 1
		}
		// b.N is not used while the loop is running; it is set when it ends.
		b.N = 0
		b.ResetTimer()

		// Start the next iteration.
		b.loop.i++
		return true
	}

	// Should we keep iterating?
	var more bool
	if b.benchTime.n > 0 {
		// The iteration count is fixed, so the fast path should have stopped
		// exactly at the target; anything else is a bug.
		if b.loop.i != uint64(b.benchTime.n) {
			// This should be unreachable.
			panic(fmt.Sprintf("iteration count %d < fixed target %d", b.loop.i, b.benchTime.n))
		}
		more = false
	} else {
		// Timed mode: stop or grow the iteration target.
		more = b.stopOrScaleBLoop()
	}
	if !more {
		b.StopTimer()
		// Commit the total iteration count to b.N so callers can read it.
		b.N = int(b.loop.n)
		b.loop.done = true
		return false
	}

	// Start the next iteration.
	b.loop.i++
	return true
}

// Loop returns true as long as the benchmark should continue running.
//
// Loop resets the benchmark timer the first time it is called in a benchmark,
// so any setup performed prior to starting the benchmark loop does not count
// toward the benchmark measurement. Likewise, when it returns false it stops
// the timer, so cleanup code after the loop is not measured either.
//
// The compiler never optimizes away calls to functions or methods called
// within the body of a "for b.Loop() { ... }" loop. This prevents surprises
// that can otherwise occur if the compiler determines that the result of a
// benchmarked function is unused. The loop condition must be written in just
// this form for that guarantee to apply.
//
// After Loop returns false, b.N contains the total number of iterations that
// ran, so the benchmark may use b.N to compute other average metrics.
//
// Benchmarks should either use Loop or contain an explicit loop from 0 to
// b.N, but not both. Loop manages the benchmark timer automatically and runs
// the benchmark function only once per measurement, whereas b.N-based
// benchmarks may run it (and any associated setup and cleanup) several times.
func (b *B) Loop() bool {
	// This is written so that the fast path is as fast as possible and can be
	// inlined.
	//
	// There are three cases where we fall out of the fast path:
	//
	//   - On the first call, both i and n are zero.
	//
	//   - When the loop reaches the n'th iteration, i == n and the slow path
	//     either picks a new target iteration count or finishes the loop.
	//
	//   - When the timer is stopped, the top bits of i are poisoned so the
	//     slow path can report the misuse.
	if b.loop.i < b.loop.n {
		b.loop.i++
		return true
	}
	return b.loopSlowPath()
}
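
// A typical benchmark using Loop looks like the following (an illustrative
// sketch; BenchmarkFoo, makeInput, and process are hypothetical names):
//
//	func BenchmarkFoo(b *testing.B) {
//		in := makeInput() // setup, excluded from the timed region
//		for b.Loop() {
//			process(in) // only the loop body is measured
//		}
//	}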

// The loopPoison* constants can be OR'd into b.loop.i to force B.Loop onto
// its slow path, which then diagnoses why the fast-path comparison failed.
const (
	// loopPoisonTimer is set while the timer is stopped, so that a B.Loop
	// call with the timer stopped is reported instead of silently measured.
	loopPoisonTimer = uint64(1 << (63 - iota))
	// If necessary, add more poison bits here.

	// loopPoisonMask covers all of the poison bits. An iteration target that
	// reaches these bits would overflow the usable counter range.
	loopPoisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
)

// BenchmarkResult contains the results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics reported by ReportMetric.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric,
// which is calculated as r.MemAllocs / r.N.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric,
// which is calculated as r.MemBytes / r.N.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a summary of the benchmark results.
// It follows the benchmark result line format from
// https://golang.org/design/14313-benchmark-format, not including the
// benchmark name.
// Extra metrics override built-in metrics of the same name.
// String does not include allocs/op or B/op, since those are reported
// by MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float.
	ns, ok := r.Extra["ns/op"]
	if !ok {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that aren't represented in the standard
	// metrics above.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	slices.Sort(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

func prettyPrint(w io.Writer, x float64, unit string) {
	// Print all numbers with 10 places before the decimal point
	// and small numbers with four sig figs. Field widths are chosen to fit
	// the whole part in 10 places while aligning the decimal point of all
	// fractional formats.
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns the full name of the benchmark, including the
// GOMAXPROCS suffix when n != 1.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

type benchState struct {
	match *matcher

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is
// cross-package; it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}

func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	bstate := &benchState{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
				bstate.maxLen = l
			}
		}
	}
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		bstate:    bstate,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (s *benchState) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.setOutputWriter()
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
			}
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
			if b.chatty != nil && b.chatty.json {
				b.chatty.Updatef("", "=== NAME  %s\n", "")
			}
		}
	}
}

// If hideStdoutForTesting is true, Run does not print the benchName.
// This avoids a spurious print during 'go test' on package testing itself,
// which invokes b.Run in its own tests (see sub_test.go).
var hideStdoutForTesting = false

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	b.hasSub.Store(true)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.bstate != nil {
		benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		bstate:     b.bstate,
	}
	sub.setOutputWriter()
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		sub.hasSub.Store(true)
	}

	if b.chatty != nil {
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPUName(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		if !hideStdoutForTesting {
			if b.chatty.json {
				b.chatty.Updatef(benchName, "=== RUN   %s\n", benchName)
			}
			fmt.Println(benchName)
		}
	}

	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
func (b *B) add(other BenchmarkResult) {
	r := &b.result
	// The aggregated BenchmarkResults resemble running all subbenchmarks as
	// in sequence in a single benchmark.
	r.N = 1
	r.T += time.Duration(other.NsPerOp())
	if other.Bytes == 0 {
		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
		// set it.
		b.missingBytes = true
		r.Bytes = 0
	}
	if !b.missingBytes {
		r.Bytes += other.Bytes
	}
	r.MemAllocs += uint64(other.AllocsPerOp())
	r.MemBytes += uint64(other.AllocedBytesPerOp())
}

// trimOutput shortens the output from a benchmark, which can be very long.
func (b *B) trimOutput() {
	// The output is likely to appear multiple times because the benchmark
	// is run multiple times, but at least it will be seen. This is not a big deal
	// because benchmarks rarely print, but just in case, we trim it if it's too long.
	const maxNewlines = 10
	for nlCount, j := 0, 0; j < len(b.output); j++ {
		if b.output[j] == '\n' {
			nlCount++
			if nlCount >= maxNewlines {
				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
				break
			}
		}
	}
}

// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *atomic.Uint64 // shared between all worker goroutines iteration counter
	grain   uint64         // acquire that many iterations from globalN at once
	cache   uint64         // local cache of acquired iterations
	bN      uint64         // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		n := pb.globalN.Add(pb.grain)
		if n <= pb.bN {
			pb.cache = pb.grain
		} else if n < pb.bN+pb.grain {
			pb.cache = pb.bN + pb.grain - n
		} else {
			return false
		}
	}
	pb.cache--
	return true
}
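
// Next hands out iterations in batches of pb.grain, so worker goroutines only
// touch the shared counter once per batch. A worked example (illustrative
// numbers): with b.N == 250 and grain == 100, successive batch claims yield
// 100, 100, and then 50 iterations, after which Next returns false.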

// RunParallel runs a benchmark in parallel.
// It creates multiple goroutines and distributes b.N iterations among them.
// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
// RunParallel is usually used with the go test -cpu flag.
//
// The body function will be run in each goroutine. It should set up any
// goroutine-local state and then iterate until pb.Next returns false.
// It should not use the StartTimer, StopTimer, or ResetTimer functions,
// because they have global effect. It should also not call Run.
//
// RunParallel reports ns/op values as wall time for the parallel execution,
// not the sum of wall time or CPU time over each parallel goroutine.
func (b *B) RunParallel(body func(*PB)) {
	if b.N == 0 {
		return // Nothing to do when probing.
	}
	// Calculate grain size as number of iterations that take ~100µs.
	// 100µs is enough to amortize the overhead and provide sufficient
	// dynamic load balancing.
	grain := uint64(0)
	if b.previousN > 0 && b.previousDuration > 0 {
		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
	}
	if grain < 1 {
		grain = 1
	}
	// We expect the inner loop and function call to take at least 10ns,
	// so do not do more than 100µs/10ns=1e4 iterations.
	if grain > 1e4 {
		grain = 1e4
	}

	var n atomic.Uint64
	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
	var wg sync.WaitGroup
	wg.Add(numProcs)
	for p := 0; p < numProcs; p++ {
		go func() {
			defer wg.Done()
			pb := &PB{
				globalN: &n,
				grain:   grain,
				bN:      uint64(b.N),
			}
			body(pb)
		}()
	}
	wg.Wait()
	if n.Load() <= uint64(b.N) && !b.Failed() {
		b.Fatal("RunParallel: body exited without pb.Next() == false")
	}
}
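
// A typical use (an illustrative sketch; the template and inputs are
// hypothetical) sets up goroutine-local state and then loops on pb.Next:
//
//	func BenchmarkTemplateParallel(b *testing.B) {
//		templ := template.Must(template.New("test").Parse("Hello, {{.}}!"))
//		b.RunParallel(func(pb *testing.PB) {
//			var buf bytes.Buffer // goroutine-local state
//			for pb.Next() {
//				buf.Reset()
//				templ.Execute(&buf, "World")
//			}
//		})
//	}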

// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
// There is usually no need to call SetParallelism for CPU-bound benchmarks.
// If p is less than 1, this call will have no effect.
func (b *B) SetParallelism(p int) {
	if p >= 1 {
		b.parallelism = p
	}
}

// Benchmark benchmarks a single function. It is useful for creating
// custom benchmarks that do not use the "go test" command.
//
// If f depends on testing flags, then Init must be used to register
// those flags before calling Benchmark and before calling flag.Parse.
//
// If f calls Run, the result will be an estimate of running all its
// subbenchmarks that don't call Run in sequence in a single benchmark.
func Benchmark(f func(b *B)) BenchmarkResult {
	b := &B{
		common: common{
			signal: make(chan bool),
			w:      discard{},
		},
		benchFunc: f,
		benchTime: benchTime,
	}
	b.setOutputWriter()
	if b.run1() {
		b.run()
	}
	return b.result
}
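
// For example (an illustrative sketch, not from the original file; sortInput
// is a hypothetical function under test), a program can benchmark a function
// directly and inspect the result:
//
//	res := testing.Benchmark(func(b *testing.B) {
//		for b.Loop() {
//			sortInput()
//		}
//	})
//	fmt.Println(res)           // formatted like a "go test -bench" result line
//	fmt.Println(res.NsPerOp()) // average nanoseconds per iteration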

type discard struct{}

func (discard) Write(b []byte) (n int, err error) { return len(b), nil }