Source file
src/testing/benchmark.go
1
2
3
4
5 package testing
6
7 import (
8 "context"
9 "flag"
10 "fmt"
11 "internal/sysinfo"
12 "io"
13 "math"
14 "os"
15 "runtime"
16 "slices"
17 "strconv"
18 "strings"
19 "sync"
20 "sync/atomic"
21 "time"
22 "unicode"
23 )
24
25 func initBenchmarkFlags() {
26 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
27 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
28 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
29 }
30
31 var (
32 matchBenchmarks *string
33 benchmarkMemory *bool
34
35 benchTime = durationOrCountFlag{d: 1 * time.Second}
36 )
37
// durationOrCountFlag is a flag value that holds either a time budget (d)
// or an exact iteration count (n). It is parsed from strings such as "2s"
// (duration) or "100x" (count).
type durationOrCountFlag struct {
	d         time.Duration // time budget; meaningful when n == 0
	n         int           // iteration count; takes precedence in String when > 0
	allowZero bool          // whether Set accepts a zero count or zero duration
}

// String renders the flag in the same syntax Set accepts: "Nx" when a
// positive count is set, otherwise the duration.
func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

// Set parses s as either a count ("Nx") or a duration and replaces the
// current value. Negative values are always rejected; zero is rejected
// unless allowZero is set. On error the receiver is left unchanged.
func (f *durationOrCountFlag) Set(s string) error {
	if count, ok := strings.CutSuffix(s, "x"); ok {
		n, err := strconv.ParseInt(count, 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		// Carry allowZero over: it is configuration, not part of the parsed
		// value, and dropping it would make a later Set reject zero.
		*f = durationOrCountFlag{n: int(n), allowZero: f.allowZero}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d, allowZero: f.allowZero}
	return nil
}
67
68
var (
	// benchmarkLock is held by runN for the duration of a benchmark run and
	// temporarily released by (*B).Run while a sub-benchmark executes.
	benchmarkLock sync.Mutex

	// memStats is the shared scratch buffer that StartTimer, StopTimer and
	// ResetTimer pass to runtime.ReadMemStats.
	memStats runtime.MemStats
)
73
74
75
76 type InternalBenchmark struct {
77 Name string
78 F func(b *B)
79 }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 type B struct {
95 common
96 importPath string
97 bstate *benchState
98 N int
99 previousN int
100 previousDuration time.Duration
101 benchFunc func(b *B)
102 benchTime durationOrCountFlag
103 bytes int64
104 missingBytes bool
105 timerOn bool
106 showAllocResult bool
107 result BenchmarkResult
108 parallelism int
109
110 startAllocs uint64
111 startBytes uint64
112
113 netAllocs uint64
114 netBytes uint64
115
116 extra map[string]float64
117
118
119 loop struct {
120
121
122
123 n uint64
124
125
126
127
128
129 i uint64
130
131 done bool
132 }
133 }
134
135
136
137
138 func (b *B) StartTimer() {
139 if !b.timerOn {
140 runtime.ReadMemStats(&memStats)
141 b.startAllocs = memStats.Mallocs
142 b.startBytes = memStats.TotalAlloc
143 b.start = highPrecisionTimeNow()
144 b.timerOn = true
145 b.loop.i &^= loopPoisonTimer
146 }
147 }
148
149
150
151 func (b *B) StopTimer() {
152 if b.timerOn {
153 b.duration += highPrecisionTimeSince(b.start)
154 runtime.ReadMemStats(&memStats)
155 b.netAllocs += memStats.Mallocs - b.startAllocs
156 b.netBytes += memStats.TotalAlloc - b.startBytes
157 b.timerOn = false
158
159 b.loop.i |= loopPoisonTimer
160 }
161 }
162
163
164
165
166 func (b *B) ResetTimer() {
167 if b.extra == nil {
168
169
170 b.extra = make(map[string]float64, 16)
171 } else {
172 clear(b.extra)
173 }
174 if b.timerOn {
175 runtime.ReadMemStats(&memStats)
176 b.startAllocs = memStats.Mallocs
177 b.startBytes = memStats.TotalAlloc
178 b.start = highPrecisionTimeNow()
179 }
180 b.duration = 0
181 b.netAllocs = 0
182 b.netBytes = 0
183 }
184
185
186
187 func (b *B) SetBytes(n int64) { b.bytes = n }
188
189
190
191
192 func (b *B) ReportAllocs() {
193 b.showAllocResult = true
194 }
195
196
197 func (b *B) runN(n int) {
198 benchmarkLock.Lock()
199 defer benchmarkLock.Unlock()
200 ctx, cancelCtx := context.WithCancel(context.Background())
201 defer func() {
202 b.runCleanup(normalPanic)
203 b.checkRaces()
204 }()
205
206
207 runtime.GC()
208 b.resetRaces()
209 b.N = n
210 b.loop.n = 0
211 b.loop.i = 0
212 b.loop.done = false
213 b.ctx = ctx
214 b.cancelCtx = cancelCtx
215
216 b.parallelism = 1
217 b.ResetTimer()
218 b.StartTimer()
219 b.benchFunc(b)
220 b.StopTimer()
221 b.previousN = n
222 b.previousDuration = b.duration
223
224 if b.loop.n > 0 && !b.loop.done && !b.failed {
225 b.Error("benchmark function returned without B.Loop() == false (break or return in loop?)")
226 }
227 }
228
229
230
231 func (b *B) run1() bool {
232 if bstate := b.bstate; bstate != nil {
233
234 if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
235 bstate.maxLen = n + 8
236 }
237 }
238 go func() {
239
240
241 defer func() {
242 b.signal <- true
243 }()
244
245 b.runN(1)
246 }()
247 <-b.signal
248 if b.failed {
249 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
250 return false
251 }
252
253
254 b.mu.RLock()
255 finished := b.finished
256 b.mu.RUnlock()
257 if b.hasSub.Load() || finished {
258 tag := "BENCH"
259 if b.skipped {
260 tag = "SKIP"
261 }
262 if b.chatty != nil && (len(b.output) > 0 || finished) {
263 b.trimOutput()
264 fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
265 }
266 return false
267 }
268 return true
269 }
270
271 var labelsOnce sync.Once
272
273
274
275 func (b *B) run() {
276 labelsOnce.Do(func() {
277 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
278 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
279 if b.importPath != "" {
280 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
281 }
282 if cpu := sysinfo.CPUName(); cpu != "" {
283 fmt.Fprintf(b.w, "cpu: %s\n", cpu)
284 }
285 })
286 if b.bstate != nil {
287
288 b.bstate.processBench(b)
289 } else {
290
291 b.doBench()
292 }
293 }
294
295 func (b *B) doBench() BenchmarkResult {
296 go b.launch()
297 <-b.signal
298 return b.result
299 }
300
301
// maxBenchPredictIters is the largest iteration count predictN will return.
const maxBenchPredictIters = 1_000_000_000

// predictN estimates how many iterations are needed to reach goalns
// nanoseconds, given that prevIters iterations took prevns nanoseconds and
// that the previous run used last iterations.
//
// The estimate is padded by 20%, limited to 100x last, forced to be at least
// last+1, and capped at maxBenchPredictIters.
func predictN(goalns int64, prevIters int64, prevns int64, last int64) int {
	if prevns == 0 {
		// Avoid dividing by zero when the previous run was too fast to time.
		prevns = 1
	}

	var n int64
	if goalns > 0 && prevIters > 0 && goalns > math.MaxInt64/prevIters {
		// goalns*prevIters does not fit in int64 (long goal, many
		// iterations). Estimate in floating point and clamp to the final
		// cap up front; the clamps below yield the same result as an exact
		// computation would.
		est := float64(goalns) * float64(prevIters) / float64(prevns)
		n = int64(min(est, maxBenchPredictIters))
	} else {
		// Multiply before dividing: for very fast benchmarks
		// prevIters/prevns truncates to 0 or 1 and loses precision.
		n = goalns * prevIters / prevns
	}

	n += n / 5                       // run 1.2x the estimate
	n = min(n, 100*last)             // do not grow too fast
	n = max(n, last+1)               // always make progress
	n = min(n, maxBenchPredictIters) // hard cap
	return int(n)
}
326
327
328
329
330
331 func (b *B) launch() {
332
333
334 defer func() {
335 b.signal <- true
336 }()
337
338
339
340 if b.loop.n == 0 {
341
342 if b.benchTime.n > 0 {
343
344
345
346 if b.benchTime.n > 1 {
347 b.runN(b.benchTime.n)
348 }
349 } else {
350 d := b.benchTime.d
351 for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
352 last := n
353
354 goalns := d.Nanoseconds()
355 prevIters := int64(b.N)
356 n = int64(predictN(goalns, prevIters, b.duration.Nanoseconds(), last))
357 b.runN(int(n))
358 }
359 }
360 }
361 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
362 }
363
364
365
366
367 func (b *B) Elapsed() time.Duration {
368 d := b.duration
369 if b.timerOn {
370 d += highPrecisionTimeSince(b.start)
371 }
372 return d
373 }
374
375
376
377
378
379
380
381
382
383
384 func (b *B) ReportMetric(n float64, unit string) {
385 if unit == "" {
386 panic("metric unit must not be empty")
387 }
388 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
389 panic("metric unit must not contain whitespace")
390 }
391 b.extra[unit] = n
392 }
393
394 func (b *B) stopOrScaleBLoop() bool {
395 t := b.Elapsed()
396 if t >= b.benchTime.d {
397
398 return false
399 }
400
401 goalns := b.benchTime.d.Nanoseconds()
402 prevIters := int64(b.loop.n)
403 b.loop.n = uint64(predictN(goalns, prevIters, t.Nanoseconds(), prevIters))
404 if b.loop.n&loopPoisonMask != 0 {
405
406
407 panic("loop iteration target overflow")
408 }
409
410
411 return uint64(prevIters) < b.loop.n
412 }
413
414 func (b *B) loopSlowPath() bool {
415
416 if !b.timerOn {
417 b.Fatal("B.Loop called with timer stopped")
418 }
419 if b.loop.i&loopPoisonMask != 0 {
420 panic(fmt.Sprintf("unknown loop stop condition: %#x", b.loop.i))
421 }
422
423 if b.loop.n == 0 {
424
425 if b.benchTime.n > 0 {
426
427 b.loop.n = uint64(b.benchTime.n)
428 } else {
429
430 b.loop.n = 1
431 }
432
433 b.N = 0
434 b.ResetTimer()
435
436
437 b.loop.i++
438 return true
439 }
440
441
442 var more bool
443 if b.benchTime.n > 0 {
444
445
446 if b.loop.i != uint64(b.benchTime.n) {
447
448 panic(fmt.Sprintf("iteration count %d < fixed target %d", b.loop.i, b.benchTime.n))
449 }
450 more = false
451 } else {
452
453 more = b.stopOrScaleBLoop()
454 }
455 if !more {
456 b.StopTimer()
457
458 b.N = int(b.loop.n)
459 b.loop.done = true
460 return false
461 }
462
463
464 b.loop.i++
465 return true
466 }
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502 func (b *B) Loop() bool {
503
504
505
506
507
508
509
510
511
512
513
514
515 if b.loop.i < b.loop.n {
516 b.loop.i++
517 return true
518 }
519 return b.loopSlowPath()
520 }
521
522
523
// Flag bits stored in the high end of B.loop.i. Setting any of them makes
// loop.i larger than every valid iteration target, so B.Loop falls through
// to loopSlowPath.
const (
	// loopPoisonTimer is set by StopTimer and cleared by StartTimer.
	// It occupies bit 63; flags added after it would take the next lower
	// bits.
	loopPoisonTimer = uint64(1 << (63 - iota))

	// loopPoisonMask covers every poison bit. It must remain the last entry
	// of this block: it is computed from iota as all bits from the lowest
	// flag upward.
	loopPoisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
)
534
535
// BenchmarkResult holds the outcome of a benchmark run.
type BenchmarkResult struct {
	N         int           // number of iterations
	T         time.Duration // total measured time
	Bytes     int64         // bytes processed per iteration
	MemAllocs uint64        // total number of allocations
	MemBytes  uint64        // total number of bytes allocated

	// Extra holds additional metrics keyed by unit, as recorded with
	// ReportMetric. Entries for "ns/op", "MB/s", "B/op" and "allocs/op"
	// override the corresponding computed values.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric: the Extra override if present,
// otherwise T divided by N (0 when N is not positive).
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric: the Extra override if present,
// otherwise throughput computed from Bytes, N and T (0 when any of them is
// not positive).
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric: the Extra override if
// present, otherwise MemAllocs divided by N (0 when N is not positive).
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric: the Extra override if
// present, otherwise MemBytes divided by N (0 when N is not positive).
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String formats the result as a tab-separated summary: the iteration
// count, ns/op, MB/s, and then any extra metrics sorted by unit. Metrics
// whose value is zero are omitted. Memory statistics are not included; see
// MemString.
func (r BenchmarkResult) String() string {
	var buf strings.Builder
	fmt.Fprintf(&buf, "%8d", r.N)

	// Compute ns/op as a float. Like the other accessors, treat a
	// non-positive N as "no data" rather than dividing by it, which would
	// print NaN or Inf.
	ns, ok := r.Extra["ns/op"]
	if !ok && r.N > 0 {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(&buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(&buf, "\t%7.2f MB/s", mbs)
	}

	// Append user-defined metrics, skipping the built-in units, which are
	// reported here or by MemString.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	// Map iteration order is random; sort for stable output.
	slices.Sort(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(&buf, r.Extra[k], k)
	}
	return buf.String()
}

// prettyPrint writes x followed by unit to w, choosing the number of
// decimals from the magnitude of x so roughly four significant figures are
// shown. The field width grows with the number of decimals, which keeps the
// integer part ten characters wide and the decimal points aligned.
func prettyPrint(w io.Writer, x float64, unit string) {
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString formats the per-operation memory statistics as
// "<bytes> B/op\t<allocs> allocs/op".
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}
669
670
// benchmarkName returns the display name of a benchmark run with n procs:
// name itself when n is 1, otherwise name with "-n" appended.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return fmt.Sprintf("%s-%d", name, n)
}
677
678 type benchState struct {
679 match *matcher
680
681 maxLen int
682 extLen int
683 }
684
685
686
687 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
688 runBenchmarks("", matchString, benchmarks)
689 }
690
691 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
692
693 if len(*matchBenchmarks) == 0 {
694 return true
695 }
696
697 maxprocs := 1
698 for _, procs := range cpuList {
699 if procs > maxprocs {
700 maxprocs = procs
701 }
702 }
703 bstate := &benchState{
704 match: newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
705 extLen: len(benchmarkName("", maxprocs)),
706 }
707 var bs []InternalBenchmark
708 for _, Benchmark := range benchmarks {
709 if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
710 bs = append(bs, Benchmark)
711 benchName := benchmarkName(Benchmark.Name, maxprocs)
712 if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
713 bstate.maxLen = l
714 }
715 }
716 }
717 main := &B{
718 common: common{
719 name: "Main",
720 w: os.Stdout,
721 bench: true,
722 },
723 importPath: importPath,
724 benchFunc: func(b *B) {
725 for _, Benchmark := range bs {
726 b.Run(Benchmark.Name, Benchmark.F)
727 }
728 },
729 benchTime: benchTime,
730 bstate: bstate,
731 }
732 if Verbose() {
733 main.chatty = newChattyPrinter(main.w)
734 }
735 main.runN(1)
736 return !main.failed
737 }
738
739
740 func (s *benchState) processBench(b *B) {
741 for i, procs := range cpuList {
742 for j := uint(0); j < *count; j++ {
743 runtime.GOMAXPROCS(procs)
744 benchName := benchmarkName(b.name, procs)
745
746
747 if b.chatty == nil {
748 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
749 }
750
751 if i > 0 || j > 0 {
752 b = &B{
753 common: common{
754 signal: make(chan bool),
755 name: b.name,
756 w: b.w,
757 chatty: b.chatty,
758 bench: true,
759 },
760 benchFunc: b.benchFunc,
761 benchTime: b.benchTime,
762 }
763 b.setOutputWriter()
764 b.run1()
765 }
766 r := b.doBench()
767 if b.failed {
768
769
770
771 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
772 continue
773 }
774 results := r.String()
775 if b.chatty != nil {
776 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
777 }
778 if *benchmarkMemory || b.showAllocResult {
779 results += "\t" + r.MemString()
780 }
781 fmt.Fprintln(b.w, results)
782
783
784 if len(b.output) > 0 {
785 b.trimOutput()
786 fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
787 }
788 if p := runtime.GOMAXPROCS(-1); p != procs {
789 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
790 }
791 if b.chatty != nil && b.chatty.json {
792 b.chatty.Updatef("", "=== NAME %s\n", "")
793 }
794 }
795 }
796 }
797
798
799
800
// hideStdoutForTesting, when true, stops (*B).Run from writing the
// benchmark name (and the JSON "=== RUN" update) to standard output.
var hideStdoutForTesting bool
802
803
804
805
806
807
808 func (b *B) Run(name string, f func(b *B)) bool {
809
810
811 b.hasSub.Store(true)
812 benchmarkLock.Unlock()
813 defer benchmarkLock.Lock()
814
815 benchName, ok, partial := b.name, true, false
816 if b.bstate != nil {
817 benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
818 }
819 if !ok {
820 return true
821 }
822 var pc [maxStackLen]uintptr
823 n := runtime.Callers(2, pc[:])
824 sub := &B{
825 common: common{
826 signal: make(chan bool),
827 name: benchName,
828 parent: &b.common,
829 level: b.level + 1,
830 creator: pc[:n],
831 w: b.w,
832 chatty: b.chatty,
833 bench: true,
834 },
835 importPath: b.importPath,
836 benchFunc: f,
837 benchTime: b.benchTime,
838 bstate: b.bstate,
839 }
840 sub.setOutputWriter()
841 if partial {
842
843
844 sub.hasSub.Store(true)
845 }
846
847 if b.chatty != nil {
848 labelsOnce.Do(func() {
849 fmt.Printf("goos: %s\n", runtime.GOOS)
850 fmt.Printf("goarch: %s\n", runtime.GOARCH)
851 if b.importPath != "" {
852 fmt.Printf("pkg: %s\n", b.importPath)
853 }
854 if cpu := sysinfo.CPUName(); cpu != "" {
855 fmt.Printf("cpu: %s\n", cpu)
856 }
857 })
858
859 if !hideStdoutForTesting {
860 if b.chatty.json {
861 b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
862 }
863 fmt.Println(benchName)
864 }
865 }
866
867 if sub.run1() {
868 sub.run()
869 }
870 b.add(sub.result)
871 return !sub.failed
872 }
873
874
875
876
877 func (b *B) add(other BenchmarkResult) {
878 r := &b.result
879
880
881 r.N = 1
882 r.T += time.Duration(other.NsPerOp())
883 if other.Bytes == 0 {
884
885
886 b.missingBytes = true
887 r.Bytes = 0
888 }
889 if !b.missingBytes {
890 r.Bytes += other.Bytes
891 }
892 r.MemAllocs += uint64(other.AllocsPerOp())
893 r.MemBytes += uint64(other.AllocedBytesPerOp())
894 }
895
896
897 func (b *B) trimOutput() {
898
899
900
901 const maxNewlines = 10
902 for nlCount, j := 0, 0; j < len(b.output); j++ {
903 if b.output[j] == '\n' {
904 nlCount++
905 if nlCount >= maxNewlines {
906 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
907 break
908 }
909 }
910 }
911 }
912
913
// PB is the per-goroutine iterator that RunParallel passes to its body.
// Each PB claims iterations from a shared counter in batches.
type PB struct {
	globalN *atomic.Uint64 // iterations claimed so far, shared by all PBs
	grain   uint64         // number of iterations claimed per batch
	cache   uint64         // claimed iterations not yet handed out by Next
	bN      uint64         // total number of iterations to run
}

// Next reports whether there is another iteration to run. When the local
// batch is exhausted it claims a new one from the shared counter; the last
// batch is cut short so that exactly bN iterations are handed out in total.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		claimed := pb.globalN.Add(pb.grain)
		switch limit := pb.bN + pb.grain; {
		case claimed <= pb.bN:
			// The whole batch lies within the budget.
			pb.cache = pb.grain
		case claimed < limit:
			// The batch straddles the end: keep only the part below bN.
			pb.cache = limit - claimed
		default:
			return false
		}
	}
	pb.cache--
	return true
}
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950 func (b *B) RunParallel(body func(*PB)) {
951 if b.N == 0 {
952 return
953 }
954
955
956
957 grain := uint64(0)
958 if b.previousN > 0 && b.previousDuration > 0 {
959 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
960 }
961 if grain < 1 {
962 grain = 1
963 }
964
965
966 if grain > 1e4 {
967 grain = 1e4
968 }
969
970 var n atomic.Uint64
971 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
972 var wg sync.WaitGroup
973 wg.Add(numProcs)
974 for p := 0; p < numProcs; p++ {
975 go func() {
976 defer wg.Done()
977 pb := &PB{
978 globalN: &n,
979 grain: grain,
980 bN: uint64(b.N),
981 }
982 body(pb)
983 }()
984 }
985 wg.Wait()
986 if n.Load() <= uint64(b.N) && !b.Failed() {
987 b.Fatal("RunParallel: body exited without pb.Next() == false")
988 }
989 }
990
991
992
993
994 func (b *B) SetParallelism(p int) {
995 if p >= 1 {
996 b.parallelism = p
997 }
998 }
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 func Benchmark(f func(b *B)) BenchmarkResult {
1009 b := &B{
1010 common: common{
1011 signal: make(chan bool),
1012 w: discard{},
1013 },
1014 benchFunc: f,
1015 benchTime: benchTime,
1016 }
1017 b.setOutputWriter()
1018 if b.run1() {
1019 b.run()
1020 }
1021 return b.result
1022 }
1023
// discard is an io.Writer that drops everything written to it.
type discard struct{}

// Write reports the whole of b as written without storing it anywhere.
func (discard) Write(b []byte) (n int, err error) {
	n = len(b)
	return n, nil
}
1027
View as plain text