Source file
src/testing/benchmark.go
1
2
3
4
5 package testing
6
7 import (
8 "context"
9 "flag"
10 "fmt"
11 "internal/sysinfo"
12 "io"
13 "math"
14 "os"
15 "runtime"
16 "slices"
17 "strconv"
18 "strings"
19 "sync"
20 "sync/atomic"
21 "time"
22 "unicode"
23 )
24
25 func initBenchmarkFlags() {
26 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
27 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
28 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
29 }
30
31 var (
32 matchBenchmarks *string
33 benchmarkMemory *bool
34
35 benchTime = durationOrCountFlag{d: 1 * time.Second}
36 )
37
// durationOrCountFlag is a flag value that holds either a duration (d) or an
// iteration count (n). It is registered with flag.Var, so it provides the
// String and Set methods that flag.Value requires.
type durationOrCountFlag struct {
	d         time.Duration // target duration; used when n == 0
	n         int           // iteration count; takes precedence when > 0
	allowZero bool          // whether "0x" and zero durations are accepted by Set
}

// String formats the flag value: "Nx" when a positive count is set,
// otherwise the textual form of the duration.
func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

// Set parses s into f. A value ending in "x" is an iteration count, anything
// else is parsed with time.ParseDuration. Negative values are always
// rejected; zero is rejected unless f.allowZero is set. On error f is left
// unchanged.
//
// The allowZero setting is a property of the flag rather than of a single
// value, so it is carried over to the new value instead of being reset.
func (f *durationOrCountFlag) Set(s string) error {
	if count, ok := strings.CutSuffix(s, "x"); ok {
		n, err := strconv.ParseInt(count, 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n), allowZero: f.allowZero}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d, allowZero: f.allowZero}
	return nil
}
67
68
var (
	// benchmarkLock is held by runN for the duration of a benchmark run and
	// is temporarily released by B.Run while a sub-benchmark executes.
	benchmarkLock sync.Mutex

	// memStats is the shared buffer that StartTimer, StopTimer and
	// ResetTimer fill via runtime.ReadMemStats.
	memStats runtime.MemStats
)
73
74
75
76 type InternalBenchmark struct {
77 Name string
78 F func(b *B)
79 }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 type B struct {
95 common
96 importPath string
97 bstate *benchState
98 N int
99 previousN int
100 previousDuration time.Duration
101 benchFunc func(b *B)
102 benchTime durationOrCountFlag
103 bytes int64
104 missingBytes bool
105 timerOn bool
106 showAllocResult bool
107 result BenchmarkResult
108 parallelism int
109
110 startAllocs uint64
111 startBytes uint64
112
113 netAllocs uint64
114 netBytes uint64
115
116 extra map[string]float64
117
118
119 loop struct {
120
121
122
123 n uint64
124
125
126
127
128
129 i uint64
130
131 done bool
132 }
133 }
134
135
136
137
138 func (b *B) StartTimer() {
139 if !b.timerOn {
140 runtime.ReadMemStats(&memStats)
141 b.startAllocs = memStats.Mallocs
142 b.startBytes = memStats.TotalAlloc
143 b.start = highPrecisionTimeNow()
144 b.timerOn = true
145 b.loop.i &^= loopPoisonTimer
146 }
147 }
148
149
150
151 func (b *B) StopTimer() {
152 if b.timerOn {
153 b.duration += highPrecisionTimeSince(b.start)
154 runtime.ReadMemStats(&memStats)
155 b.netAllocs += memStats.Mallocs - b.startAllocs
156 b.netBytes += memStats.TotalAlloc - b.startBytes
157 b.timerOn = false
158
159 b.loop.i |= loopPoisonTimer
160 }
161 }
162
163
164
165
166 func (b *B) ResetTimer() {
167 if b.extra == nil {
168
169
170 b.extra = make(map[string]float64, 16)
171 } else {
172 clear(b.extra)
173 }
174 if b.timerOn {
175 runtime.ReadMemStats(&memStats)
176 b.startAllocs = memStats.Mallocs
177 b.startBytes = memStats.TotalAlloc
178 b.start = highPrecisionTimeNow()
179 }
180 b.duration = 0
181 b.netAllocs = 0
182 b.netBytes = 0
183 }
184
185
186
187 func (b *B) SetBytes(n int64) { b.bytes = n }
188
189
190
191
192 func (b *B) ReportAllocs() {
193 b.showAllocResult = true
194 }
195
196
197 func (b *B) runN(n int) {
198 benchmarkLock.Lock()
199 defer benchmarkLock.Unlock()
200 ctx, cancelCtx := context.WithCancel(context.Background())
201 defer func() {
202 b.runCleanup(normalPanic)
203 b.checkRaces()
204 }()
205
206
207 runtime.GC()
208 b.resetRaces()
209 b.N = n
210 b.loop.n = 0
211 b.loop.i = 0
212 b.loop.done = false
213 b.ctx = ctx
214 b.cancelCtx = cancelCtx
215
216 b.parallelism = 1
217 b.ResetTimer()
218 b.StartTimer()
219 b.benchFunc(b)
220 b.StopTimer()
221 b.previousN = n
222 b.previousDuration = b.duration
223
224 if b.loop.n > 0 && !b.loop.done && !b.failed {
225 b.Error("benchmark function returned without B.Loop() == false (break or return in loop?)")
226 }
227 }
228
229
230
231 func (b *B) run1() bool {
232 if bstate := b.bstate; bstate != nil {
233
234 if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
235 bstate.maxLen = n + 8
236 }
237 }
238 go func() {
239
240
241 defer func() {
242 b.signal <- true
243 }()
244
245 b.runN(1)
246 }()
247 <-b.signal
248 if b.failed {
249 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
250 return false
251 }
252
253
254 b.mu.RLock()
255 finished := b.finished
256 b.mu.RUnlock()
257 if b.hasSub.Load() || finished {
258 tag := "BENCH"
259 if b.skipped {
260 tag = "SKIP"
261 }
262 if b.chatty != nil && (len(b.output) > 0 || finished) {
263 b.trimOutput()
264 fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
265 }
266 return false
267 }
268 return true
269 }
270
271 var labelsOnce sync.Once
272
273
274
275 func (b *B) run() {
276 labelsOnce.Do(func() {
277 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
278 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
279 if b.importPath != "" {
280 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
281 }
282 if cpu := sysinfo.CPUName(); cpu != "" {
283 fmt.Fprintf(b.w, "cpu: %s\n", cpu)
284 }
285 })
286 if b.bstate != nil {
287
288 b.bstate.processBench(b)
289 } else {
290
291 b.doBench()
292 }
293 }
294
295 func (b *B) doBench() BenchmarkResult {
296 go b.launch()
297 <-b.signal
298 return b.result
299 }
300
// predictN estimates how many iterations are needed to run for about goalns
// nanoseconds, given that the previous run executed prevIters iterations in
// prevns nanoseconds and that the previous iteration target was last.
//
// The raw estimate is padded by 20%, limited to 100 times the previous
// target, forced to be at least last+1, and finally capped at 1e9.
func predictN(goalns int64, prevIters int64, prevns int64, last int64) int {
	// estimateCeiling bounds the raw estimate so that the 20% padding below
	// cannot overflow int64. It is far above anything the later limits let
	// through for realistic values of last.
	const estimateCeiling = 1e18

	if prevns == 0 {
		// Avoid dividing by zero.
		prevns = 1
	}

	// Multiply before dividing: when prevIters is close to prevns, dividing
	// first would truncate the ratio to 0 or 1.
	var n int64
	if prevIters > 0 && prevns > 0 && goalns > math.MaxInt64/prevIters {
		// goalns*prevIters does not fit in an int64 (long goal combined
		// with a large iteration count). A wrapped product would collapse
		// the prediction to last+1, so estimate in floating point instead;
		// the limits below make the lost precision irrelevant.
		est := float64(goalns) * float64(prevIters) / float64(prevns)
		n = estimateCeiling
		if est < estimateCeiling {
			n = int64(est)
		}
	} else {
		n = goalns * prevIters / prevns
	}
	n = min(n, estimateCeiling)

	// Run more iterations than strictly predicted, by a factor of 1.2.
	n += n / 5
	// Do not grow by more than a factor of 100 in one step.
	n = min(n, 100*last)
	// Always make progress.
	n = max(n, last+1)
	// Never exceed one billion iterations.
	n = min(n, 1e9)
	return int(n)
}
323
324
325
326
327
328 func (b *B) launch() {
329
330
331 defer func() {
332 b.signal <- true
333 }()
334
335
336
337 if b.loop.n == 0 {
338
339 if b.benchTime.n > 0 {
340
341
342
343 if b.benchTime.n > 1 {
344 b.runN(b.benchTime.n)
345 }
346 } else {
347 d := b.benchTime.d
348 for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
349 last := n
350
351 goalns := d.Nanoseconds()
352 prevIters := int64(b.N)
353 n = int64(predictN(goalns, prevIters, b.duration.Nanoseconds(), last))
354 b.runN(int(n))
355 }
356 }
357 }
358 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
359 }
360
361
362
363
364 func (b *B) Elapsed() time.Duration {
365 d := b.duration
366 if b.timerOn {
367 d += highPrecisionTimeSince(b.start)
368 }
369 return d
370 }
371
372
373
374
375
376
377
378
379
380
381 func (b *B) ReportMetric(n float64, unit string) {
382 if unit == "" {
383 panic("metric unit must not be empty")
384 }
385 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
386 panic("metric unit must not contain whitespace")
387 }
388 b.extra[unit] = n
389 }
390
391 func (b *B) stopOrScaleBLoop() bool {
392 t := b.Elapsed()
393 if t >= b.benchTime.d {
394
395 b.StopTimer()
396
397 b.N = int(b.loop.n)
398 b.loop.done = true
399 return false
400 }
401
402 goalns := b.benchTime.d.Nanoseconds()
403 prevIters := int64(b.loop.n)
404 b.loop.n = uint64(predictN(goalns, prevIters, t.Nanoseconds(), prevIters))
405 if b.loop.n&loopPoisonMask != 0 {
406
407
408 panic("loop iteration target overflow")
409 }
410 b.loop.i++
411 return true
412 }
413
414 func (b *B) loopSlowPath() bool {
415
416 if !b.timerOn {
417 b.Fatal("B.Loop called with timer stopped")
418 }
419 if b.loop.i&loopPoisonMask != 0 {
420 panic(fmt.Sprintf("unknown loop stop condition: %#x", b.loop.i))
421 }
422
423 if b.loop.n == 0 {
424
425
426
427 b.loop.n = 1
428
429 b.N = 0
430 b.loop.i++
431 b.ResetTimer()
432 return true
433 }
434
435 if b.benchTime.n > 0 {
436 if b.loop.n < uint64(b.benchTime.n) {
437 b.loop.n = uint64(b.benchTime.n)
438 b.loop.i++
439 return true
440 }
441 b.StopTimer()
442
443 b.N = int(b.loop.n)
444 b.loop.done = true
445 return false
446 }
447
448 return b.stopOrScaleBLoop()
449 }
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484 func (b *B) Loop() bool {
485
486
487
488
489
490
491
492
493
494
495
496
497 if b.loop.i < b.loop.n {
498 b.loop.i++
499 return true
500 }
501 return b.loopSlowPath()
502 }
503
504
505
// Flag bits kept in the high end of B.loop.i. Setting any of them makes
// loop.i larger than every valid iteration target, which sends B.Loop to
// its slow path.
const (
	// loopPoisonTimer is set while the timer is stopped (see StopTimer)
	// and cleared by StartTimer. Flags are allocated downwards from bit 63.
	loopPoisonTimer = uint64(1 << (63 - iota))

	// loopPoisonMask covers every flag bit declared above it in this
	// block; it must remain the last entry.
	loopPoisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
)
516
517
// BenchmarkResult holds the measurements of a benchmark run.
type BenchmarkResult struct {
	N         int           // number of iterations
	T         time.Duration // total time taken
	Bytes     int64         // bytes processed per iteration
	MemAllocs uint64        // total number of memory allocations
	MemBytes  uint64        // total number of bytes allocated

	// Extra holds additional metrics keyed by unit. An entry for "ns/op",
	// "MB/s", "B/op" or "allocs/op" overrides the corresponding value
	// that would otherwise be computed from the fields above.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric: the override from Extra if present,
// otherwise T divided by N (0 when N is not positive).
func (r BenchmarkResult) NsPerOp() int64 {
	if override, ok := r.Extra["ns/op"]; ok {
		return int64(override)
	}
	if r.N > 0 {
		return r.T.Nanoseconds() / int64(r.N)
	}
	return 0
}

// mbPerSec returns the "MB/s" metric: the override from Extra if present,
// otherwise the throughput computed from Bytes, N and T (0 when any of them
// is not positive).
func (r BenchmarkResult) mbPerSec() float64 {
	if override, ok := r.Extra["MB/s"]; ok {
		return override
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric: the override from Extra if
// present, otherwise MemAllocs divided by N (0 when N is not positive).
func (r BenchmarkResult) AllocsPerOp() int64 {
	if override, ok := r.Extra["allocs/op"]; ok {
		return int64(override)
	}
	if r.N > 0 {
		return int64(r.MemAllocs) / int64(r.N)
	}
	return 0
}

// AllocedBytesPerOp returns the "B/op" metric: the override from Extra if
// present, otherwise MemBytes divided by N (0 when N is not positive).
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if override, ok := r.Extra["B/op"]; ok {
		return int64(override)
	}
	if r.N > 0 {
		return int64(r.MemBytes) / int64(r.N)
	}
	return 0
}

// String returns a tab-separated summary of the result: the iteration
// count, then ns/op and MB/s when they are non-zero, then every other
// metric in Extra sorted by unit. The memory metrics are not included; see
// MemString.
func (r BenchmarkResult) String() string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "%8d", r.N)

	// ns/op is handled as a float here so that fractions are kept.
	nsPerOp, overridden := r.Extra["ns/op"]
	if !overridden {
		nsPerOp = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if nsPerOp != 0 {
		sb.WriteByte('\t')
		prettyPrint(&sb, nsPerOp, "ns/op")
	}

	if rate := r.mbPerSec(); rate != 0 {
		fmt.Fprintf(&sb, "\t%7.2f MB/s", rate)
	}

	// Remaining metrics in a stable order, skipping the built-in units
	// that are reported separately.
	var units []string
	for unit := range r.Extra {
		if unit == "ns/op" || unit == "MB/s" || unit == "B/op" || unit == "allocs/op" {
			continue
		}
		units = append(units, unit)
	}
	slices.Sort(units)
	for _, unit := range units {
		sb.WriteByte('\t')
		prettyPrint(&sb, r.Extra[unit], unit)
	}
	return sb.String()
}

// prettyPrint writes x followed by a space and unit to w. The number of
// decimals grows as the magnitude of x shrinks, and the field width grows
// by the same amount so the integer part always takes the same space.
func prettyPrint(w io.Writer, x float64, unit string) {
	var format string
	y := math.Abs(x)
	switch {
	case y == 0, y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns the B/op and allocs/op metrics, formatted like the
// fields produced by String.
func (r BenchmarkResult) MemString() string {
	bytesPerOp, allocsPerOp := r.AllocedBytesPerOp(), r.AllocsPerOp()
	return fmt.Sprintf("%8d B/op\t%8d allocs/op", bytesPerOp, allocsPerOp)
}
651
652
// benchmarkName returns name with "-n" appended, except when n is 1, in
// which case name is returned unchanged.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return fmt.Sprintf("%s-%d", name, n)
}
659
660 type benchState struct {
661 match *matcher
662
663 maxLen int
664 extLen int
665 }
666
667
668
669 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
670 runBenchmarks("", matchString, benchmarks)
671 }
672
673 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
674
675 if len(*matchBenchmarks) == 0 {
676 return true
677 }
678
679 maxprocs := 1
680 for _, procs := range cpuList {
681 if procs > maxprocs {
682 maxprocs = procs
683 }
684 }
685 bstate := &benchState{
686 match: newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
687 extLen: len(benchmarkName("", maxprocs)),
688 }
689 var bs []InternalBenchmark
690 for _, Benchmark := range benchmarks {
691 if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
692 bs = append(bs, Benchmark)
693 benchName := benchmarkName(Benchmark.Name, maxprocs)
694 if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
695 bstate.maxLen = l
696 }
697 }
698 }
699 main := &B{
700 common: common{
701 name: "Main",
702 w: os.Stdout,
703 bench: true,
704 },
705 importPath: importPath,
706 benchFunc: func(b *B) {
707 for _, Benchmark := range bs {
708 b.Run(Benchmark.Name, Benchmark.F)
709 }
710 },
711 benchTime: benchTime,
712 bstate: bstate,
713 }
714 if Verbose() {
715 main.chatty = newChattyPrinter(main.w)
716 }
717 main.runN(1)
718 return !main.failed
719 }
720
721
722 func (s *benchState) processBench(b *B) {
723 for i, procs := range cpuList {
724 for j := uint(0); j < *count; j++ {
725 runtime.GOMAXPROCS(procs)
726 benchName := benchmarkName(b.name, procs)
727
728
729 if b.chatty == nil {
730 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
731 }
732
733 if i > 0 || j > 0 {
734 b = &B{
735 common: common{
736 signal: make(chan bool),
737 name: b.name,
738 w: b.w,
739 chatty: b.chatty,
740 bench: true,
741 },
742 benchFunc: b.benchFunc,
743 benchTime: b.benchTime,
744 }
745 b.run1()
746 }
747 r := b.doBench()
748 if b.failed {
749
750
751
752 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
753 continue
754 }
755 results := r.String()
756 if b.chatty != nil {
757 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
758 }
759 if *benchmarkMemory || b.showAllocResult {
760 results += "\t" + r.MemString()
761 }
762 fmt.Fprintln(b.w, results)
763
764
765 if len(b.output) > 0 {
766 b.trimOutput()
767 fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
768 }
769 if p := runtime.GOMAXPROCS(-1); p != procs {
770 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
771 }
772 if b.chatty != nil && b.chatty.json {
773 b.chatty.Updatef("", "=== NAME %s\n", "")
774 }
775 }
776 }
777 }
778
779
780
781
782 var hideStdoutForTesting = false
783
784
785
786
787
788
789 func (b *B) Run(name string, f func(b *B)) bool {
790
791
792 b.hasSub.Store(true)
793 benchmarkLock.Unlock()
794 defer benchmarkLock.Lock()
795
796 benchName, ok, partial := b.name, true, false
797 if b.bstate != nil {
798 benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
799 }
800 if !ok {
801 return true
802 }
803 var pc [maxStackLen]uintptr
804 n := runtime.Callers(2, pc[:])
805 sub := &B{
806 common: common{
807 signal: make(chan bool),
808 name: benchName,
809 parent: &b.common,
810 level: b.level + 1,
811 creator: pc[:n],
812 w: b.w,
813 chatty: b.chatty,
814 bench: true,
815 },
816 importPath: b.importPath,
817 benchFunc: f,
818 benchTime: b.benchTime,
819 bstate: b.bstate,
820 }
821 if partial {
822
823
824 sub.hasSub.Store(true)
825 }
826
827 if b.chatty != nil {
828 labelsOnce.Do(func() {
829 fmt.Printf("goos: %s\n", runtime.GOOS)
830 fmt.Printf("goarch: %s\n", runtime.GOARCH)
831 if b.importPath != "" {
832 fmt.Printf("pkg: %s\n", b.importPath)
833 }
834 if cpu := sysinfo.CPUName(); cpu != "" {
835 fmt.Printf("cpu: %s\n", cpu)
836 }
837 })
838
839 if !hideStdoutForTesting {
840 if b.chatty.json {
841 b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
842 }
843 fmt.Println(benchName)
844 }
845 }
846
847 if sub.run1() {
848 sub.run()
849 }
850 b.add(sub.result)
851 return !sub.failed
852 }
853
854
855
856
857 func (b *B) add(other BenchmarkResult) {
858 r := &b.result
859
860
861 r.N = 1
862 r.T += time.Duration(other.NsPerOp())
863 if other.Bytes == 0 {
864
865
866 b.missingBytes = true
867 r.Bytes = 0
868 }
869 if !b.missingBytes {
870 r.Bytes += other.Bytes
871 }
872 r.MemAllocs += uint64(other.AllocsPerOp())
873 r.MemBytes += uint64(other.AllocedBytesPerOp())
874 }
875
876
877 func (b *B) trimOutput() {
878
879
880
881 const maxNewlines = 10
882 for nlCount, j := 0, 0; j < len(b.output); j++ {
883 if b.output[j] == '\n' {
884 nlCount++
885 if nlCount >= maxNewlines {
886 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
887 break
888 }
889 }
890 }
891 }
892
893
// A PB is handed by RunParallel to each goroutine running the benchmark
// body; the goroutine calls Next to claim iterations.
type PB struct {
	globalN *atomic.Uint64 // counter of claimed iterations, shared by all goroutines
	grain   uint64         // number of iterations claimed from globalN at a time
	cache   uint64         // claimed iterations not yet handed out by Next
	bN      uint64         // total number of iterations to execute
}

// Next reports whether there are more iterations to execute.
//
// Iterations are claimed from the shared counter in batches of grain and
// handed out one per call. The batch that crosses bN is cut short so that
// exactly bN iterations are handed out in total.
func (pb *PB) Next() bool {
	if pb.cache > 0 {
		pb.cache--
		return true
	}

	claimed := pb.globalN.Add(pb.grain)
	switch limit := pb.bN + pb.grain; {
	case claimed <= pb.bN:
		// The whole batch is within the budget.
		pb.cache = pb.grain
	case claimed < limit:
		// Only the beginning of the batch is within the budget.
		pb.cache = limit - claimed
	default:
		return false
	}
	pb.cache--
	return true
}
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930 func (b *B) RunParallel(body func(*PB)) {
931 if b.N == 0 {
932 return
933 }
934
935
936
937 grain := uint64(0)
938 if b.previousN > 0 && b.previousDuration > 0 {
939 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
940 }
941 if grain < 1 {
942 grain = 1
943 }
944
945
946 if grain > 1e4 {
947 grain = 1e4
948 }
949
950 var n atomic.Uint64
951 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
952 var wg sync.WaitGroup
953 wg.Add(numProcs)
954 for p := 0; p < numProcs; p++ {
955 go func() {
956 defer wg.Done()
957 pb := &PB{
958 globalN: &n,
959 grain: grain,
960 bN: uint64(b.N),
961 }
962 body(pb)
963 }()
964 }
965 wg.Wait()
966 if n.Load() <= uint64(b.N) && !b.Failed() {
967 b.Fatal("RunParallel: body exited without pb.Next() == false")
968 }
969 }
970
971
972
973
974 func (b *B) SetParallelism(p int) {
975 if p >= 1 {
976 b.parallelism = p
977 }
978 }
979
980
981
982
983
984
985
986
987
988 func Benchmark(f func(b *B)) BenchmarkResult {
989 b := &B{
990 common: common{
991 signal: make(chan bool),
992 w: discard{},
993 },
994 benchFunc: f,
995 benchTime: benchTime,
996 }
997 if b.run1() {
998 b.run()
999 }
1000 return b.result
1001 }
1002
// discard is a writer that drops everything written to it.
type discard struct{}

// Write reports all of p as written without storing it. It never fails.
func (discard) Write(p []byte) (n int, err error) {
	return len(p), nil
}
1006
View as plain text