1
2
3
4
5 package amd64
6
7 import (
8 "cmd/compile/internal/ir"
9 "cmd/compile/internal/objw"
10 "cmd/compile/internal/types"
11 "cmd/internal/obj"
12 "cmd/internal/obj/x86"
13 )
14
15
16
// Parameters describing the layout of the DUFFZERO routine, as consumed by
// dzOff and dzDI when computing where to enter it and how to bias DI.
//
// NOTE(review): the per-constant descriptions below are read off the names
// and the way dzOff/dzDI use them; the byte sizes must match whatever
// generates the routine — confirm there before changing any value.
const (
	// Geometry of the routine.
	dzBlocks    = 16 // number of blocks in the routine
	dzBlockLen  = 4  // clearing steps per block
	dzClearStep = 16 // bytes cleared by one step

	// Encoded sizes, in bytes (presumably of machine code).
	dzBlockSize = 23 // one whole block
	dzMovSize   = 5  // one clearing step (MOV)
	dzLeaqSize  = 4  // the LEAQ belonging to a block

	// Derived quantities.
	dzClearLen = dzBlockLen * dzClearStep // bytes cleared by one whole block
	dzSize     = dzBlockSize * dzBlocks   // size of the whole routine
)
28
29
30
31 func dzOff(b int64) int64 {
32 off := int64(dzSize)
33 off -= b / dzClearLen * dzBlockSize
34 tailLen := b % dzClearLen
35 if tailLen >= dzClearStep {
36 off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
37 }
38 return off
39 }
40
41
42
43 func dzDI(b int64) int64 {
44 tailLen := b % dzClearLen
45 if tailLen < dzClearStep {
46 return 0
47 }
48 tailSteps := tailLen / dzClearStep
49 return -dzClearStep * (dzBlockLen - tailSteps)
50 }
51
52 func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
53 const (
54 r13 = 1 << iota
55 )
56
57 if cnt == 0 {
58 return p
59 }
60
61 if cnt == 8 {
62 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off)
63 } else if cnt <= int64(8*types.RegSize) {
64 for i := int64(0); i < cnt/16; i++ {
65 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
66 }
67
68 if cnt%16 != 0 {
69 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
70 }
71 } else if cnt <= int64(128*types.RegSize) {
72
73
74 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
75
76 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
77 p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
78 p.To.Sym = ir.Syms.Duffzero
79 if cnt%16 != 0 {
80 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
81 }
82
83 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
84
85 } else {
86
87
88
89
90
91
92
93
94
95 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
96 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
97 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
98
99
100 p = pp.Append(p, x86.AXORL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_AX, 0)
101 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
102 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
103 p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
104 p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
105
106
107 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
108 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
109 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
110
111
112 *state &= ^uint32(r13)
113 }
114
115 return p
116 }
117
118 func ginsnop(pp *objw.Progs) *obj.Prog {
119
120
121
122
123
124
125 p := pp.Prog(x86.AXCHGL)
126 p.From.Type = obj.TYPE_REG
127 p.From.Reg = x86.REG_AX
128 p.To.Type = obj.TYPE_REG
129 p.To.Reg = x86.REG_AX
130 return p
131 }
132
View as plain text