Source file
src/simd/archsimd/pkginternal_test.go
1
2
3
4
5
6
7 package archsimd_test
8
9 import (
10 "fmt"
11 "os"
12 "simd/archsimd"
13 "simd/archsimd/internal/test_helpers"
14 "testing"
15 )
16
17 func TestMain(m *testing.M) {
18 if !archsimd.X86.AVX() {
19 fmt.Fprintln(os.Stderr, "Skipping tests: AVX is not available")
20 os.Exit(0)
21 }
22 os.Exit(m.Run())
23 }
24
25 func TestConcatSelectedConstant64(t *testing.T) {
26 a := make([]int64, 2)
27 x := archsimd.LoadInt64x2Slice([]int64{4, 5})
28 y := archsimd.LoadInt64x2Slice([]int64{6, 7})
29 z := x.ExportTestConcatSelectedConstant(0b10, y)
30 z.StoreSlice(a)
31 test_helpers.CheckSlices[int64](t, a, []int64{4, 7})
32 }
33
34 func TestConcatSelectedConstantGrouped64(t *testing.T) {
35 a := make([]float64, 4)
36 x := archsimd.LoadFloat64x4Slice([]float64{4, 5, 8, 9})
37 y := archsimd.LoadFloat64x4Slice([]float64{6, 7, 10, 11})
38 z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y)
39 z.StoreSlice(a)
40 test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11})
41 }
42
43 func TestConcatSelectedConstant32(t *testing.T) {
44 a := make([]float32, 4)
45 x := archsimd.LoadFloat32x4Slice([]float32{4, 5, 8, 9})
46 y := archsimd.LoadFloat32x4Slice([]float32{6, 7, 10, 11})
47 z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y)
48 z.StoreSlice(a)
49 test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11})
50 }
51
52 func TestConcatSelectedConstantGrouped32(t *testing.T) {
53 a := make([]uint32, 8)
54 x := archsimd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11})
55 y := archsimd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15})
56 z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y)
57 z.StoreSlice(a)
58 test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
59 }
60
61 func TestTern(t *testing.T) {
62 if !archsimd.X86.AVX512() {
63 t.Skip("This test needs AVX512")
64 }
65 x := archsimd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
66 y := archsimd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
67 z := archsimd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
68
69 foo := func(w archsimd.Int32x8, k uint8) {
70 a := make([]int32, 8)
71 w.StoreSlice(a)
72 t.Logf("For k=%0b, w=%v", k, a)
73 for i, b := range a {
74 if (int32(k)>>i)&1 != b {
75 t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
76 i, b, k)
77 }
78 }
79 }
80
81 foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000)
82 foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100)
83 foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010)
84 }
85
86 func TestSelect2x4x32(t *testing.T) {
87 for a := range uint8(8) {
88 for b := range uint8(8) {
89 for c := range uint8(8) {
90 for d := range uint8(8) {
91 x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
92 y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
93 z := select2x4x32(x, a, b, c, d, y)
94 w := make([]int32, 4, 4)
95 z.StoreSlice(w)
96 if w[0] != int32(a) || w[1] != int32(b) ||
97 w[2] != int32(c) || w[3] != int32(d) {
98 t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w)
99 }
100 }
101 }
102 }
103 }
104 }
105
106 func TestSelect2x8x32Grouped(t *testing.T) {
107 for a := range uint8(8) {
108 for b := range uint8(8) {
109 for c := range uint8(8) {
110 for d := range uint8(8) {
111 x := archsimd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13})
112 y := archsimd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17})
113 z := select2x8x32Grouped(x, a, b, c, d, y)
114 w := make([]int32, 8, 8)
115 z.StoreSlice(w)
116 if w[0] != int32(a) || w[1] != int32(b) ||
117 w[2] != int32(c) || w[3] != int32(d) ||
118 w[4] != int32(10+a) || w[5] != int32(10+b) ||
119 w[6] != int32(10+c) || w[7] != int32(10+d) {
120 t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w)
121 }
122 }
123 }
124 }
125 }
126 }
127
128
129
130
131 func select2x4x32(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
132 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
133
134 a, b, c, d = a&3, b&3, c&3, d&3
135
136 switch pattern {
137 case archsimd.LLLL:
138 return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
139 case archsimd.HHHH:
140 return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
141 case archsimd.LLHH:
142 return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
143 case archsimd.HHLL:
144 return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
145
146 case archsimd.HLLL:
147 z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
148 return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
149 case archsimd.LHLL:
150 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
151 return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
152
153 case archsimd.HLHH:
154 z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
155 return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
156 case archsimd.LHHH:
157 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
158 return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
159
160 case archsimd.LLLH:
161 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
162 return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
163 case archsimd.LLHL:
164 z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
165 return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
166 case archsimd.HHLH:
167 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
168 return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
169 case archsimd.HHHL:
170 z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
171 return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
172
173 case archsimd.LHLH:
174 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, c, b, d), y)
175 return z.ExportTestConcatSelectedConstant(0b11_01_10_00 , z)
176 case archsimd.HLHL:
177 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, d, a, c), y)
178 return z.ExportTestConcatSelectedConstant(0b01_11_00_10 , z)
179 case archsimd.HLLH:
180 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, c, a, d), y)
181 return z.ExportTestConcatSelectedConstant(0b11_01_00_10 , z)
182 case archsimd.LHHL:
183 z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, d, b, c), y)
184 return z.ExportTestConcatSelectedConstant(0b01_11_10_00 , z)
185 }
186 panic("missing case, switch should be exhaustive")
187 }
188
189
190
191
192
193 func select2x8x32Grouped(x archsimd.Int32x8, a, b, c, d uint8, y archsimd.Int32x8) archsimd.Int32x8 {
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
214
215 a, b, c, d = a&3, b&3, c&3, d&3
216
217 switch pattern {
218 case archsimd.LLLL:
219 return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
220 case archsimd.HHHH:
221 return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
222 case archsimd.LLHH:
223 return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
224 case archsimd.HHLL:
225 return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
226
227 case archsimd.HLLL:
228 z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
229 return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
230 case archsimd.LHLL:
231 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
232 return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
233
234 case archsimd.HLHH:
235 z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
236 return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
237 case archsimd.LHHH:
238 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
239 return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
240
241 case archsimd.LLLH:
242 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
243 return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
244 case archsimd.LLHL:
245 z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
246 return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
247 case archsimd.HHLH:
248 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
249 return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
250 case archsimd.HHHL:
251 z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
252 return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
253
254 case archsimd.LHLH:
255 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, c, b, d), y)
256 return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 , z)
257 case archsimd.HLHL:
258 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, d, a, c), y)
259 return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 , z)
260 case archsimd.HLLH:
261 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, c, a, d), y)
262 return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 , z)
263 case archsimd.LHHL:
264 z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, d, b, c), y)
265 return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 , z)
266 }
267 panic("missing case, switch should be exhaustive")
268 }
269
View as plain text