Source file src/simd/archsimd/pkginternal_test.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd && amd64
     6  
     7  package archsimd_test
     8  
     9  import (
    10  	"fmt"
    11  	"os"
    12  	"simd/archsimd"
    13  	"simd/archsimd/internal/test_helpers"
    14  	"testing"
    15  )
    16  
    17  func TestMain(m *testing.M) {
    18  	if !archsimd.X86.AVX() {
    19  		fmt.Fprintln(os.Stderr, "Skipping tests: AVX is not available")
    20  		os.Exit(0)
    21  	}
    22  	os.Exit(m.Run())
    23  }
    24  
    25  func TestConcatSelectedConstant64(t *testing.T) {
    26  	a := make([]int64, 2)
    27  	x := archsimd.LoadInt64x2Slice([]int64{4, 5})
    28  	y := archsimd.LoadInt64x2Slice([]int64{6, 7})
    29  	z := x.ExportTestConcatSelectedConstant(0b10, y)
    30  	z.StoreSlice(a)
    31  	test_helpers.CheckSlices[int64](t, a, []int64{4, 7})
    32  }
    33  
    34  func TestConcatSelectedConstantGrouped64(t *testing.T) {
    35  	a := make([]float64, 4)
    36  	x := archsimd.LoadFloat64x4Slice([]float64{4, 5, 8, 9})
    37  	y := archsimd.LoadFloat64x4Slice([]float64{6, 7, 10, 11})
    38  	z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y)
    39  	z.StoreSlice(a)
    40  	test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11})
    41  }
    42  
    43  func TestConcatSelectedConstant32(t *testing.T) {
    44  	a := make([]float32, 4)
    45  	x := archsimd.LoadFloat32x4Slice([]float32{4, 5, 8, 9})
    46  	y := archsimd.LoadFloat32x4Slice([]float32{6, 7, 10, 11})
    47  	z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y)
    48  	z.StoreSlice(a)
    49  	test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11})
    50  }
    51  
    52  func TestConcatSelectedConstantGrouped32(t *testing.T) {
    53  	a := make([]uint32, 8)
    54  	x := archsimd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11})
    55  	y := archsimd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15})
    56  	z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y)
    57  	z.StoreSlice(a)
    58  	test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
    59  }
    60  
    61  func TestTern(t *testing.T) {
    62  	if !archsimd.X86.AVX512() {
    63  		t.Skip("This test needs AVX512")
    64  	}
    65  	x := archsimd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
    66  	y := archsimd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
    67  	z := archsimd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
    68  
    69  	foo := func(w archsimd.Int32x8, k uint8) {
    70  		a := make([]int32, 8)
    71  		w.StoreSlice(a)
    72  		t.Logf("For k=%0b, w=%v", k, a)
    73  		for i, b := range a {
    74  			if (int32(k)>>i)&1 != b {
    75  				t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
    76  					i, b, k)
    77  			}
    78  		}
    79  	}
    80  
    81  	foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000)
    82  	foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100)
    83  	foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010)
    84  }
    85  
    86  func TestSelect2x4x32(t *testing.T) {
    87  	for a := range uint8(8) {
    88  		for b := range uint8(8) {
    89  			for c := range uint8(8) {
    90  				for d := range uint8(8) {
    91  					x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
    92  					y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
    93  					z := select2x4x32(x, a, b, c, d, y)
    94  					w := make([]int32, 4, 4)
    95  					z.StoreSlice(w)
    96  					if w[0] != int32(a) || w[1] != int32(b) ||
    97  						w[2] != int32(c) || w[3] != int32(d) {
    98  						t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w)
    99  					}
   100  				}
   101  			}
   102  		}
   103  	}
   104  }
   105  
   106  func TestSelect2x8x32Grouped(t *testing.T) {
   107  	for a := range uint8(8) {
   108  		for b := range uint8(8) {
   109  			for c := range uint8(8) {
   110  				for d := range uint8(8) {
   111  					x := archsimd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13})
   112  					y := archsimd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17})
   113  					z := select2x8x32Grouped(x, a, b, c, d, y)
   114  					w := make([]int32, 8, 8)
   115  					z.StoreSlice(w)
   116  					if w[0] != int32(a) || w[1] != int32(b) ||
   117  						w[2] != int32(c) || w[3] != int32(d) ||
   118  						w[4] != int32(10+a) || w[5] != int32(10+b) ||
   119  						w[6] != int32(10+c) || w[7] != int32(10+d) {
   120  						t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w)
   121  					}
   122  				}
   123  			}
   124  		}
   125  	}
   126  }
   127  
   128  // select2x4x32 returns a selection of 4 elements in x and y, numbered
   129  // 0-7, where 0-3 are the four elements of x and 4-7 are the four elements
   130  // of y.
   131  func select2x4x32(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
   132  	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
   133  
   134  	a, b, c, d = a&3, b&3, c&3, d&3
   135  
   136  	switch pattern {
   137  	case archsimd.LLLL:
   138  		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
   139  	case archsimd.HHHH:
   140  		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
   141  	case archsimd.LLHH:
   142  		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
   143  	case archsimd.HHLL:
   144  		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
   145  
   146  	case archsimd.HLLL:
   147  		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
   148  		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
   149  	case archsimd.LHLL:
   150  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
   151  		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
   152  
   153  	case archsimd.HLHH:
   154  		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
   155  		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
   156  	case archsimd.LHHH:
   157  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
   158  		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
   159  
   160  	case archsimd.LLLH:
   161  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
   162  		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   163  	case archsimd.LLHL:
   164  		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
   165  		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   166  	case archsimd.HHLH:
   167  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
   168  		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   169  	case archsimd.HHHL:
   170  		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
   171  		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   172  
   173  	case archsimd.LHLH:
   174  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, c, b, d), y)
   175  		return z.ExportTestConcatSelectedConstant(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z)
   176  	case archsimd.HLHL:
   177  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, d, a, c), y)
   178  		return z.ExportTestConcatSelectedConstant(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z)
   179  	case archsimd.HLLH:
   180  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, c, a, d), y)
   181  		return z.ExportTestConcatSelectedConstant(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z)
   182  	case archsimd.LHHL:
   183  		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, d, b, c), y)
   184  		return z.ExportTestConcatSelectedConstant(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z)
   185  	}
   186  	panic("missing case, switch should be exhaustive")
   187  }
   188  
   189  // select2x8x32Grouped returns a pair of selection of 4 elements in x and y,
   190  // numbered 0-7, where 0-3 are the four elements of x's two groups (lower and
   191  // upper 128 bits) and 4-7 are the four elements of y's two groups.
   192  
   193  func select2x8x32Grouped(x archsimd.Int32x8, a, b, c, d uint8, y archsimd.Int32x8) archsimd.Int32x8 {
   194  	// selections as being expressible in the ExportTestConcatSelectedConstant pattern,
   195  	// or not. Classification is by H and L, where H is a selection from 4-7
   196  	// and L is a selection from 0-3.
   197  	// archsimd.LLHH -> CSC(x,y, a, b, c&3, d&3)
   198  	// archsimd.HHLL -> CSC(y,x, a&3, b&3, c, d)
   199  	// archsimd.LLLL -> CSC(x,x, a, b, c, d)
   200  	// archsimd.HHHH -> CSC(y,y, a&3, b&3, c&3, d&3)
   201  
   202  	// archsimd.LLLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(x, z, a, b, 0, 2)
   203  	// archsimd.LLHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(x, z, a, b, 0, 2)
   204  	// archsimd.HHLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(y, z, a&3, b&3, 0, 2)
   205  	// archsimd.HHHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(y, z, a&3, b&3, 0, 2)
   206  
   207  	// archsimd.LHLL -> z = CSC(x, y, a, a, b&3, b&3); CSC(z, x, 0, 2, c, d)
   208  	// etc
   209  
   210  	// archsimd.LHLH -> z = CSC(x, y, a, c, b&3, d&3); CSC(z, z, 0, 2, 1, 3)
   211  	// archsimd.HLHL -> z = CSC(x, y, b, d, a&3, c&3); CSC(z, z, 2, 0, 3, 1)
   212  
   213  	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
   214  
   215  	a, b, c, d = a&3, b&3, c&3, d&3
   216  
   217  	switch pattern {
   218  	case archsimd.LLLL:
   219  		return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
   220  	case archsimd.HHHH:
   221  		return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
   222  	case archsimd.LLHH:
   223  		return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
   224  	case archsimd.HHLL:
   225  		return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
   226  
   227  	case archsimd.HLLL:
   228  		z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
   229  		return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
   230  	case archsimd.LHLL:
   231  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
   232  		return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
   233  
   234  	case archsimd.HLHH:
   235  		z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
   236  		return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
   237  	case archsimd.LHHH:
   238  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
   239  		return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
   240  
   241  	case archsimd.LLLH:
   242  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
   243  		return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   244  	case archsimd.LLHL:
   245  		z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
   246  		return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   247  	case archsimd.HHLH:
   248  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
   249  		return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   250  	case archsimd.HHHL:
   251  		z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
   252  		return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
   253  
   254  	case archsimd.LHLH:
   255  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, c, b, d), y)
   256  		return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z)
   257  	case archsimd.HLHL:
   258  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, d, a, c), y)
   259  		return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z)
   260  	case archsimd.HLLH:
   261  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, c, a, d), y)
   262  		return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z)
   263  	case archsimd.LHHL:
   264  		z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, d, b, c), y)
   265  		return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z)
   266  	}
   267  	panic("missing case, switch should be exhaustive")
   268  }
   269  

View as plain text