// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

import "math/bits"

// NOTE (review): this is an asmcheck codegen test. The comments inside each
// function name a platform (optionally with a variant suffix) followed by
// quoted regexps that must — or, with a leading minus, must not — match the
// assembly the compiler generates for that function. They are assertions,
// not documentation; do not edit them casually.

// ----------------------- //
//    bits.LeadingZeros    //
// ----------------------- //

func LeadingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV",-"SUB"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros(n)
}

func LeadingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV",-"SUB"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros64(n)
}

func LeadingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZW"
	// loong64:"CLZW",-"SUB"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZW"
	return bits.LeadingZeros32(n)
}

func LeadingZeros16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros16(n)
}

func LeadingZeros8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros8(n)
}

// --------------- //
//    bits.Len*    //
// --------------- //

func Len(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len(n)
}

func Len64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len64(n)
}

// SubFromLen64 checks that 64 - bits.Len64(n) folds into the count-leading-
// zeros instruction directly, with no separate subtraction.
func SubFromLen64(n uint64) int {
	// loong64:"CLZV",-"ADD"
	// ppc64x:"CNTLZD",-"SUBC"
	return 64 - bits.Len64(n)
}

// CompareWithLen64 checks that a comparison against bits.Len64 compiles to a
// comparison on the leading-zero count itself, with no extra arithmetic.
func CompareWithLen64(n uint64) bool {
	// loong64:"CLZV",-"ADD",-"[$]64",-"[$]9"
	return bits.Len64(n) < 9
}

func Len32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZW"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x: "CNTLZW"
	return bits.Len32(n)
}

func Len16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len16(n)
}

func Len8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len8(n)
}

// -------------------- //
//    bits.OnesCount    //
// -------------------- //

// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
func OnesCount(n uint) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}

func OnesCount64(n uint64) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}

func OnesCount32(n uint32) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTW"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}

func OnesCount16(n uint16) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// loong64:"VPCNTH"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}

func OnesCount8(n uint8) int {
	// s390x:"POPCNT"
	// ppc64x:"POPCNTB"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}

// ------------------ //
//    bits.Reverse    //
// ------------------ //

func Reverse(n uint) uint {
	// loong64:"BITREVV"
	return bits.Reverse(n)
}

func Reverse64(n uint64) uint64 {
	// loong64:"BITREVV"
	return bits.Reverse64(n)
}

func Reverse32(n uint32) uint32 {
	// loong64:"BITREVW"
	return bits.Reverse32(n)
}

func Reverse16(n uint16) uint16 {
	// loong64:"BITREV4B","REVB2H"
	return bits.Reverse16(n)
}

func Reverse8(n uint8) uint8 {
	// loong64:"BITREV4B"
	return bits.Reverse8(n)
}

// ----------------------- //
//    bits.ReverseBytes    //
// ----------------------- //

func ReverseBytes(n uint) uint {
	// amd64:"BSWAPQ"
	// 386:"BSWAPL"
	// s390x:"MOVDBR"
	// arm64:"REV"
	// loong64:"REVBV"
	return bits.ReverseBytes(n)
}

func ReverseBytes64(n uint64) uint64 {
	// amd64:"BSWAPQ"
	// 386:"BSWAPL"
	// s390x:"MOVDBR"
	// arm64:"REV"
	// ppc64x/power10: "BRD"
	// loong64:"REVBV"
	return bits.ReverseBytes64(n)
}

func ReverseBytes32(n uint32) uint32 {
	// amd64:"BSWAPL"
	// 386:"BSWAPL"
	// s390x:"MOVWBR"
	// arm64:"REVW"
	// loong64:"REVB2W"
	// ppc64x/power10: "BRW"
	return bits.ReverseBytes32(n)
}

func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm64:"REV16W",-"UBFX",-"ORR"
	// arm/5:"SLL","SRL","ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	// loong64:"REVB2H"
	// ppc64x/power10: "BRH"
	return bits.ReverseBytes16(n)
}

// --------------------- //
//    bits.RotateLeft    //
// --------------------- //

func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"RORI"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}

func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW\tR[0-9]+@>23`
	// arm64:"RORW"
	// loong64:"ROTR\t"
	// ppc64x:"ROTLW"
	// riscv64:"RORIW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}

func RotateLeft16(n uint16, s int) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	// arm64:"RORW",-"CSEL"
	// loong64:"ROTR\t","SLLV"
	return bits.RotateLeft16(n, s)
}

func RotateLeft8(n uint8, s int) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	// arm64:"LSL","LSR",-"CSEL"
	// loong64:"OR","SLLV","SRLV"
	return bits.RotateLeft8(n, s)
}

func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}

func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}

func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// loong64:"ROTR\t"
	// ppc64x:"ROTLW"
	// riscv64:"ROLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}

// ------------------------ //
//    bits.TrailingZeros    //
// ------------------------ //

func TrailingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// 386:"BSFL"
	// arm:"CLZ"
	// arm64:"RBIT","CLZ"
	// loong64:"CTZV"
	// s390x:"FLOGR"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}

func TrailingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// 386:"BSFL"
	// arm64:"RBIT","CLZ"
	// loong64:"CTZV"
	// s390x:"FLOGR"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}

func TrailingZeros64Subtract(n uint64) int {
	// ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD"
	// ppc64x/power9:"SUBC","CNTTZD"
	return bits.TrailingZeros64(1 - n)
}

func TrailingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
	// amd64/v3:"TZCNTL"
	// 386:"BSFL"
	// arm:"CLZ"
	// arm64:"RBITW","CLZW"
	// loong64:"CTZW"
	// s390x:"FLOGR","MOVWZ"
	// ppc64x/power8:"ANDN","POPCNTW"
	// ppc64x/power9: "CNTTZW"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}

func TrailingZeros16(n uint16) int {
	// amd64:"BSFL","ORL\\t\\$65536"
	// 386:"BSFL\t"
	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
	// loong64:"CTZV"
	// s390x:"FLOGR","OR\t\\$65536"
	// ppc64x/power8:"POPCNTD","ORIS\\t\\$1"
	// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}

func TrailingZeros8(n uint8) int {
	// amd64:"BSFL","ORL\\t\\$256"
	// 386:"BSFL"
	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
	// loong64:"CTZV"
	// s390x:"FLOGR","OR\t\\$256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}

// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.

func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros(n)
		n &= n - 1
	}
	return i
}

func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros64(n)
		n &= n - 1
	}
	return i
}

func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
		// amd64/v3:"TZCNTL"
		i += bits.TrailingZeros32(n)
		n &= n - 1
	}
	return i
}

func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros16(n)
		n &= n - 1
	}
	return i
}

func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros8(n)
		n &= n - 1
	}
	return i
}

// --------------- //
//    bits.Add*    //
// --------------- //

func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, ci)
}

func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, 7, ci)
}

func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, 0)
}

func AddR(x, y, ci uint) uint {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add(x, y, ci)
	return r
}

func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}

func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, ci)
}

func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, 7, ci)
}

func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, 0)
}

func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add64(x, y, ci)
	return r
}

func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}

func Add64M0(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], 0)
	// ppc64x: -"ADDC", -"ADDE", "ADDZE\tR[1-9]"
	r[1], c = bits.Add64(p[1], 0, c)
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	r[2], c = bits.Add64(p[2], p[2], c)
}

func Add64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x: "ADDC\tR", "ADDZE"
	r[0], c[0] = bits.Add64(p[0], q[0], 0)
	// ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE"
	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
}

func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Verify independent carry chain operations are scheduled efficiently
// and do not cause unnecessary save/restore of the CA bit.
//
// This is an example of why CarryChainTail priority must be lower
// (earlier in the block) than Memory. f[0]=f1 could be scheduled
// after the first two lower 64 bit limb adds, but before either
// high 64 bit limbs are added.
//
// This is what happened on PPC64 when compiling
// crypto/internal/edwards25519/field.feMulGeneric.
func Add64MultipleChains(a, b, c, d [2]uint64) {
	var cx, d1, d2 uint64
	a1, a2 := a[0], a[1]
	b1, b2 := b[0], b[1]
	c1, c2 := c[0], c[1]

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(a1, b1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(a2, b2, cx)

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(c1, d1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(c2, d2, cx)
	d[0] = d1
	d[1] = d2
}

// --------------- //
//    bits.Sub*    //
// --------------- //

func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, ci)
}

func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, 7, ci)
}

func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, 0)
}

func SubR(x, y, ci uint) uint {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub(x, y, ci)
	return r
}
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}

func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, ci)
}

func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, 7, ci)
}

func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, 0)
}

func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub64(x, y, ci)
	return r
}
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}

func Sub64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
	// ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
}

func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// --------------- //
//    bits.Mul*    //
// --------------- //

func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul(x, y)
}

func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul64(x, y)
}

func Mul64HiOnly(x, y uint64) uint64 {
	// arm64:"UMULH",-"MUL"
	// riscv64:"MULHU",-"MUL\t"
	hi, _ := bits.Mul64(x, y)
	return hi
}

func Mul64LoOnly(x, y uint64) uint64 {
	// arm64:"MUL",-"UMULH"
	// riscv64:"MUL\t",-"MULHU"
	_, lo := bits.Mul64(x, y)
	return lo
}

// --------------- //
//    bits.Div*    //
// --------------- //

func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}

func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR","UDIV","MSUB",-"UREM"
	return bits.Div32(hi, lo, x)
}

func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}

func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}