diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2022-11-24 13:14:34 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2022-11-24 14:46:00 +0300 |
commit | 87f7fceed34bcafb8aaff351dd493a35c916986f (patch) | |
tree | 26809ec8f550aba8eb019e59adc3d48e51913eb2 /contrib/go/_std_1.18/src/math/big | |
parent | 11bc4015b8010ae201bf3eb33db7dba425aca35e (diff) | |
download | ydb-87f7fceed34bcafb8aaff351dd493a35c916986f.tar.gz |
Ydb stable 22-4-4322.4.43
x-stable-origin-commit: 8d49d46cc834835bf3e50870516acd7376a63bcf
Diffstat (limited to 'contrib/go/_std_1.18/src/math/big')
23 files changed, 9570 insertions, 0 deletions
diff --git a/contrib/go/_std_1.18/src/math/big/accuracy_string.go b/contrib/go/_std_1.18/src/math/big/accuracy_string.go new file mode 100644 index 0000000000..1501ace00d --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/accuracy_string.go @@ -0,0 +1,17 @@ +// Code generated by "stringer -type=Accuracy"; DO NOT EDIT. + +package big + +import "strconv" + +const _Accuracy_name = "BelowExactAbove" + +var _Accuracy_index = [...]uint8{0, 5, 10, 15} + +func (i Accuracy) String() string { + i -= -1 + if i < 0 || i >= Accuracy(len(_Accuracy_index)-1) { + return "Accuracy(" + strconv.FormatInt(int64(i+-1), 10) + ")" + } + return _Accuracy_name[_Accuracy_index[i]:_Accuracy_index[i+1]] +} diff --git a/contrib/go/_std_1.18/src/math/big/arith.go b/contrib/go/_std_1.18/src/math/big/arith.go new file mode 100644 index 0000000000..8f55c195d4 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/arith.go @@ -0,0 +1,277 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file provides Go implementations of elementary multi-precision +// arithmetic operations on word vectors. These have the suffix _g. +// These are needed for platforms without assembly implementations of these routines. +// This file also contains elementary operations that can be implemented +// sufficiently efficiently in Go. + +package big + +import "math/bits" + +// A Word represents a single digit of a multi-precision unsigned integer. +type Word uint + +const ( + _S = _W / 8 // word size in bytes + + _W = bits.UintSize // word size in bits + _B = 1 << _W // digit base + _M = _B - 1 // digit mask +) + +// Many of the loops in this file are of the form +// for i := 0; i < len(z) && i < len(x) && i < len(y); i++ +// i < len(z) is the real condition. +// However, checking i < len(x) && i < len(y) as well is faster than +// having the compiler do a bounds check in the body of the loop; +// remarkably it is even faster than hoisting the bounds check +// out of the loop, by doing something like +// _, _ = x[len(z)-1], y[len(z)-1] +// There are other ways to hoist the bounds check out of the loop, +// but the compiler's BCE isn't powerful enough for them (yet?). +// See the discussion in CL 164966. + +// ---------------------------------------------------------------------------- +// Elementary operations on words +// +// These operations are used by the vector operations below. + +// z1<<_W + z0 = x*y +func mulWW_g(x, y Word) (z1, z0 Word) { + hi, lo := bits.Mul(uint(x), uint(y)) + return Word(hi), Word(lo) +} + +// z1<<_W + z0 = x*y + c +func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { + hi, lo := bits.Mul(uint(x), uint(y)) + var cc uint + lo, cc = bits.Add(lo, uint(c), 0) + return Word(hi + cc), Word(lo) +} + +// nlz returns the number of leading zeros in x. +// Wraps bits.LeadingZeros call for convenience. +func nlz(x Word) uint { + return uint(bits.LeadingZeros(uint(x))) +} + +// The resulting carry c is either 0 or 1. +func addVV_g(z, x, y []Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// The resulting carry c is either 0 or 1. +func subVV_g(z, x, y []Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// The resulting carry c is either 0 or 1. +func addVW_g(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + zi, cc := bits.Add(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// addVWlarge is addVW, but intended for large z. +// The only difference is that we check on every iteration +// whether we are done with carries, +// and if so, switch to a much faster copy instead. +// This is only a good idea for large z, +// because the overhead of the check and the function call +// outweigh the benefits when z is small. +func addVWlarge(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + if c == 0 { + copy(z[i:], x[i:]) + return + } + zi, cc := bits.Add(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +func subVW_g(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + zi, cc := bits.Sub(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +// subVWlarge is to subVW as addVWlarge is to addVW. +func subVWlarge(z, x []Word, y Word) (c Word) { + c = y + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + if c == 0 { + copy(z[i:], x[i:]) + return + } + zi, cc := bits.Sub(uint(x[i]), uint(c), 0) + z[i] = Word(zi) + c = Word(cc) + } + return +} + +func shlVU_g(z, x []Word, s uint) (c Word) { + if s == 0 { + copy(z, x) + return + } + if len(z) == 0 { + return + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + c = x[len(z)-1] >> ŝ + for i := len(z) - 1; i > 0; i-- { + z[i] = x[i]<<s | x[i-1]>>ŝ + } + z[0] = x[0] << s + return +} + +func shrVU_g(z, x []Word, s uint) (c Word) { + if s == 0 { + copy(z, x) + return + } + if len(z) == 0 { + return + } + if len(x) != len(z) { + // This is an invariant guaranteed by the caller. + panic("len(x) != len(z)") + } + s &= _W - 1 // hint to the compiler that shifts by s don't need guard code + ŝ := _W - s + ŝ &= _W - 1 // ditto + c = x[0] << ŝ + for i := 1; i < len(z); i++ { + z[i-1] = x[i-1]>>s | x[i]<<ŝ + } + z[len(z)-1] = x[len(z)-1] >> s + return +} + +func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { + c = r + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + c, z[i] = mulAddWWW_g(x[i], y, c) + } + return +} + +func addMulVVW_g(z, x []Word, y Word) (c Word) { + // The comment near the top of this file discusses this for loop condition. + for i := 0; i < len(z) && i < len(x); i++ { + z1, z0 := mulAddWWW_g(x[i], y, z[i]) + lo, cc := bits.Add(uint(z0), uint(c), 0) + c, z[i] = Word(cc), Word(lo) + c += z1 + } + return +} + +// q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y. +// An approximate reciprocal with a reference to "Improved Division by Invariant Integers +// (IEEE Transactions on Computers, 11 Jun. 2010)" +func divWW(x1, x0, y, m Word) (q, r Word) { + s := nlz(y) + if s != 0 { + x1 = x1<<s | x0>>(_W-s) + x0 <<= s + y <<= s + } + d := uint(y) + // We know that + // m = ⎣(B^2-1)/d⎦-B + // ⎣(B^2-1)/d⎦ = m+B + // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d + // B^2/d = m+B+delta2 0 <= delta2 <= 1 + // The quotient we're trying to compute is + // quotient = ⎣(x1*B+x0)/d⎦ + // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦ + // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦ + // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦ + // The latter two terms of this three-term sum are between 0 and 1. + // So we can compute just the first term, and we will be low by at most 2. + t1, t0 := bits.Mul(uint(m), uint(x1)) + _, c := bits.Add(t0, uint(x0), 0) + t1, _ = bits.Add(t1, uint(x1), c) + // The quotient is either t1, t1+1, or t1+2. + // We'll try t1 and adjust if needed. + qq := t1 + // compute remainder r=x-d*q. + dq1, dq0 := bits.Mul(d, qq) + r0, b := bits.Sub(uint(x0), dq0, 0) + r1, _ := bits.Sub(uint(x1), dq1, b) + // The remainder we just computed is bounded above by B+d: + // r = x1*B + x0 - d*q. + // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦ + // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1 + // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha + // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1 + // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha + // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha + // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha + // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1 + // = B - d + d + d + // = B+d + // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small. + // Add 1 to q and subtract d from r. That guarantees that r is <B, so + // we no longer need to keep track of r1. + if r1 != 0 { + qq++ + r0 -= d + } + // If the remainder is still too large, increment q one more time. + if r0 >= d { + qq++ + r0 -= d + } + return Word(qq), Word(r0 >> s) +} + +// reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1). +func reciprocalWord(d1 Word) Word { + u := uint(d1 << nlz(d1)) + x1 := ^u + x0 := uint(_M) + rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U + return Word(rec) +} diff --git a/contrib/go/_std_1.18/src/math/big/arith_amd64.go b/contrib/go/_std_1.18/src/math/big/arith_amd64.go new file mode 100644 index 0000000000..89108fe149 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/arith_amd64.go @@ -0,0 +1,12 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +package big + +import "internal/cpu" + +var support_adx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/contrib/go/_std_1.18/src/math/big/arith_amd64.s b/contrib/go/_std_1.18/src/math/big/arith_amd64.s new file mode 100644 index 0000000000..5c72a27d8d --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/arith_amd64.s @@ -0,0 +1,526 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MULQ y+8(FP) + MOVQ DX, z1+16(FP) + MOVQ AX, z0+24(FP) + RET + + + +// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0. +// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared. +// This is faster than using rotate instructions. + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V1 // if n < 0 goto V1 + +U1: // n >= 0 + // regular loop body unrolled 4x + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADCQ 0(R9)(SI*8), R11 + ADCQ 8(R9)(SI*8), R12 + ADCQ 16(R9)(SI*8), R13 + ADCQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U1 // if n >= 0 goto U1 + +V1: ADDQ $4, DI // n += 4 + JLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + ADCQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L1 // if n > 0 goto L1 + +E1: NEGQ CX + MOVQ CX, c+72(FP) // return c + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V2 // if n < 0 goto V2 + +U2: // n >= 0 + // regular loop body unrolled 4x + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SBBQ 0(R9)(SI*8), R11 + SBBQ 8(R9)(SI*8), R12 + SBBQ 16(R9)(SI*8), R13 + SBBQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U2 // if n >= 0 goto U2 + +V2: ADDQ $4, DI // n += 4 + JLE E2 // if n <= 0 goto E2 + +L2: // n > 0 + ADDQ CX, CX // restore CF + MOVQ 0(R8)(SI*8), R11 + SBBQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L2 // if n > 0 goto L2 + +E2: NEGQ CX + MOVQ CX, c+72(FP) // return c + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + CMPQ DI, $32 + JG large + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V3 // if n < 4 goto V3 + +U3: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADDQ CX, R11 + ADCQ $0, R12 + ADCQ $0, R13 + ADCQ $0, R14 + SBBQ CX, CX // save CF + NEGQ CX + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U3 // if n >= 0 goto U3 + +V3: ADDQ $4, DI // n += 4 + JLE E3 // if n <= 0 goto E3 + +L3: // n > 0 + ADDQ 0(R8)(SI*8), CX + MOVQ CX, 0(R10)(SI*8) + SBBQ CX, CX // save CF + NEGQ CX + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L3 // if n > 0 goto L3 + +E3: MOVQ CX, c+56(FP) // return c + RET +large: + JMP ·addVWlarge(SB) + + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + CMPQ DI, $32 + JG large + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V4 // if n < 4 goto V4 + +U4: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SUBQ CX, R11 + SBBQ $0, R12 + SBBQ $0, R13 + SBBQ $0, R14 + SBBQ CX, CX // save CF + NEGQ CX + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U4 // if n >= 0 goto U4 + +V4: ADDQ $4, DI // n += 4 + JLE E4 // if n <= 0 goto E4 + +L4: // n > 0 + MOVQ 0(R8)(SI*8), R11 + SUBQ CX, R11 + MOVQ R11, 0(R10)(SI*8) + SBBQ CX, CX // save CF + NEGQ CX + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L4 // if n > 0 goto L4 + +E4: MOVQ CX, c+56(FP) // return c + RET +large: + JMP ·subVWlarge(SB) + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), BX // i = z + SUBQ $1, BX // i-- + JL X8b // i < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8)(BX*8), AX // w1 = x[n-1] + MOVQ $0, DX + SHLQ CX, AX, DX // w1>>ŝ + MOVQ DX, c+56(FP) + + CMPQ BX, $0 + JLE X8a // i <= 0 + + // i > 0 +L8: MOVQ AX, DX // w = w1 + MOVQ -8(R8)(BX*8), AX // w1 = x[i-1] + SHLQ CX, AX, DX // w<<s | w1>>ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ + SUBQ $1, BX // i-- + JG L8 // i > 0 + + // i <= 0 +X8a: SHLQ CX, AX // w1<<s + MOVQ AX, (R10) // z[0] = w1<<s + RET + +X8b: MOVQ $0, c+56(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), R11 + SUBQ $1, R11 // n-- + JL X9b // n < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8), AX // w1 = x[0] + MOVQ $0, DX + SHRQ CX, AX, DX // w1<<ŝ + MOVQ DX, c+56(FP) + + MOVQ $0, BX // i = 0 + JMP E9 + + // i < n-1 +L9: MOVQ AX, DX // w = w1 + MOVQ 8(R8)(BX*8), AX // w1 = x[i+1] + SHRQ CX, AX, DX // w>>s | w1<<ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ + ADDQ $1, BX // i++ + +E9: CMPQ BX, R11 + JL L9 // i < n-1 + + // i >= n-1 +X9a: SHRQ CX, AX // w1>>s + MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s + RET + +X9b: MOVQ $0, c+56(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ r+56(FP), CX // c = r + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + + CMPQ R11, $4 + JL E5 + +U5: // i+4 <= n + // regular loop body unrolled 4x + MOVQ (0*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (0*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (1*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (1*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (2*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (2*8)(R10)(BX*8) + MOVQ DX, CX + MOVQ (3*8)(R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (3*8)(R10)(BX*8) + MOVQ DX, CX + ADDQ $4, BX // i += 4 + + LEAQ 4(BX), DX + CMPQ DX, R11 + JLE U5 + JMP E5 + +L5: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (R10)(BX*8) + MOVQ DX, CX + ADDQ $1, BX // i++ + +E5: CMPQ BX, R11 // i < n + JL L5 + + MOVQ CX, c+64(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + CMPB ·support_adx(SB), $1 + JEQ adx + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + MOVQ $0, CX // c = 0 + MOVQ R11, R12 + ANDQ $-2, R12 + CMPQ R11, $2 + JAE A6 + JMP E6 + +A6: + MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ (R10)(BX*8), AX + ADCQ $0, DX + ADDQ CX, AX + ADCQ $0, DX + MOVQ DX, CX + MOVQ AX, (R10)(BX*8) + + MOVQ (8)(R8)(BX*8), AX + MULQ R9 + ADDQ (8)(R10)(BX*8), AX + ADCQ $0, DX + ADDQ CX, AX + ADCQ $0, DX + MOVQ DX, CX + MOVQ AX, (8)(R10)(BX*8) + + ADDQ $2, BX + CMPQ BX, R12 + JL A6 + JMP E6 + +L6: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + ADDQ AX, (R10)(BX*8) + ADCQ $0, DX + MOVQ DX, CX + ADDQ $1, BX // i++ + +E6: CMPQ BX, R11 // i < n + JL L6 + + MOVQ CX, c+56(FP) + RET + +adx: + MOVQ z_len+8(FP), R11 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), DX + MOVQ $0, BX // i = 0 + MOVQ $0, CX // carry + CMPQ R11, $8 + JAE adx_loop_header + CMPQ BX, R11 + JL adx_short + MOVQ CX, c+56(FP) + RET + +adx_loop_header: + MOVQ R11, R13 + ANDQ $-8, R13 +adx_loop: + XORQ R9, R9 // unset flags + MULXQ (R8), SI, DI + ADCXQ CX,SI + ADOXQ (R10), SI + MOVQ SI,(R10) + + MULXQ 8(R8), AX, CX + ADCXQ DI, AX + ADOXQ 8(R10), AX + MOVQ AX, 8(R10) + + MULXQ 16(R8), SI, DI + ADCXQ CX, SI + ADOXQ 16(R10), SI + MOVQ SI, 16(R10) + + MULXQ 24(R8), AX, CX + ADCXQ DI, AX + ADOXQ 24(R10), AX + MOVQ AX, 24(R10) + + MULXQ 32(R8), SI, DI + ADCXQ CX, SI + ADOXQ 32(R10), SI + MOVQ SI, 32(R10) + + MULXQ 40(R8), AX, CX + ADCXQ DI, AX + ADOXQ 40(R10), AX + MOVQ AX, 40(R10) + + MULXQ 48(R8), SI, DI + ADCXQ CX, SI + ADOXQ 48(R10), SI + MOVQ SI, 48(R10) + + MULXQ 56(R8), AX, CX + ADCXQ DI, AX + ADOXQ 56(R10), AX + MOVQ AX, 56(R10) + + ADCXQ R9, CX + ADOXQ R9, CX + + ADDQ $64, R8 + ADDQ $64, R10 + ADDQ $8, BX + + CMPQ BX, R13 + JL adx_loop + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + CMPQ BX, R11 + JL adx_short + MOVQ CX, c+56(FP) + RET + +adx_short: + MULXQ (R8)(BX*8), SI, DI + ADDQ CX, SI + ADCQ $0, DI + ADDQ SI, (R10)(BX*8) + ADCQ $0, DI + MOVQ DI, CX + ADDQ $1, BX // i++ + + CMPQ BX, R11 + JL adx_short + + MOVQ CX, c+56(FP) + RET + + + diff --git a/contrib/go/_std_1.18/src/math/big/arith_decl.go b/contrib/go/_std_1.18/src/math/big/arith_decl.go new file mode 100644 index 0000000000..eea3d6b325 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/arith_decl.go @@ -0,0 +1,19 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !math_big_pure_go +// +build !math_big_pure_go + +package big + +// implemented in arith_$GOARCH.s +func mulWW(x, y Word) (z1, z0 Word) +func addVV(z, x, y []Word) (c Word) +func subVV(z, x, y []Word) (c Word) +func addVW(z, x []Word, y Word) (c Word) +func subVW(z, x []Word, y Word) (c Word) +func shlVU(z, x []Word, s uint) (c Word) +func shrVU(z, x []Word, s uint) (c Word) +func mulAddVWW(z, x []Word, y, r Word) (c Word) +func addMulVVW(z, x []Word, y Word) (c Word) diff --git a/contrib/go/_std_1.18/src/math/big/decimal.go b/contrib/go/_std_1.18/src/math/big/decimal.go new file mode 100644 index 0000000000..716f03bfa4 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/decimal.go @@ -0,0 +1,270 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision decimal numbers. +// The implementation is for float to decimal conversion only; +// not general purpose use. +// The only operations are precise conversion from binary to +// decimal and rounding. +// +// The key observation and some code (shr) is borrowed from +// strconv/decimal.go: conversion of binary fractional values can be done +// precisely in multi-precision decimal because 2 divides 10 (required for +// >> of mantissa); but conversion of decimal floating-point values cannot +// be done precisely in binary representation. +// +// In contrast to strconv/decimal.go, only right shift is implemented in +// decimal format - left shift can be done precisely in binary format. + +package big + +// A decimal represents an unsigned floating-point number in decimal representation. +// The value of a non-zero decimal d is d.mant * 10**d.exp with 0.1 <= d.mant < 1, +// with the most-significant mantissa digit at index 0. For the zero decimal, the +// mantissa length and exponent are 0. +// The zero value for decimal represents a ready-to-use 0.0. +type decimal struct { + mant []byte // mantissa ASCII digits, big-endian + exp int // exponent +} + +// at returns the i'th mantissa digit, starting with the most significant digit at 0. +func (d *decimal) at(i int) byte { + if 0 <= i && i < len(d.mant) { + return d.mant[i] + } + return '0' +} + +// Maximum shift amount that can be done in one pass without overflow. +// A Word has _W bits and (1<<maxShift - 1)*10 + 9 must fit into Word. +const maxShift = _W - 4 + +// TODO(gri) Since we know the desired decimal precision when converting +// a floating-point number, we may be able to limit the number of decimal +// digits that need to be computed by init by providing an additional +// precision argument and keeping track of when a number was truncated early +// (equivalent of "sticky bit" in binary rounding). + +// TODO(gri) Along the same lines, enforce some limit to shift magnitudes +// to avoid "infinitely" long running conversions (until we run out of space). + +// Init initializes x to the decimal representation of m << shift (for +// shift >= 0), or m >> -shift (for shift < 0). +func (x *decimal) init(m nat, shift int) { + // special case 0 + if len(m) == 0 { + x.mant = x.mant[:0] + x.exp = 0 + return + } + + // Optimization: If we need to shift right, first remove any trailing + // zero bits from m to reduce shift amount that needs to be done in + // decimal format (since that is likely slower). + if shift < 0 { + ntz := m.trailingZeroBits() + s := uint(-shift) + if s >= ntz { + s = ntz // shift at most ntz bits + } + m = nat(nil).shr(m, s) + shift += int(s) + } + + // Do any shift left in binary representation. + if shift > 0 { + m = nat(nil).shl(m, uint(shift)) + shift = 0 + } + + // Convert mantissa into decimal representation. + s := m.utoa(10) + n := len(s) + x.exp = n + // Trim trailing zeros; instead the exponent is tracking + // the decimal point independent of the number of digits. + for n > 0 && s[n-1] == '0' { + n-- + } + x.mant = append(x.mant[:0], s[:n]...) + + // Do any (remaining) shift right in decimal representation. + if shift < 0 { + for shift < -maxShift { + shr(x, maxShift) + shift += maxShift + } + shr(x, uint(-shift)) + } +} + +// shr implements x >> s, for s <= maxShift. +func shr(x *decimal, s uint) { + // Division by 1<<s using shift-and-subtract algorithm. + + // pick up enough leading digits to cover first shift + r := 0 // read index + var n Word + for n>>s == 0 && r < len(x.mant) { + ch := Word(x.mant[r]) + r++ + n = n*10 + ch - '0' + } + if n == 0 { + // x == 0; shouldn't get here, but handle anyway + x.mant = x.mant[:0] + return + } + for n>>s == 0 { + r++ + n *= 10 + } + x.exp += 1 - r + + // read a digit, write a digit + w := 0 // write index + mask := Word(1)<<s - 1 + for r < len(x.mant) { + ch := Word(x.mant[r]) + r++ + d := n >> s + n &= mask // n -= d << s + x.mant[w] = byte(d + '0') + w++ + n = n*10 + ch - '0' + } + + // write extra digits that still fit + for n > 0 && w < len(x.mant) { + d := n >> s + n &= mask + x.mant[w] = byte(d + '0') + w++ + n = n * 10 + } + x.mant = x.mant[:w] // the number may be shorter (e.g. 1024 >> 10) + + // append additional digits that didn't fit + for n > 0 { + d := n >> s + n &= mask + x.mant = append(x.mant, byte(d+'0')) + n = n * 10 + } + + trim(x) +} + +func (x *decimal) String() string { + if len(x.mant) == 0 { + return "0" + } + + var buf []byte + switch { + case x.exp <= 0: + // 0.00ddd + buf = make([]byte, 0, 2+(-x.exp)+len(x.mant)) + buf = append(buf, "0."...) + buf = appendZeros(buf, -x.exp) + buf = append(buf, x.mant...) + + case /* 0 < */ x.exp < len(x.mant): + // dd.ddd + buf = make([]byte, 0, 1+len(x.mant)) + buf = append(buf, x.mant[:x.exp]...) + buf = append(buf, '.') + buf = append(buf, x.mant[x.exp:]...) + + default: // len(x.mant) <= x.exp + // ddd00 + buf = make([]byte, 0, x.exp) + buf = append(buf, x.mant...) + buf = appendZeros(buf, x.exp-len(x.mant)) + } + + return string(buf) +} + +// appendZeros appends n 0 digits to buf and returns buf. +func appendZeros(buf []byte, n int) []byte { + for ; n > 0; n-- { + buf = append(buf, '0') + } + return buf +} + +// shouldRoundUp reports if x should be rounded up +// if shortened to n digits. n must be a valid index +// for x.mant. +func shouldRoundUp(x *decimal, n int) bool { + if x.mant[n] == '5' && n+1 == len(x.mant) { + // exactly halfway - round to even + return n > 0 && (x.mant[n-1]-'0')&1 != 0 + } + // not halfway - digit tells all (x.mant has no trailing zeros) + return x.mant[n] >= '5' +} + +// round sets x to (at most) n mantissa digits by rounding it +// to the nearest even value with n (or fever) mantissa digits. +// If n < 0, x remains unchanged. +func (x *decimal) round(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + + if shouldRoundUp(x, n) { + x.roundUp(n) + } else { + x.roundDown(n) + } +} + +func (x *decimal) roundUp(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + // 0 <= n < len(x.mant) + + // find first digit < '9' + for n > 0 && x.mant[n-1] >= '9' { + n-- + } + + if n == 0 { + // all digits are '9's => round up to '1' and update exponent + x.mant[0] = '1' // ok since len(x.mant) > n + x.mant = x.mant[:1] + x.exp++ + return + } + + // n > 0 && x.mant[n-1] < '9' + x.mant[n-1]++ + x.mant = x.mant[:n] + // x already trimmed +} + +func (x *decimal) roundDown(n int) { + if n < 0 || n >= len(x.mant) { + return // nothing to do + } + x.mant = x.mant[:n] + trim(x) +} + +// trim cuts off any trailing zeros from x's mantissa; +// they are meaningless for the value of x. +func trim(x *decimal) { + i := len(x.mant) + for i > 0 && x.mant[i-1] == '0' { + i-- + } + x.mant = x.mant[:i] + if i == 0 { + x.exp = 0 + } +} diff --git a/contrib/go/_std_1.18/src/math/big/doc.go b/contrib/go/_std_1.18/src/math/big/doc.go new file mode 100644 index 0000000000..65ed019b74 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/doc.go @@ -0,0 +1,99 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package big implements arbitrary-precision arithmetic (big numbers). +The following numeric types are supported: + + Int signed integers + Rat rational numbers + Float floating-point numbers + +The zero value for an Int, Rat, or Float correspond to 0. Thus, new +values can be declared in the usual ways and denote 0 without further +initialization: + + var x Int // &x is an *Int of value 0 + var r = &Rat{} // r is a *Rat of value 0 + y := new(Float) // y is a *Float of value 0 + +Alternatively, new values can be allocated and initialized with factory +functions of the form: + + func NewT(v V) *T + +For instance, NewInt(x) returns an *Int set to the value of the int64 +argument x, NewRat(a, b) returns a *Rat set to the fraction a/b where +a and b are int64 values, and NewFloat(f) returns a *Float initialized +to the float64 argument f. More flexibility is provided with explicit +setters, for instance: + + var z1 Int + z1.SetUint64(123) // z1 := 123 + z2 := new(Rat).SetFloat64(1.25) // z2 := 5/4 + z3 := new(Float).SetInt(z1) // z3 := 123.0 + +Setters, numeric operations and predicates are represented as methods of +the form: + + func (z *T) SetV(v V) *T // z = v + func (z *T) Unary(x *T) *T // z = unary x + func (z *T) Binary(x, y *T) *T // z = x binary y + func (x *T) Pred() P // p = pred(x) + +with T one of Int, Rat, or Float. For unary and binary operations, the +result is the receiver (usually named z in that case; see below); if it +is one of the operands x or y it may be safely overwritten (and its memory +reused). + +Arithmetic expressions are typically written as a sequence of individual +method calls, with each call corresponding to an operation. The receiver +denotes the result and the method arguments are the operation's operands. +For instance, given three *Int values a, b and c, the invocation + + c.Add(a, b) + +computes the sum a + b and stores the result in c, overwriting whatever +value was held in c before. Unless specified otherwise, operations permit +aliasing of parameters, so it is perfectly ok to write + + sum.Add(sum, x) + +to accumulate values x in a sum. + +(By always passing in a result value via the receiver, memory use can be +much better controlled. Instead of having to allocate new memory for each +result, an operation can reuse the space allocated for the result value, +and overwrite that value with the new result in the process.) + +Notational convention: Incoming method parameters (including the receiver) +are named consistently in the API to clarify their use. Incoming operands +are usually named x, y, a, b, and so on, but never z. A parameter specifying +the result is named z (typically the receiver). + +For instance, the arguments for (*Int).Add are named x and y, and because +the receiver specifies the result destination, it is called z: + + func (z *Int) Add(x, y *Int) *Int + +Methods of this form typically return the incoming receiver as well, to +enable simple call chaining. + +Methods which don't require a result value to be passed in (for instance, +Int.Sign), simply return the result. In this case, the receiver is typically +the first operand, named x: + + func (x *Int) Sign() int + +Various methods support conversions between strings and corresponding +numeric values, and vice versa: *Int, *Rat, and *Float values implement +the Stringer interface for a (default) string representation of the value, +but also provide SetString methods to initialize a value from a string in +a variety of supported formats (see the respective SetString documentation). + +Finally, *Int, *Rat, and *Float satisfy the fmt package's Scanner interface +for scanning and (except for *Rat) the Formatter interface for formatted +printing. +*/ +package big diff --git a/contrib/go/_std_1.18/src/math/big/float.go b/contrib/go/_std_1.18/src/math/big/float.go new file mode 100644 index 0000000000..a8c91a6e54 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/float.go @@ -0,0 +1,1732 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision floating-point numbers. +// Like in the GNU MPFR library (https://www.mpfr.org/), operands +// can be of mixed precision. Unlike MPFR, the rounding mode is +// not specified with each operation, but with each operand. The +// rounding mode of the result operand determines the rounding +// mode of an operation. This is a from-scratch implementation. + +package big + +import ( + "fmt" + "math" + "math/bits" +) + +const debugFloat = false // enable for debugging + +// A nonzero finite Float represents a multi-precision floating point number +// +// sign × mantissa × 2**exponent +// +// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. +// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). +// All Floats are ordered, and the ordering of two Floats x and y +// is defined by x.Cmp(y). +// +// Each Float value also has a precision, rounding mode, and accuracy. +// The precision is the maximum number of mantissa bits available to +// represent the value. The rounding mode specifies how a result should +// be rounded to fit into the mantissa bits, and accuracy describes the +// rounding error with respect to the exact result. +// +// Unless specified otherwise, all operations (including setters) that +// specify a *Float variable for the result (usually via the receiver +// with the exception of MantExp), round the numeric result according +// to the precision and rounding mode of the result variable. +// +// If the provided result precision is 0 (see below), it is set to the +// precision of the argument with the largest precision value before any +// rounding takes place, and the rounding mode remains unchanged. Thus, +// uninitialized Floats provided as result arguments will have their +// precision set to a reasonable value determined by the operands, and +// their mode is the zero value for RoundingMode (ToNearestEven). +// +// By setting the desired precision to 24 or 53 and using matching rounding +// mode (typically ToNearestEven), Float operations produce the same results +// as the corresponding float32 or float64 IEEE-754 arithmetic for operands +// that correspond to normal (i.e., not denormal) float32 or float64 numbers. +// Exponent underflow and overflow lead to a 0 or an Infinity for different +// values than IEEE-754 because Float exponents have a much larger range. +// +// The zero (uninitialized) value for a Float is ready to use and represents +// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. +// +// Operations always take pointer arguments (*Float) rather +// than Float values, and each unique Float value requires +// its own unique *Float pointer. To "copy" a Float value, +// an existing (or newly allocated) Float must be set to +// a new value using the Float.Set method; shallow copies +// of Floats are not supported and may lead to errors. +type Float struct { + prec uint32 + mode RoundingMode + acc Accuracy + form form + neg bool + mant nat + exp int32 +} + +// An ErrNaN panic is raised by a Float operation that would lead to +// a NaN under IEEE-754 rules. An ErrNaN implements the error interface. +type ErrNaN struct { + msg string +} + +func (err ErrNaN) Error() string { + return err.msg +} + +// NewFloat allocates and returns a new Float set to x, +// with precision 53 and rounding mode ToNearestEven. +// NewFloat panics with ErrNaN if x is a NaN. +func NewFloat(x float64) *Float { + if math.IsNaN(x) { + panic(ErrNaN{"NewFloat(NaN)"}) + } + return new(Float).SetFloat64(x) +} + +// Exponent and precision limits. +const ( + MaxExp = math.MaxInt32 // largest supported exponent + MinExp = math.MinInt32 // smallest supported exponent + MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited +) + +// Internal representation: The mantissa bits x.mant of a nonzero finite +// Float x are stored in a nat slice long enough to hold up to x.prec bits; +// the slice may (but doesn't have to) be shorter if the mantissa contains +// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., +// the msb is shifted all the way "to the left"). Thus, if the mantissa has +// trailing 0 bits or x.prec is not a multiple of the Word size _W, +// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds +// to the value 0.5; the exponent x.exp shifts the binary point as needed. +// +// A zero or non-finite Float x ignores x.mant and x.exp. +// +// x form neg mant exp +// ---------------------------------------------------------- +// ±0 zero sign - - +// 0 < |x| < +Inf finite sign mantissa exponent +// ±Inf inf sign - - + +// A form value describes the internal representation. +type form byte + +// The form value order is relevant - do not change! +const ( + zero form = iota + finite + inf +) + +// RoundingMode determines how a Float value is rounded to the +// desired precision. Rounding may change the Float value; the +// rounding error is described by the Float's Accuracy. +type RoundingMode byte + +// These constants define supported rounding modes. +const ( + ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven + ToNearestAway // == IEEE 754-2008 roundTiesToAway + ToZero // == IEEE 754-2008 roundTowardZero + AwayFromZero // no IEEE 754-2008 equivalent + ToNegativeInf // == IEEE 754-2008 roundTowardNegative + ToPositiveInf // == IEEE 754-2008 roundTowardPositive +) + +//go:generate stringer -type=RoundingMode + +// Accuracy describes the rounding error produced by the most recent +// operation that generated a Float value, relative to the exact value. +type Accuracy int8 + +// Constants describing the Accuracy of a Float. +const ( + Below Accuracy = -1 + Exact Accuracy = 0 + Above Accuracy = +1 +) + +//go:generate stringer -type=Accuracy + +// SetPrec sets z's precision to prec and returns the (possibly) rounded +// value of z. Rounding occurs according to z's rounding mode if the mantissa +// cannot be represented in prec bits without loss of precision. +// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. +// If prec > MaxPrec, it is set to MaxPrec. +func (z *Float) SetPrec(prec uint) *Float { + z.acc = Exact // optimistically assume no rounding is needed + + // special case + if prec == 0 { + z.prec = 0 + if z.form == finite { + // truncate z to 0 + z.acc = makeAcc(z.neg) + z.form = zero + } + return z + } + + // general case + if prec > MaxPrec { + prec = MaxPrec + } + old := z.prec + z.prec = uint32(prec) + if z.prec < old { + z.round(0) + } + return z +} + +func makeAcc(above bool) Accuracy { + if above { + return Above + } + return Below +} + +// SetMode sets z's rounding mode to mode and returns an exact z. +// z remains unchanged otherwise. +// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. +func (z *Float) SetMode(mode RoundingMode) *Float { + z.mode = mode + z.acc = Exact + return z +} + +// Prec returns the mantissa precision of x in bits. +// The result may be 0 for |x| == 0 and |x| == Inf. +func (x *Float) Prec() uint { + return uint(x.prec) +} + +// MinPrec returns the minimum precision required to represent x exactly +// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). +// The result is 0 for |x| == 0 and |x| == Inf. +func (x *Float) MinPrec() uint { + if x.form != finite { + return 0 + } + return uint(len(x.mant))*_W - x.mant.trailingZeroBits() +} + +// Mode returns the rounding mode of x. +func (x *Float) Mode() RoundingMode { + return x.mode +} + +// Acc returns the accuracy of x produced by the most recent +// operation, unless explicitly documented otherwise by that +// operation. +func (x *Float) Acc() Accuracy { + return x.acc +} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x is ±0 +// +1 if x > 0 +// +func (x *Float) Sign() int { + if debugFloat { + x.validate() + } + if x.form == zero { + return 0 + } + if x.neg { + return -1 + } + return 1 +} + +// MantExp breaks x into its mantissa and exponent components +// and returns the exponent. If a non-nil mant argument is +// provided its value is set to the mantissa of x, with the +// same precision and rounding mode as x. The components +// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. +// Calling MantExp with a nil argument is an efficient way to +// get the exponent of the receiver. +// +// Special cases are: +// +// ( ±0).MantExp(mant) = 0, with mant set to ±0 +// (±Inf).MantExp(mant) = 0, with mant set to ±Inf +// +// x and mant may be the same in which case x is set to its +// mantissa value. +func (x *Float) MantExp(mant *Float) (exp int) { + if debugFloat { + x.validate() + } + if x.form == finite { + exp = int(x.exp) + } + if mant != nil { + mant.Copy(x) + if mant.form == finite { + mant.exp = 0 + } + } + return +} + +func (z *Float) setExpAndRound(exp int64, sbit uint) { + if exp < MinExp { + // underflow + z.acc = makeAcc(z.neg) + z.form = zero + return + } + + if exp > MaxExp { + // overflow + z.acc = makeAcc(!z.neg) + z.form = inf + return + } + + z.form = finite + z.exp = int32(exp) + z.round(sbit) +} + +// SetMantExp sets z to mant × 2**exp and returns z. +// The result z has the same precision and rounding mode +// as mant. SetMantExp is an inverse of MantExp but does +// not require 0.5 <= |mant| < 1.0. Specifically, for a +// given x of type *Float, SetMantExp relates to MantExp +// as follows: +// +// mant := new(Float) +// new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 +// +// Special cases are: +// +// z.SetMantExp( ±0, exp) = ±0 +// z.SetMantExp(±Inf, exp) = ±Inf +// +// z and mant may be the same in which case z's exponent +// is set to exp. +func (z *Float) SetMantExp(mant *Float, exp int) *Float { + if debugFloat { + z.validate() + mant.validate() + } + z.Copy(mant) + + if z.form == finite { + // 0 < |mant| < +Inf + z.setExpAndRound(int64(z.exp)+int64(exp), 0) + } + return z +} + +// Signbit reports whether x is negative or negative zero. +func (x *Float) Signbit() bool { + return x.neg +} + +// IsInf reports whether x is +Inf or -Inf. +func (x *Float) IsInf() bool { + return x.form == inf +} + +// IsInt reports whether x is an integer. +// ±Inf values are not integers. +func (x *Float) IsInt() bool { + if debugFloat { + x.validate() + } + // special cases + if x.form != finite { + return x.form == zero + } + // x.form == finite + if x.exp <= 0 { + return false + } + // x.exp > 0 + return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa +} + +// debugging support +func (x *Float) validate() { + if !debugFloat { + // avoid performance bugs + panic("validate called but debugFloat is not set") + } + if x.form != finite { + return + } + m := len(x.mant) + if m == 0 { + panic("nonzero finite number with empty mantissa") + } + const msb = 1 << (_W - 1) + if x.mant[m-1]&msb == 0 { + panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) + } + if x.prec == 0 { + panic("zero precision finite number") + } +} + +// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. +// sbit must be 0 or 1 and summarizes any "sticky bit" information one might +// have before calling round. z's mantissa must be normalized (with the msb set) +// or empty. +// +// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the +// sign of z. For correct rounding, the sign of z must be set correctly before +// calling round. +func (z *Float) round(sbit uint) { + if debugFloat { + z.validate() + } + + z.acc = Exact + if z.form != finite { + // ±0 or ±Inf => nothing left to do + return + } + // z.form == finite && len(z.mant) > 0 + // m > 0 implies z.prec > 0 (checked by validate) + + m := uint32(len(z.mant)) // present mantissa length in words + bits := m * _W // present mantissa bits; bits > 0 + if bits <= z.prec { + // mantissa fits => nothing to do + return + } + // bits > z.prec + + // Rounding is based on two bits: the rounding bit (rbit) and the + // sticky bit (sbit). The rbit is the bit immediately before the + // z.prec leading mantissa bits (the "0.5"). The sbit is set if any + // of the bits before the rbit are set (the "0.25", "0.125", etc.): + // + // rbit sbit => "fractional part" + // + // 0 0 == 0 + // 0 1 > 0 , < 0.5 + // 1 0 == 0.5 + // 1 1 > 0.5, < 1.0 + + // bits > z.prec: mantissa too large => round + r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 + rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit + // The sticky bit is only needed for rounding ToNearestEven + // or when the rounding bit is zero. Avoid computation otherwise. + if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { + sbit = z.mant.sticky(r) + } + sbit &= 1 // be safe and ensure it's a single bit + + // cut off extra words + n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision + if m > n { + copy(z.mant, z.mant[m-n:]) // move n last words to front + z.mant = z.mant[:n] + } + + // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word + ntz := n*_W - z.prec // 0 <= ntz < _W + lsb := Word(1) << ntz + + // round if result is inexact + if rbit|sbit != 0 { + // Make rounding decision: The result mantissa is truncated ("rounded down") + // by default. Decide if we need to increment, or "round up", the (unsigned) + // mantissa. + inc := false + switch z.mode { + case ToNegativeInf: + inc = z.neg + case ToZero: + // nothing to do + case ToNearestEven: + inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) + case ToNearestAway: + inc = rbit != 0 + case AwayFromZero: + inc = true + case ToPositiveInf: + inc = !z.neg + default: + panic("unreachable") + } + + // A positive result (!z.neg) is Above the exact result if we increment, + // and it's Below if we truncate (Exact results require no rounding). + // For a negative result (z.neg) it is exactly the opposite. + z.acc = makeAcc(inc != z.neg) + + if inc { + // add 1 to mantissa + if addVW(z.mant, z.mant, lsb) != 0 { + // mantissa overflow => adjust exponent + if z.exp >= MaxExp { + // exponent overflow + z.form = inf + return + } + z.exp++ + // adjust mantissa: divide by 2 to compensate for exponent adjustment + shrVU(z.mant, z.mant, 1) + // set msb == carry == 1 from the mantissa overflow above + const msb = 1 << (_W - 1) + z.mant[n-1] |= msb + } + } + } + + // zero out trailing bits in least-significant word + z.mant[0] &^= lsb - 1 + + if debugFloat { + z.validate() + } +} + +func (z *Float) setBits64(neg bool, x uint64) *Float { + if z.prec == 0 { + z.prec = 64 + } + z.acc = Exact + z.neg = neg + if x == 0 { + z.form = zero + return z + } + // x != 0 + z.form = finite + s := bits.LeadingZeros64(x) + z.mant = z.mant.setUint64(x << uint(s)) + z.exp = int32(64 - s) // always fits + if z.prec < 64 { + z.round(0) + } + return z +} + +// SetUint64 sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to 64 (and rounding will have +// no effect). +func (z *Float) SetUint64(x uint64) *Float { + return z.setBits64(false, x) +} + +// SetInt64 sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to 64 (and rounding will have +// no effect). +func (z *Float) SetInt64(x int64) *Float { + u := x + if u < 0 { + u = -u + } + // We cannot simply call z.SetUint64(uint64(u)) and change + // the sign afterwards because the sign affects rounding. + return z.setBits64(x < 0, uint64(u)) +} + +// SetFloat64 sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to 53 (and rounding will have +// no effect). SetFloat64 panics with ErrNaN if x is a NaN. +func (z *Float) SetFloat64(x float64) *Float { + if z.prec == 0 { + z.prec = 53 + } + if math.IsNaN(x) { + panic(ErrNaN{"Float.SetFloat64(NaN)"}) + } + z.acc = Exact + z.neg = math.Signbit(x) // handle -0, -Inf correctly + if x == 0 { + z.form = zero + return z + } + if math.IsInf(x, 0) { + z.form = inf + return z + } + // normalized x != 0 + z.form = finite + fmant, exp := math.Frexp(x) // get normalized mantissa + z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) + z.exp = int32(exp) // always fits + if z.prec < 53 { + z.round(0) + } + return z +} + +// fnorm normalizes mantissa m by shifting it to the left +// such that the msb of the most-significant word (msw) is 1. +// It returns the shift amount. It assumes that len(m) != 0. +func fnorm(m nat) int64 { + if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { + panic("msw of mantissa is 0") + } + s := nlz(m[len(m)-1]) + if s > 0 { + c := shlVU(m, m, s) + if debugFloat && c != 0 { + panic("nlz or shlVU incorrect") + } + } + return int64(s) +} + +// SetInt sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the larger of x.BitLen() +// or 64 (and rounding will have no effect). +func (z *Float) SetInt(x *Int) *Float { + // TODO(gri) can be more efficient if z.prec > 0 + // but small compared to the size of x, or if there + // are many trailing 0's. + bits := uint32(x.BitLen()) + if z.prec == 0 { + z.prec = umax32(bits, 64) + } + z.acc = Exact + z.neg = x.neg + if len(x.abs) == 0 { + z.form = zero + return z + } + // x != 0 + z.mant = z.mant.set(x.abs) + fnorm(z.mant) + z.setExpAndRound(int64(bits), 0) + return z +} + +// SetRat sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the largest of a.BitLen(), +// b.BitLen(), or 64; with x = a/b. +func (z *Float) SetRat(x *Rat) *Float { + if x.IsInt() { + return z.SetInt(x.Num()) + } + var a, b Float + a.SetInt(x.Num()) + b.SetInt(x.Denom()) + if z.prec == 0 { + z.prec = umax32(a.prec, b.prec) + } + return z.Quo(&a, &b) +} + +// SetInf sets z to the infinite Float -Inf if signbit is +// set, or +Inf if signbit is not set, and returns z. The +// precision of z is unchanged and the result is always +// Exact. +func (z *Float) SetInf(signbit bool) *Float { + z.acc = Exact + z.form = inf + z.neg = signbit + return z +} + +// Set sets z to the (possibly rounded) value of x and returns z. +// If z's precision is 0, it is changed to the precision of x +// before setting z (and rounding will have no effect). +// Rounding is performed according to z's precision and rounding +// mode; and z's accuracy reports the result error relative to the +// exact (not rounded) result. +func (z *Float) Set(x *Float) *Float { + if debugFloat { + x.validate() + } + z.acc = Exact + if z != x { + z.form = x.form + z.neg = x.neg + if x.form == finite { + z.exp = x.exp + z.mant = z.mant.set(x.mant) + } + if z.prec == 0 { + z.prec = x.prec + } else if z.prec < x.prec { + z.round(0) + } + } + return z +} + +// Copy sets z to x, with the same precision, rounding mode, and +// accuracy as x, and returns z. x is not changed even if z and +// x are the same. +func (z *Float) Copy(x *Float) *Float { + if debugFloat { + x.validate() + } + if z != x { + z.prec = x.prec + z.mode = x.mode + z.acc = x.acc + z.form = x.form + z.neg = x.neg + if z.form == finite { + z.mant = z.mant.set(x.mant) + z.exp = x.exp + } + } + return z +} + +// msb32 returns the 32 most significant bits of x. +func msb32(x nat) uint32 { + i := len(x) - 1 + if i < 0 { + return 0 + } + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + return uint32(x[i]) + case 64: + return uint32(x[i] >> 32) + } + panic("unreachable") +} + +// msb64 returns the 64 most significant bits of x. +func msb64(x nat) uint64 { + i := len(x) - 1 + if i < 0 { + return 0 + } + if debugFloat && x[i]&(1<<(_W-1)) == 0 { + panic("x not normalized") + } + switch _W { + case 32: + v := uint64(x[i]) << 32 + if i > 0 { + v |= uint64(x[i-1]) + } + return v + case 64: + return uint64(x[i]) + } + panic("unreachable") +} + +// Uint64 returns the unsigned integer resulting from truncating x +// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact +// if x is an integer and Below otherwise. +// The result is (0, Above) for x < 0, and (math.MaxUint64, Below) +// for x > math.MaxUint64. +func (x *Float) Uint64() (uint64, Accuracy) { + if debugFloat { + x.validate() + } + + switch x.form { + case finite: + if x.neg { + return 0, Above + } + // 0 < x < +Inf + if x.exp <= 0 { + // 0 < x < 1 + return 0, Below + } + // 1 <= x < Inf + if x.exp <= 64 { + // u = trunc(x) fits into a uint64 + u := msb64(x.mant) >> (64 - uint32(x.exp)) + if x.MinPrec() <= 64 { + return u, Exact + } + return u, Below // x truncated + } + // x too large + return math.MaxUint64, Below + + case zero: + return 0, Exact + + case inf: + if x.neg { + return 0, Above + } + return math.MaxUint64, Below + } + + panic("unreachable") +} + +// Int64 returns the integer resulting from truncating x towards zero. +// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is +// an integer, and Above (x < 0) or Below (x > 0) otherwise. +// The result is (math.MinInt64, Above) for x < math.MinInt64, +// and (math.MaxInt64, Below) for x > math.MaxInt64. +func (x *Float) Int64() (int64, Accuracy) { + if debugFloat { + x.validate() + } + + switch x.form { + case finite: + // 0 < |x| < +Inf + acc := makeAcc(x.neg) + if x.exp <= 0 { + // 0 < |x| < 1 + return 0, acc + } + // x.exp > 0 + + // 1 <= |x| < +Inf + if x.exp <= 63 { + // i = trunc(x) fits into an int64 (excluding math.MinInt64) + i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) + if x.neg { + i = -i + } + if x.MinPrec() <= uint(x.exp) { + return i, Exact + } + return i, acc // x truncated + } + if x.neg { + // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) + if x.exp == 64 && x.MinPrec() == 1 { + acc = Exact + } + return math.MinInt64, acc + } + // x too large + return math.MaxInt64, Below + + case zero: + return 0, Exact + + case inf: + if x.neg { + return math.MinInt64, Above + } + return math.MaxInt64, Below + } + + panic("unreachable") +} + +// Float32 returns the float32 value nearest to x. If x is too small to be +// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result +// is (0, Below) or (-0, Above), respectively, depending on the sign of x. +// If x is too large to be represented by a float32 (|x| > math.MaxFloat32), +// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. +func (x *Float) Float32() (float32, Accuracy) { + if debugFloat { + x.validate() + } + + switch x.form { + case finite: + // 0 < |x| < +Inf + + const ( + fbits = 32 // float size + mbits = 23 // mantissa size (excluding implicit msb) + ebits = fbits - mbits - 1 // 8 exponent size + bias = 1<<(ebits-1) - 1 // 127 exponent bias + dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) + emin = 1 - bias // -126 smallest unbiased exponent (normal) + emax = bias // 127 largest unbiased exponent (normal) + ) + + // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. + e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 + + // Compute precision p for float32 mantissa. + // If the exponent is too small, we have a denormal number before + // rounding and fewer than p mantissa bits of precision available + // (the exponent remains fixed but the mantissa gets shifted right). + p := mbits + 1 // precision of normal float + if e < emin { + // recompute precision + p = mbits + 1 - emin + int(e) + // If p == 0, the mantissa of x is shifted so much to the right + // that its msb falls immediately to the right of the float32 + // mantissa space. In other words, if the smallest denormal is + // considered "1.0", for p == 0, the mantissa value m is >= 0.5. + // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. + // If m == 0.5, it is rounded down to even, i.e., 0.0. + // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. + if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { + // underflow to ±0 + if x.neg { + var z float32 + return -z, Above + } + return 0.0, Below + } + // otherwise, round up + // We handle p == 0 explicitly because it's easy and because + // Float.round doesn't support rounding to 0 bits of precision. + if p == 0 { + if x.neg { + return -math.SmallestNonzeroFloat32, Below + } + return math.SmallestNonzeroFloat32, Above + } + } + // p > 0 + + // round + var r Float + r.prec = uint32(p) + r.Set(x) + e = r.exp - 1 + + // Rounding may have caused r to overflow to ±Inf + // (rounding never causes underflows to 0). + // If the exponent is too large, also overflow to ±Inf. + if r.form == inf || e > emax { + // overflow + if x.neg { + return float32(math.Inf(-1)), Below + } + return float32(math.Inf(+1)), Above + } + // e <= emax + + // Determine sign, biased exponent, and mantissa. + var sign, bexp, mant uint32 + if x.neg { + sign = 1 << (fbits - 1) + } + + // Rounding may have caused a denormal number to + // become normal. Check again. + if e < emin { + // denormal number: recompute precision + // Since rounding may have at best increased precision + // and we have eliminated p <= 0 early, we know p > 0. + // bexp == 0 for denormals + p = mbits + 1 - emin + int(e) + mant = msb32(r.mant) >> uint(fbits-p) + } else { + // normal number: emin <= e <= emax + bexp = uint32(e+bias) << mbits + mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) + } + + return math.Float32frombits(sign | bexp | mant), r.acc + + case zero: + if x.neg { + var z float32 + return -z, Exact + } + return 0.0, Exact + + case inf: + if x.neg { + return float32(math.Inf(-1)), Exact + } + return float32(math.Inf(+1)), Exact + } + + panic("unreachable") +} + +// Float64 returns the float64 value nearest to x. If x is too small to be +// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result +// is (0, Below) or (-0, Above), respectively, depending on the sign of x. +// If x is too large to be represented by a float64 (|x| > math.MaxFloat64), +// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. +func (x *Float) Float64() (float64, Accuracy) { + if debugFloat { + x.validate() + } + + switch x.form { + case finite: + // 0 < |x| < +Inf + + const ( + fbits = 64 // float size + mbits = 52 // mantissa size (excluding implicit msb) + ebits = fbits - mbits - 1 // 11 exponent size + bias = 1<<(ebits-1) - 1 // 1023 exponent bias + dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) + emin = 1 - bias // -1022 smallest unbiased exponent (normal) + emax = bias // 1023 largest unbiased exponent (normal) + ) + + // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. + e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 + + // Compute precision p for float64 mantissa. + // If the exponent is too small, we have a denormal number before + // rounding and fewer than p mantissa bits of precision available + // (the exponent remains fixed but the mantissa gets shifted right). + p := mbits + 1 // precision of normal float + if e < emin { + // recompute precision + p = mbits + 1 - emin + int(e) + // If p == 0, the mantissa of x is shifted so much to the right + // that its msb falls immediately to the right of the float64 + // mantissa space. In other words, if the smallest denormal is + // considered "1.0", for p == 0, the mantissa value m is >= 0.5. + // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. + // If m == 0.5, it is rounded down to even, i.e., 0.0. + // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. + if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { + // underflow to ±0 + if x.neg { + var z float64 + return -z, Above + } + return 0.0, Below + } + // otherwise, round up + // We handle p == 0 explicitly because it's easy and because + // Float.round doesn't support rounding to 0 bits of precision. + if p == 0 { + if x.neg { + return -math.SmallestNonzeroFloat64, Below + } + return math.SmallestNonzeroFloat64, Above + } + } + // p > 0 + + // round + var r Float + r.prec = uint32(p) + r.Set(x) + e = r.exp - 1 + + // Rounding may have caused r to overflow to ±Inf + // (rounding never causes underflows to 0). + // If the exponent is too large, also overflow to ±Inf. + if r.form == inf || e > emax { + // overflow + if x.neg { + return math.Inf(-1), Below + } + return math.Inf(+1), Above + } + // e <= emax + + // Determine sign, biased exponent, and mantissa. + var sign, bexp, mant uint64 + if x.neg { + sign = 1 << (fbits - 1) + } + + // Rounding may have caused a denormal number to + // become normal. Check again. + if e < emin { + // denormal number: recompute precision + // Since rounding may have at best increased precision + // and we have eliminated p <= 0 early, we know p > 0. + // bexp == 0 for denormals + p = mbits + 1 - emin + int(e) + mant = msb64(r.mant) >> uint(fbits-p) + } else { + // normal number: emin <= e <= emax + bexp = uint64(e+bias) << mbits + mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) + } + + return math.Float64frombits(sign | bexp | mant), r.acc + + case zero: + if x.neg { + var z float64 + return -z, Exact + } + return 0.0, Exact + + case inf: + if x.neg { + return math.Inf(-1), Exact + } + return math.Inf(+1), Exact + } + + panic("unreachable") +} + +// Int returns the result of truncating x towards zero; +// or nil if x is an infinity. +// The result is Exact if x.IsInt(); otherwise it is Below +// for x > 0, and Above for x < 0. +// If a non-nil *Int argument z is provided, Int stores +// the result in z instead of allocating a new Int. +func (x *Float) Int(z *Int) (*Int, Accuracy) { + if debugFloat { + x.validate() + } + + if z == nil && x.form <= finite { + z = new(Int) + } + + switch x.form { + case finite: + // 0 < |x| < +Inf + acc := makeAcc(x.neg) + if x.exp <= 0 { + // 0 < |x| < 1 + return z.SetInt64(0), acc + } + // x.exp > 0 + + // 1 <= |x| < +Inf + // determine minimum required precision for x + allBits := uint(len(x.mant)) * _W + exp := uint(x.exp) + if x.MinPrec() <= exp { + acc = Exact + } + // shift mantissa as needed + if z == nil { + z = new(Int) + } + z.neg = x.neg + switch { + case exp > allBits: + z.abs = z.abs.shl(x.mant, exp-allBits) + default: + z.abs = z.abs.set(x.mant) + case exp < allBits: + z.abs = z.abs.shr(x.mant, allBits-exp) + } + return z, acc + + case zero: + return z.SetInt64(0), Exact + + case inf: + return nil, makeAcc(x.neg) + } + + panic("unreachable") +} + +// Rat returns the rational number corresponding to x; +// or nil if x is an infinity. +// The result is Exact if x is not an Inf. +// If a non-nil *Rat argument z is provided, Rat stores +// the result in z instead of allocating a new Rat. +func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { + if debugFloat { + x.validate() + } + + if z == nil && x.form <= finite { + z = new(Rat) + } + + switch x.form { + case finite: + // 0 < |x| < +Inf + allBits := int32(len(x.mant)) * _W + // build up numerator and denominator + z.a.neg = x.neg + switch { + case x.exp > allBits: + z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) + z.b.abs = z.b.abs[:0] // == 1 (see Rat) + // z already in normal form + default: + z.a.abs = z.a.abs.set(x.mant) + z.b.abs = z.b.abs[:0] // == 1 (see Rat) + // z already in normal form + case x.exp < allBits: + z.a.abs = z.a.abs.set(x.mant) + t := z.b.abs.setUint64(1) + z.b.abs = t.shl(t, uint(allBits-x.exp)) + z.norm() + } + return z, Exact + + case zero: + return z.SetInt64(0), Exact + + case inf: + return nil, makeAcc(x.neg) + } + + panic("unreachable") +} + +// Abs sets z to the (possibly rounded) value |x| (the absolute value of x) +// and returns z. +func (z *Float) Abs(x *Float) *Float { + z.Set(x) + z.neg = false + return z +} + +// Neg sets z to the (possibly rounded) value of x with its sign negated, +// and returns z. +func (z *Float) Neg(x *Float) *Float { + z.Set(x) + z.neg = !z.neg + return z +} + +func validateBinaryOperands(x, y *Float) { + if !debugFloat { + // avoid performance bugs + panic("validateBinaryOperands called but debugFloat is not set") + } + if len(x.mant) == 0 { + panic("empty mantissa for x") + } + if len(y.mant) == 0 { + panic("empty mantissa for y") + } +} + +// z = x + y, ignoring signs of x and y for the addition +// but using the sign of z for rounding the result. +// x and y must have a non-empty mantissa and valid exponent. +func (z *Float) uadd(x, y *Float) { + // Note: This implementation requires 2 shifts most of the + // time. It is also inefficient if exponents or precisions + // differ by wide margins. The following article describes + // an efficient (but much more complicated) implementation + // compatible with the internal representation used here: + // + // Vincent Lefèvre: "The Generic Multiple-Precision Floating- + // Point Addition With Exact Rounding (as in the MPFR Library)" + // http://www.vinc17.net/research/papers/rnc6.pdf + + if debugFloat { + validateBinaryOperands(x, y) + } + + // compute exponents ex, ey for mantissa with "binary point" + // on the right (mantissa.0) - use int64 to avoid overflow + ex := int64(x.exp) - int64(len(x.mant))*_W + ey := int64(y.exp) - int64(len(y.mant))*_W + + al := alias(z.mant, x.mant) || alias(z.mant, y.mant) + + // TODO(gri) having a combined add-and-shift primitive + // could make this code significantly faster + switch { + case ex < ey: + if al { + t := nat(nil).shl(y.mant, uint(ey-ex)) + z.mant = z.mant.add(x.mant, t) + } else { + z.mant = z.mant.shl(y.mant, uint(ey-ex)) + z.mant = z.mant.add(x.mant, z.mant) + } + default: + // ex == ey, no shift needed + z.mant = z.mant.add(x.mant, y.mant) + case ex > ey: + if al { + t := nat(nil).shl(x.mant, uint(ex-ey)) + z.mant = z.mant.add(t, y.mant) + } else { + z.mant = z.mant.shl(x.mant, uint(ex-ey)) + z.mant = z.mant.add(z.mant, y.mant) + } + ex = ey + } + // len(z.mant) > 0 + + z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) +} + +// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction +// but using the sign of z for rounding the result. +// x and y must have a non-empty mantissa and valid exponent. +func (z *Float) usub(x, y *Float) { + // This code is symmetric to uadd. + // We have not factored the common code out because + // eventually uadd (and usub) should be optimized + // by special-casing, and the code will diverge. + + if debugFloat { + validateBinaryOperands(x, y) + } + + ex := int64(x.exp) - int64(len(x.mant))*_W + ey := int64(y.exp) - int64(len(y.mant))*_W + + al := alias(z.mant, x.mant) || alias(z.mant, y.mant) + + switch { + case ex < ey: + if al { + t := nat(nil).shl(y.mant, uint(ey-ex)) + z.mant = t.sub(x.mant, t) + } else { + z.mant = z.mant.shl(y.mant, uint(ey-ex)) + z.mant = z.mant.sub(x.mant, z.mant) + } + default: + // ex == ey, no shift needed + z.mant = z.mant.sub(x.mant, y.mant) + case ex > ey: + if al { + t := nat(nil).shl(x.mant, uint(ex-ey)) + z.mant = t.sub(t, y.mant) + } else { + z.mant = z.mant.shl(x.mant, uint(ex-ey)) + z.mant = z.mant.sub(z.mant, y.mant) + } + ex = ey + } + + // operands may have canceled each other out + if len(z.mant) == 0 { + z.acc = Exact + z.form = zero + z.neg = false + return + } + // len(z.mant) > 0 + + z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) +} + +// z = x * y, ignoring signs of x and y for the multiplication +// but using the sign of z for rounding the result. +// x and y must have a non-empty mantissa and valid exponent. +func (z *Float) umul(x, y *Float) { + if debugFloat { + validateBinaryOperands(x, y) + } + + // Note: This is doing too much work if the precision + // of z is less than the sum of the precisions of x + // and y which is often the case (e.g., if all floats + // have the same precision). + // TODO(gri) Optimize this for the common case. + + e := int64(x.exp) + int64(y.exp) + if x == y { + z.mant = z.mant.sqr(x.mant) + } else { + z.mant = z.mant.mul(x.mant, y.mant) + } + z.setExpAndRound(e-fnorm(z.mant), 0) +} + +// z = x / y, ignoring signs of x and y for the division +// but using the sign of z for rounding the result. +// x and y must have a non-empty mantissa and valid exponent. +func (z *Float) uquo(x, y *Float) { + if debugFloat { + validateBinaryOperands(x, y) + } + + // mantissa length in words for desired result precision + 1 + // (at least one extra bit so we get the rounding bit after + // the division) + n := int(z.prec/_W) + 1 + + // compute adjusted x.mant such that we get enough result precision + xadj := x.mant + if d := n - len(x.mant) + len(y.mant); d > 0 { + // d extra words needed => add d "0 digits" to x + xadj = make(nat, len(x.mant)+d) + copy(xadj[d:], x.mant) + } + // TODO(gri): If we have too many digits (d < 0), we should be able + // to shorten x for faster division. But we must be extra careful + // with rounding in that case. + + // Compute d before division since there may be aliasing of x.mant + // (via xadj) or y.mant with z.mant. + d := len(xadj) - len(y.mant) + + // divide + var r nat + z.mant, r = z.mant.div(nil, xadj, y.mant) + e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W + + // The result is long enough to include (at least) the rounding bit. + // If there's a non-zero remainder, the corresponding fractional part + // (if it were computed), would have a non-zero sticky bit (if it were + // zero, it couldn't have a non-zero remainder). + var sbit uint + if len(r) > 0 { + sbit = 1 + } + + z.setExpAndRound(e-fnorm(z.mant), sbit) +} + +// ucmp returns -1, 0, or +1, depending on whether +// |x| < |y|, |x| == |y|, or |x| > |y|. +// x and y must have a non-empty mantissa and valid exponent. +func (x *Float) ucmp(y *Float) int { + if debugFloat { + validateBinaryOperands(x, y) + } + + switch { + case x.exp < y.exp: + return -1 + case x.exp > y.exp: + return +1 + } + // x.exp == y.exp + + // compare mantissas + i := len(x.mant) + j := len(y.mant) + for i > 0 || j > 0 { + var xm, ym Word + if i > 0 { + i-- + xm = x.mant[i] + } + if j > 0 { + j-- + ym = y.mant[j] + } + switch { + case xm < ym: + return -1 + case xm > ym: + return +1 + } + } + + return 0 +} + +// Handling of sign bit as defined by IEEE 754-2008, section 6.3: +// +// When neither the inputs nor result are NaN, the sign of a product or +// quotient is the exclusive OR of the operands’ signs; the sign of a sum, +// or of a difference x−y regarded as a sum x+(−y), differs from at most +// one of the addends’ signs; and the sign of the result of conversions, +// the quantize operation, the roundToIntegral operations, and the +// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. +// These rules shall apply even when operands or results are zero or infinite. +// +// When the sum of two operands with opposite signs (or the difference of +// two operands with like signs) is exactly zero, the sign of that sum (or +// difference) shall be +0 in all rounding-direction attributes except +// roundTowardNegative; under that attribute, the sign of an exact zero +// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same +// sign as x even when x is zero. +// +// See also: https://play.golang.org/p/RtH3UCt5IH + +// Add sets z to the rounded sum x+y and returns z. If z's precision is 0, +// it is changed to the larger of x's or y's precision before the operation. +// Rounding is performed according to z's precision and rounding mode; and +// z's accuracy reports the result error relative to the exact (not rounded) +// result. Add panics with ErrNaN if x and y are infinities with opposite +// signs. The value of z is undefined in that case. +func (z *Float) Add(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + if x.form == finite && y.form == finite { + // x + y (common case) + + // Below we set z.neg = x.neg, and when z aliases y this will + // change the y operand's sign. This is fine, because if an + // operand aliases the receiver it'll be overwritten, but we still + // want the original x.neg and y.neg values when we evaluate + // x.neg != y.neg, so we need to save y.neg before setting z.neg. + yneg := y.neg + + z.neg = x.neg + if x.neg == yneg { + // x + y == x + y + // (-x) + (-y) == -(x + y) + z.uadd(x, y) + } else { + // x + (-y) == x - y == -(y - x) + // (-x) + y == y - x == -(x - y) + if x.ucmp(y) > 0 { + z.usub(x, y) + } else { + z.neg = !z.neg + z.usub(y, x) + } + } + if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { + z.neg = true + } + return z + } + + if x.form == inf && y.form == inf && x.neg != y.neg { + // +Inf + -Inf + // -Inf + +Inf + // value of z is undefined but make sure it's valid + z.acc = Exact + z.form = zero + z.neg = false + panic(ErrNaN{"addition of infinities with opposite signs"}) + } + + if x.form == zero && y.form == zero { + // ±0 + ±0 + z.acc = Exact + z.form = zero + z.neg = x.neg && y.neg // -0 + -0 == -0 + return z + } + + if x.form == inf || y.form == zero { + // ±Inf + y + // x + ±0 + return z.Set(x) + } + + // ±0 + y + // x + ±Inf + return z.Set(y) +} + +// Sub sets z to the rounded difference x-y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Sub panics with ErrNaN if x and y are infinities with equal +// signs. The value of z is undefined in that case. +func (z *Float) Sub(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + if x.form == finite && y.form == finite { + // x - y (common case) + yneg := y.neg + z.neg = x.neg + if x.neg != yneg { + // x - (-y) == x + y + // (-x) - y == -(x + y) + z.uadd(x, y) + } else { + // x - y == x - y == -(y - x) + // (-x) - (-y) == y - x == -(x - y) + if x.ucmp(y) > 0 { + z.usub(x, y) + } else { + z.neg = !z.neg + z.usub(y, x) + } + } + if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { + z.neg = true + } + return z + } + + if x.form == inf && y.form == inf && x.neg == y.neg { + // +Inf - +Inf + // -Inf - -Inf + // value of z is undefined but make sure it's valid + z.acc = Exact + z.form = zero + z.neg = false + panic(ErrNaN{"subtraction of infinities with equal signs"}) + } + + if x.form == zero && y.form == zero { + // ±0 - ±0 + z.acc = Exact + z.form = zero + z.neg = x.neg && !y.neg // -0 - +0 == -0 + return z + } + + if x.form == inf || y.form == zero { + // ±Inf - y + // x - ±0 + return z.Set(x) + } + + // ±0 - y + // x - ±Inf + return z.Neg(y) +} + +// Mul sets z to the rounded product x*y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Mul panics with ErrNaN if one operand is zero and the other +// operand an infinity. The value of z is undefined in that case. +func (z *Float) Mul(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + z.neg = x.neg != y.neg + + if x.form == finite && y.form == finite { + // x * y (common case) + z.umul(x, y) + return z + } + + z.acc = Exact + if x.form == zero && y.form == inf || x.form == inf && y.form == zero { + // ±0 * ±Inf + // ±Inf * ±0 + // value of z is undefined but make sure it's valid + z.form = zero + z.neg = false + panic(ErrNaN{"multiplication of zero with infinity"}) + } + + if x.form == inf || y.form == inf { + // ±Inf * y + // x * ±Inf + z.form = inf + return z + } + + // ±0 * y + // x * ±0 + z.form = zero + return z +} + +// Quo sets z to the rounded quotient x/y and returns z. +// Precision, rounding, and accuracy reporting are as for Add. +// Quo panics with ErrNaN if both operands are zero or infinities. +// The value of z is undefined in that case. +func (z *Float) Quo(x, y *Float) *Float { + if debugFloat { + x.validate() + y.validate() + } + + if z.prec == 0 { + z.prec = umax32(x.prec, y.prec) + } + + z.neg = x.neg != y.neg + + if x.form == finite && y.form == finite { + // x / y (common case) + z.uquo(x, y) + return z + } + + z.acc = Exact + if x.form == zero && y.form == zero || x.form == inf && y.form == inf { + // ±0 / ±0 + // ±Inf / ±Inf + // value of z is undefined but make sure it's valid + z.form = zero + z.neg = false + panic(ErrNaN{"division of zero by zero or infinity by infinity"}) + } + + if x.form == zero || y.form == inf { + // ±0 / y + // x / ±Inf + z.form = zero + return z + } + + // x / ±0 + // ±Inf / y + z.form = inf + return z +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) +// +1 if x > y +// +func (x *Float) Cmp(y *Float) int { + if debugFloat { + x.validate() + y.validate() + } + + mx := x.ord() + my := y.ord() + switch { + case mx < my: + return -1 + case mx > my: + return +1 + } + // mx == my + + // only if |mx| == 1 we have to compare the mantissae + switch mx { + case -1: + return y.ucmp(x) + case +1: + return x.ucmp(y) + } + + return 0 +} + +// ord classifies x and returns: +// +// -2 if -Inf == x +// -1 if -Inf < x < 0 +// 0 if x == 0 (signed or unsigned) +// +1 if 0 < x < +Inf +// +2 if x == +Inf +// +func (x *Float) ord() int { + var m int + switch x.form { + case finite: + m = 1 + case zero: + return 0 + case inf: + m = 2 + } + if x.neg { + m = -m + } + return m +} + +func umax32(x, y uint32) uint32 { + if x > y { + return x + } + return y +} diff --git a/contrib/go/_std_1.18/src/math/big/floatconv.go b/contrib/go/_std_1.18/src/math/big/floatconv.go new file mode 100644 index 0000000000..57b7df3936 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/floatconv.go @@ -0,0 +1,304 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements string-to-Float conversion functions. + +package big + +import ( + "fmt" + "io" + "strings" +) + +var floatZero Float + +// SetString sets z to the value of s and returns z and a boolean indicating +// success. s must be a floating-point number of the same format as accepted +// by Parse, with base argument 0. The entire string (not just a prefix) must +// be valid for success. If the operation failed, the value of z is undefined +// but the returned value is nil. +func (z *Float) SetString(s string) (*Float, bool) { + if f, _, err := z.Parse(s, 0); err == nil { + return f, true + } + return nil, false +} + +// scan is like Parse but reads the longest possible prefix representing a valid +// floating point number from an io.ByteScanner rather than a string. It serves +// as the implementation of Parse. It does not recognize ±Inf and does not expect +// EOF at the end. +func (z *Float) scan(r io.ByteScanner, base int) (f *Float, b int, err error) { + prec := z.prec + if prec == 0 { + prec = 64 + } + + // A reasonable value in case of an error. + z.form = zero + + // sign + z.neg, err = scanSign(r) + if err != nil { + return + } + + // mantissa + var fcount int // fractional digit count; valid if <= 0 + z.mant, b, fcount, err = z.mant.scan(r, base, true) + if err != nil { + return + } + + // exponent + var exp int64 + var ebase int + exp, ebase, err = scanExponent(r, true, base == 0) + if err != nil { + return + } + + // special-case 0 + if len(z.mant) == 0 { + z.prec = prec + z.acc = Exact + z.form = zero + f = z + return + } + // len(z.mant) > 0 + + // The mantissa may have a radix point (fcount <= 0) and there + // may be a nonzero exponent exp. The radix point amounts to a + // division by b**(-fcount). An exponent means multiplication by + // ebase**exp. Finally, mantissa normalization (shift left) requires + // a correcting multiplication by 2**(-shiftcount). Multiplications + // are commutative, so we can apply them in any order as long as there + // is no loss of precision. We only have powers of 2 and 10, and + // we split powers of 10 into the product of the same powers of + // 2 and 5. This reduces the size of the multiplication factor + // needed for base-10 exponents. + + // normalize mantissa and determine initial exponent contributions + exp2 := int64(len(z.mant))*_W - fnorm(z.mant) + exp5 := int64(0) + + // determine binary or decimal exponent contribution of radix point + if fcount < 0 { + // The mantissa has a radix point ddd.dddd; and + // -fcount is the number of digits to the right + // of '.'. Adjust relevant exponent accordingly. + d := int64(fcount) + switch b { + case 10: + exp5 = d + fallthrough // 10**e == 5**e * 2**e + case 2: + exp2 += d + case 8: + exp2 += d * 3 // octal digits are 3 bits each + case 16: + exp2 += d * 4 // hexadecimal digits are 4 bits each + default: + panic("unexpected mantissa base") + } + // fcount consumed - not needed anymore + } + + // take actual exponent into account + switch ebase { + case 10: + exp5 += exp + fallthrough // see fallthrough above + case 2: + exp2 += exp + default: + panic("unexpected exponent base") + } + // exp consumed - not needed anymore + + // apply 2**exp2 + if MinExp <= exp2 && exp2 <= MaxExp { + z.prec = prec + z.form = finite + z.exp = int32(exp2) + f = z + } else { + err = fmt.Errorf("exponent overflow") + return + } + + if exp5 == 0 { + // no decimal exponent contribution + z.round(0) + return + } + // exp5 != 0 + + // apply 5**exp5 + p := new(Float).SetPrec(z.Prec() + 64) // use more bits for p -- TODO(gri) what is the right number? + if exp5 < 0 { + z.Quo(z, p.pow5(uint64(-exp5))) + } else { + z.Mul(z, p.pow5(uint64(exp5))) + } + + return +} + +// These powers of 5 fit into a uint64. +// +// for p, q := uint64(0), uint64(1); p < q; p, q = q, q*5 { +// fmt.Println(q) +// } +// +var pow5tab = [...]uint64{ + 1, + 5, + 25, + 125, + 625, + 3125, + 15625, + 78125, + 390625, + 1953125, + 9765625, + 48828125, + 244140625, + 1220703125, + 6103515625, + 30517578125, + 152587890625, + 762939453125, + 3814697265625, + 19073486328125, + 95367431640625, + 476837158203125, + 2384185791015625, + 11920928955078125, + 59604644775390625, + 298023223876953125, + 1490116119384765625, + 7450580596923828125, +} + +// pow5 sets z to 5**n and returns z. +// n must not be negative. +func (z *Float) pow5(n uint64) *Float { + const m = uint64(len(pow5tab) - 1) + if n <= m { + return z.SetUint64(pow5tab[n]) + } + // n > m + + z.SetUint64(pow5tab[m]) + n -= m + + // use more bits for f than for z + // TODO(gri) what is the right number? + f := new(Float).SetPrec(z.Prec() + 64).SetUint64(5) + + for n > 0 { + if n&1 != 0 { + z.Mul(z, f) + } + f.Mul(f, f) + n >>= 1 + } + + return z +} + +// Parse parses s which must contain a text representation of a floating- +// point number with a mantissa in the given conversion base (the exponent +// is always a decimal number), or a string representing an infinite value. +// +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// +// It sets z to the (possibly rounded) value of the corresponding floating- +// point value, and returns z, the actual base b, and an error err, if any. +// The entire string (not just a prefix) must be consumed for success. +// If z's precision is 0, it is changed to 64 before rounding takes effect. +// The number must be of the form: +// +// number = [ sign ] ( float | "inf" | "Inf" ) . +// sign = "+" | "-" . +// float = ( mantissa | prefix pmantissa ) [ exponent ] . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . +// +// The base argument must be 0, 2, 8, 10, or 16. Providing an invalid base +// argument will lead to a run-time panic. +// +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0o'' or ``0O'' selects base 8, and +// ``0x'' or ``0X'' selects base 16. Otherwise, the actual base is 10 and +// no prefix is accepted. The octal prefix "0" is not supported (a leading +// "0" is simply considered a "0"). +// +// A "p" or "P" exponent indicates a base 2 (rather then base 10) exponent; +// for instance, "0x1.fffffffffffffp1023" (using base 0) represents the +// maximum float64 value. For hexadecimal mantissae, the exponent character +// must be one of 'p' or 'P', if present (an "e" or "E" exponent indicator +// cannot be distinguished from a mantissa digit). +// +// The returned *Float f is nil and the value of z is valid but not +// defined if an error is reported. +// +func (z *Float) Parse(s string, base int) (f *Float, b int, err error) { + // scan doesn't handle ±Inf + if len(s) == 3 && (s == "Inf" || s == "inf") { + f = z.SetInf(false) + return + } + if len(s) == 4 && (s[0] == '+' || s[0] == '-') && (s[1:] == "Inf" || s[1:] == "inf") { + f = z.SetInf(s[0] == '-') + return + } + + r := strings.NewReader(s) + if f, b, err = z.scan(r, base); err != nil { + return + } + + // entire string must have been consumed + if ch, err2 := r.ReadByte(); err2 == nil { + err = fmt.Errorf("expected end of string, found %q", ch) + } else if err2 != io.EOF { + err = err2 + } + + return +} + +// ParseFloat is like f.Parse(s, base) with f set to the given precision +// and rounding mode. +func ParseFloat(s string, base int, prec uint, mode RoundingMode) (f *Float, b int, err error) { + return new(Float).SetPrec(prec).SetMode(mode).Parse(s, base) +} + +var _ fmt.Scanner = (*Float)(nil) // *Float must implement fmt.Scanner + +// Scan is a support routine for fmt.Scanner; it sets z to the value of +// the scanned number. It accepts formats whose verbs are supported by +// fmt.Scan for floating point values, which are: +// 'b' (binary), 'e', 'E', 'f', 'F', 'g' and 'G'. +// Scan doesn't handle ±Inf. +func (z *Float) Scan(s fmt.ScanState, ch rune) error { + s.SkipSpace() + _, _, err := z.scan(byteReader{s}, 0) + return err +} diff --git a/contrib/go/_std_1.18/src/math/big/floatmarsh.go b/contrib/go/_std_1.18/src/math/big/floatmarsh.go new file mode 100644 index 0000000000..d1c1dab069 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/floatmarsh.go @@ -0,0 +1,120 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Floats. + +package big + +import ( + "encoding/binary" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. +const floatGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +// The Float value and all its attributes (precision, +// rounding mode, accuracy) are marshaled. +func (x *Float) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + + // determine max. space (bytes) required for encoding + sz := 1 + 1 + 4 // version + mode|acc|form|neg (3+2+2+1bit) + prec + n := 0 // number of mantissa words + if x.form == finite { + // add space for mantissa and exponent + n = int((x.prec + (_W - 1)) / _W) // required mantissa length in words for given precision + // actual mantissa slice could be shorter (trailing 0's) or longer (unused bits): + // - if shorter, only encode the words present + // - if longer, cut off unused words when encoding in bytes + // (in practice, this should never happen since rounding + // takes care of it, but be safe and do it always) + if len(x.mant) < n { + n = len(x.mant) + } + // len(x.mant) >= n + sz += 4 + n*_S // exp + mant + } + buf := make([]byte, sz) + + buf[0] = floatGobVersion + b := byte(x.mode&7)<<5 | byte((x.acc+1)&3)<<3 | byte(x.form&3)<<1 + if x.neg { + b |= 1 + } + buf[1] = b + binary.BigEndian.PutUint32(buf[2:], x.prec) + + if x.form == finite { + binary.BigEndian.PutUint32(buf[6:], uint32(x.exp)) + x.mant[len(x.mant)-n:].bytes(buf[10:]) // cut off unused trailing words + } + + return buf, nil +} + +// GobDecode implements the gob.GobDecoder interface. +// The result is rounded per the precision and rounding mode of +// z unless z's precision is 0, in which case z is set exactly +// to the decoded value. +func (z *Float) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Float{} + return nil + } + + if buf[0] != floatGobVersion { + return fmt.Errorf("Float.GobDecode: encoding version %d not supported", buf[0]) + } + + oldPrec := z.prec + oldMode := z.mode + + b := buf[1] + z.mode = RoundingMode((b >> 5) & 7) + z.acc = Accuracy((b>>3)&3) - 1 + z.form = form((b >> 1) & 3) + z.neg = b&1 != 0 + z.prec = binary.BigEndian.Uint32(buf[2:]) + + if z.form == finite { + z.exp = int32(binary.BigEndian.Uint32(buf[6:])) + z.mant = z.mant.setBytes(buf[10:]) + } + + if oldPrec != 0 { + z.mode = oldMode + z.SetPrec(uint(oldPrec)) + } + + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +// Only the Float value is marshaled (in full precision), other +// attributes such as precision or accuracy are ignored. +func (x *Float) MarshalText() (text []byte, err error) { + if x == nil { + return []byte("<nil>"), nil + } + var buf []byte + return x.Append(buf, 'g', -1), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +// The result is rounded per the precision and rounding mode of z. +// If z's precision is 0, it is changed to 64 before rounding takes +// effect. +func (z *Float) UnmarshalText(text []byte) error { + // TODO(gri): get rid of the []byte/string conversion + _, _, err := z.Parse(string(text), 0) + if err != nil { + err = fmt.Errorf("math/big: cannot unmarshal %q into a *big.Float (%v)", text, err) + } + return err +} diff --git a/contrib/go/_std_1.18/src/math/big/ftoa.go b/contrib/go/_std_1.18/src/math/big/ftoa.go new file mode 100644 index 0000000000..5506e6e425 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/ftoa.go @@ -0,0 +1,536 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements Float-to-string conversion functions. +// It is closely following the corresponding implementation +// in strconv/ftoa.go, but modified and simplified for Float. + +package big + +import ( + "bytes" + "fmt" + "strconv" +) + +// Text converts the floating-point number x to a string according +// to the given format and precision prec. The format is one of: +// +// 'e' -d.dddde±dd, decimal exponent, at least two (possibly 0) exponent digits +// 'E' -d.ddddE±dd, decimal exponent, at least two (possibly 0) exponent digits +// 'f' -ddddd.dddd, no exponent +// 'g' like 'e' for large exponents, like 'f' otherwise +// 'G' like 'E' for large exponents, like 'f' otherwise +// 'x' -0xd.dddddp±dd, hexadecimal mantissa, decimal power of two exponent +// 'p' -0x.dddp±dd, hexadecimal mantissa, decimal power of two exponent (non-standard) +// 'b' -ddddddp±dd, decimal mantissa, decimal power of two exponent (non-standard) +// +// For the power-of-two exponent formats, the mantissa is printed in normalized form: +// +// 'x' hexadecimal mantissa in [1, 2), or 0 +// 'p' hexadecimal mantissa in [½, 1), or 0 +// 'b' decimal integer mantissa using x.Prec() bits, or 0 +// +// Note that the 'x' form is the one used by most other languages and libraries. +// +// If format is a different character, Text returns a "%" followed by the +// unrecognized format character. +// +// The precision prec controls the number of digits (excluding the exponent) +// printed by the 'e', 'E', 'f', 'g', 'G', and 'x' formats. +// For 'e', 'E', 'f', and 'x', it is the number of digits after the decimal point. +// For 'g' and 'G' it is the total number of digits. A negative precision selects +// the smallest number of decimal digits necessary to identify the value x uniquely +// using x.Prec() mantissa bits. +// The prec value is ignored for the 'b' and 'p' formats. +func (x *Float) Text(format byte, prec int) string { + cap := 10 // TODO(gri) determine a good/better value here + if prec > 0 { + cap += prec + } + return string(x.Append(make([]byte, 0, cap), format, prec)) +} + +// String formats x like x.Text('g', 10). +// (String must be called explicitly, Float.Format does not support %s verb.) +func (x *Float) String() string { + return x.Text('g', 10) +} + +// Append appends to buf the string form of the floating-point number x, +// as generated by x.Text, and returns the extended buffer. +func (x *Float) Append(buf []byte, fmt byte, prec int) []byte { + // sign + if x.neg { + buf = append(buf, '-') + } + + // Inf + if x.form == inf { + if !x.neg { + buf = append(buf, '+') + } + return append(buf, "Inf"...) + } + + // pick off easy formats + switch fmt { + case 'b': + return x.fmtB(buf) + case 'p': + return x.fmtP(buf) + case 'x': + return x.fmtX(buf, prec) + } + + // Algorithm: + // 1) convert Float to multiprecision decimal + // 2) round to desired precision + // 3) read digits out and format + + // 1) convert Float to multiprecision decimal + var d decimal // == 0.0 + if x.form == finite { + // x != 0 + d.init(x.mant, int(x.exp)-x.mant.bitLen()) + } + + // 2) round to desired precision + shortest := false + if prec < 0 { + shortest = true + roundShortest(&d, x) + // Precision for shortest representation mode. + switch fmt { + case 'e', 'E': + prec = len(d.mant) - 1 + case 'f': + prec = max(len(d.mant)-d.exp, 0) + case 'g', 'G': + prec = len(d.mant) + } + } else { + // round appropriately + switch fmt { + case 'e', 'E': + // one digit before and number of digits after decimal point + d.round(1 + prec) + case 'f': + // number of digits before and after decimal point + d.round(d.exp + prec) + case 'g', 'G': + if prec == 0 { + prec = 1 + } + d.round(prec) + } + } + + // 3) read digits out and format + switch fmt { + case 'e', 'E': + return fmtE(buf, fmt, prec, d) + case 'f': + return fmtF(buf, prec, d) + case 'g', 'G': + // trim trailing fractional zeros in %e format + eprec := prec + if eprec > len(d.mant) && len(d.mant) >= d.exp { + eprec = len(d.mant) + } + // %e is used if the exponent from the conversion + // is less than -4 or greater than or equal to the precision. + // If precision was the shortest possible, use eprec = 6 for + // this decision. + if shortest { + eprec = 6 + } + exp := d.exp - 1 + if exp < -4 || exp >= eprec { + if prec > len(d.mant) { + prec = len(d.mant) + } + return fmtE(buf, fmt+'e'-'g', prec-1, d) + } + if prec > d.exp { + prec = len(d.mant) + } + return fmtF(buf, max(prec-d.exp, 0), d) + } + + // unknown format + if x.neg { + buf = buf[:len(buf)-1] // sign was added prematurely - remove it again + } + return append(buf, '%', fmt) +} + +func roundShortest(d *decimal, x *Float) { + // if the mantissa is zero, the number is zero - stop now + if len(d.mant) == 0 { + return + } + + // Approach: All numbers in the interval [x - 1/2ulp, x + 1/2ulp] + // (possibly exclusive) round to x for the given precision of x. + // Compute the lower and upper bound in decimal form and find the + // shortest decimal number d such that lower <= d <= upper. + + // TODO(gri) strconv/ftoa.do describes a shortcut in some cases. + // See if we can use it (in adjusted form) here as well. + + // 1) Compute normalized mantissa mant and exponent exp for x such + // that the lsb of mant corresponds to 1/2 ulp for the precision of + // x (i.e., for mant we want x.prec + 1 bits). + mant := nat(nil).set(x.mant) + exp := int(x.exp) - mant.bitLen() + s := mant.bitLen() - int(x.prec+1) + switch { + case s < 0: + mant = mant.shl(mant, uint(-s)) + case s > 0: + mant = mant.shr(mant, uint(+s)) + } + exp += s + // x = mant * 2**exp with lsb(mant) == 1/2 ulp of x.prec + + // 2) Compute lower bound by subtracting 1/2 ulp. + var lower decimal + var tmp nat + lower.init(tmp.sub(mant, natOne), exp) + + // 3) Compute upper bound by adding 1/2 ulp. + var upper decimal + upper.init(tmp.add(mant, natOne), exp) + + // The upper and lower bounds are possible outputs only if + // the original mantissa is even, so that ToNearestEven rounding + // would round to the original mantissa and not the neighbors. + inclusive := mant[0]&2 == 0 // test bit 1 since original mantissa was shifted by 1 + + // Now we can figure out the minimum number of digits required. + // Walk along until d has distinguished itself from upper and lower. + for i, m := range d.mant { + l := lower.at(i) + u := upper.at(i) + + // Okay to round down (truncate) if lower has a different digit + // or if lower is inclusive and is exactly the result of rounding + // down (i.e., and we have reached the final digit of lower). + okdown := l != m || inclusive && i+1 == len(lower.mant) + + // Okay to round up if upper has a different digit and either upper + // is inclusive or upper is bigger than the result of rounding up. + okup := m != u && (inclusive || m+1 < u || i+1 < len(upper.mant)) + + // If it's okay to do either, then round to the nearest one. + // If it's okay to do only one, do it. + switch { + case okdown && okup: + d.round(i + 1) + return + case okdown: + d.roundDown(i + 1) + return + case okup: + d.roundUp(i + 1) + return + } + } +} + +// %e: d.ddddde±dd +func fmtE(buf []byte, fmt byte, prec int, d decimal) []byte { + // first digit + ch := byte('0') + if len(d.mant) > 0 { + ch = d.mant[0] + } + buf = append(buf, ch) + + // .moredigits + if prec > 0 { + buf = append(buf, '.') + i := 1 + m := min(len(d.mant), prec+1) + if i < m { + buf = append(buf, d.mant[i:m]...) + i = m + } + for ; i <= prec; i++ { + buf = append(buf, '0') + } + } + + // e± + buf = append(buf, fmt) + var exp int64 + if len(d.mant) > 0 { + exp = int64(d.exp) - 1 // -1 because first digit was printed before '.' + } + if exp < 0 { + ch = '-' + exp = -exp + } else { + ch = '+' + } + buf = append(buf, ch) + + // dd...d + if exp < 10 { + buf = append(buf, '0') // at least 2 exponent digits + } + return strconv.AppendInt(buf, exp, 10) +} + +// %f: ddddddd.ddddd +func fmtF(buf []byte, prec int, d decimal) []byte { + // integer, padded with zeros as needed + if d.exp > 0 { + m := min(len(d.mant), d.exp) + buf = append(buf, d.mant[:m]...) + for ; m < d.exp; m++ { + buf = append(buf, '0') + } + } else { + buf = append(buf, '0') + } + + // fraction + if prec > 0 { + buf = append(buf, '.') + for i := 0; i < prec; i++ { + buf = append(buf, d.at(d.exp+i)) + } + } + + return buf +} + +// fmtB appends the string of x in the format mantissa "p" exponent +// with a decimal mantissa and a binary exponent, or 0" if x is zero, +// and returns the extended buffer. +// The mantissa is normalized such that is uses x.Prec() bits in binary +// representation. +// The sign of x is ignored, and x must not be an Inf. +// (The caller handles Inf before invoking fmtB.) +func (x *Float) fmtB(buf []byte) []byte { + if x.form == zero { + return append(buf, '0') + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + // x != 0 + + // adjust mantissa to use exactly x.prec bits + m := x.mant + switch w := uint32(len(x.mant)) * _W; { + case w < x.prec: + m = nat(nil).shl(m, uint(x.prec-w)) + case w > x.prec: + m = nat(nil).shr(m, uint(w-x.prec)) + } + + buf = append(buf, m.utoa(10)...) + buf = append(buf, 'p') + e := int64(x.exp) - int64(x.prec) + if e >= 0 { + buf = append(buf, '+') + } + return strconv.AppendInt(buf, e, 10) +} + +// fmtX appends the string of x in the format "0x1." mantissa "p" exponent +// with a hexadecimal mantissa and a binary exponent, or "0x0p0" if x is zero, +// and returns the extended buffer. +// A non-zero mantissa is normalized such that 1.0 <= mantissa < 2.0. +// The sign of x is ignored, and x must not be an Inf. +// (The caller handles Inf before invoking fmtX.) +func (x *Float) fmtX(buf []byte, prec int) []byte { + if x.form == zero { + buf = append(buf, "0x0"...) + if prec > 0 { + buf = append(buf, '.') + for i := 0; i < prec; i++ { + buf = append(buf, '0') + } + } + buf = append(buf, "p+00"...) + return buf + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + + // round mantissa to n bits + var n uint + if prec < 0 { + n = 1 + (x.MinPrec()-1+3)/4*4 // round MinPrec up to 1 mod 4 + } else { + n = 1 + 4*uint(prec) + } + // n%4 == 1 + x = new(Float).SetPrec(n).SetMode(x.mode).Set(x) + + // adjust mantissa to use exactly n bits + m := x.mant + switch w := uint(len(x.mant)) * _W; { + case w < n: + m = nat(nil).shl(m, n-w) + case w > n: + m = nat(nil).shr(m, w-n) + } + exp64 := int64(x.exp) - 1 // avoid wrap-around + + hm := m.utoa(16) + if debugFloat && hm[0] != '1' { + panic("incorrect mantissa: " + string(hm)) + } + buf = append(buf, "0x1"...) + if len(hm) > 1 { + buf = append(buf, '.') + buf = append(buf, hm[1:]...) + } + + buf = append(buf, 'p') + if exp64 >= 0 { + buf = append(buf, '+') + } else { + exp64 = -exp64 + buf = append(buf, '-') + } + // Force at least two exponent digits, to match fmt. + if exp64 < 10 { + buf = append(buf, '0') + } + return strconv.AppendInt(buf, exp64, 10) +} + +// fmtP appends the string of x in the format "0x." mantissa "p" exponent +// with a hexadecimal mantissa and a binary exponent, or "0" if x is zero, +// and returns the extended buffer. +// The mantissa is normalized such that 0.5 <= 0.mantissa < 1.0. +// The sign of x is ignored, and x must not be an Inf. +// (The caller handles Inf before invoking fmtP.) +func (x *Float) fmtP(buf []byte) []byte { + if x.form == zero { + return append(buf, '0') + } + + if debugFloat && x.form != finite { + panic("non-finite float") + } + // x != 0 + + // remove trailing 0 words early + // (no need to convert to hex 0's and trim later) + m := x.mant + i := 0 + for i < len(m) && m[i] == 0 { + i++ + } + m = m[i:] + + buf = append(buf, "0x."...) + buf = append(buf, bytes.TrimRight(m.utoa(16), "0")...) + buf = append(buf, 'p') + if x.exp >= 0 { + buf = append(buf, '+') + } + return strconv.AppendInt(buf, int64(x.exp), 10) +} + +func min(x, y int) int { + if x < y { + return x + } + return y +} + +var _ fmt.Formatter = &floatZero // *Float must implement fmt.Formatter + +// Format implements fmt.Formatter. It accepts all the regular +// formats for floating-point numbers ('b', 'e', 'E', 'f', 'F', +// 'g', 'G', 'x') as well as 'p' and 'v'. See (*Float).Text for the +// interpretation of 'p'. The 'v' format is handled like 'g'. +// Format also supports specification of the minimum precision +// in digits, the output field width, as well as the format flags +// '+' and ' ' for sign control, '0' for space or zero padding, +// and '-' for left or right justification. See the fmt package +// for details. +func (x *Float) Format(s fmt.State, format rune) { + prec, hasPrec := s.Precision() + if !hasPrec { + prec = 6 // default precision for 'e', 'f' + } + + switch format { + case 'e', 'E', 'f', 'b', 'p', 'x': + // nothing to do + case 'F': + // (*Float).Text doesn't support 'F'; handle like 'f' + format = 'f' + case 'v': + // handle like 'g' + format = 'g' + fallthrough + case 'g', 'G': + if !hasPrec { + prec = -1 // default precision for 'g', 'G' + } + default: + fmt.Fprintf(s, "%%!%c(*big.Float=%s)", format, x.String()) + return + } + var buf []byte + buf = x.Append(buf, byte(format), prec) + if len(buf) == 0 { + buf = []byte("?") // should never happen, but don't crash + } + // len(buf) > 0 + + var sign string + switch { + case buf[0] == '-': + sign = "-" + buf = buf[1:] + case buf[0] == '+': + // +Inf + sign = "+" + if s.Flag(' ') { + sign = " " + } + buf = buf[1:] + case s.Flag('+'): + sign = "+" + case s.Flag(' '): + sign = " " + } + + var padding int + if width, hasWidth := s.Width(); hasWidth && width > len(sign)+len(buf) { + padding = width - len(sign) - len(buf) + } + + switch { + case s.Flag('0') && !x.IsInf(): + // 0-padding on left + writeMultiple(s, sign, 1) + writeMultiple(s, "0", padding) + s.Write(buf) + case s.Flag('-'): + // padding on right + writeMultiple(s, sign, 1) + s.Write(buf) + writeMultiple(s, " ", padding) + default: + // padding on left + writeMultiple(s, " ", padding) + writeMultiple(s, sign, 1) + s.Write(buf) + } +} diff --git a/contrib/go/_std_1.18/src/math/big/int.go b/contrib/go/_std_1.18/src/math/big/int.go new file mode 100644 index 0000000000..7647346486 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/int.go @@ -0,0 +1,1218 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements signed multi-precision integers. + +package big + +import ( + "fmt" + "io" + "math/rand" + "strings" +) + +// An Int represents a signed multi-precision integer. +// The zero value for an Int represents the value 0. +// +// Operations always take pointer arguments (*Int) rather +// than Int values, and each unique Int value requires +// its own unique *Int pointer. To "copy" an Int value, +// an existing (or newly allocated) Int must be set to +// a new value using the Int.Set method; shallow copies +// of Ints are not supported and may lead to errors. +type Int struct { + neg bool // sign + abs nat // absolute value of the integer +} + +var intOne = &Int{false, natOne} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Int) Sign() int { + if len(x.abs) == 0 { + return 0 + } + if x.neg { + return -1 + } + return 1 +} + +// SetInt64 sets z to x and returns z. +func (z *Int) SetInt64(x int64) *Int { + neg := false + if x < 0 { + neg = true + x = -x + } + z.abs = z.abs.setUint64(uint64(x)) + z.neg = neg + return z +} + +// SetUint64 sets z to x and returns z. +func (z *Int) SetUint64(x uint64) *Int { + z.abs = z.abs.setUint64(x) + z.neg = false + return z +} + +// NewInt allocates and returns a new Int set to x. +func NewInt(x int64) *Int { + return new(Int).SetInt64(x) +} + +// Set sets z to x and returns z. +func (z *Int) Set(x *Int) *Int { + if z != x { + z.abs = z.abs.set(x.abs) + z.neg = x.neg + } + return z +} + +// Bits provides raw (unchecked but fast) access to x by returning its +// absolute value as a little-endian Word slice. The result and x share +// the same underlying array. +// Bits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (x *Int) Bits() []Word { + return x.abs +} + +// SetBits provides raw (unchecked but fast) access to z by setting its +// value to abs, interpreted as a little-endian Word slice, and returning +// z. The result and abs share the same underlying array. +// SetBits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (z *Int) SetBits(abs []Word) *Int { + z.abs = nat(abs).norm() + z.neg = false + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. +func (z *Int) Abs(x *Int) *Int { + z.Set(x) + z.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Int) Neg(x *Int) *Int { + z.Set(x) + z.neg = len(z.abs) > 0 && !z.neg // 0 has no sign + return z +} + +// Add sets z to the sum x+y and returns z. +func (z *Int) Add(x, y *Int) *Int { + neg := x.neg + if x.neg == y.neg { + // x + y == x + y + // (-x) + (-y) == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x + (-y) == x - y == -(y - x) + // (-x) + y == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Sub sets z to the difference x-y and returns z. +func (z *Int) Sub(x, y *Int) *Int { + neg := x.neg + if x.neg != y.neg { + // x - (-y) == x + y + // (-x) - y == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x - y == x - y == -(y - x) + // (-x) - (-y) == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Mul sets z to the product x*y and returns z. +func (z *Int) Mul(x, y *Int) *Int { + // x * y == x * y + // x * (-y) == -(x * y) + // (-x) * y == -(x * y) + // (-x) * (-y) == x * y + if x == y { + z.abs = z.abs.sqr(x.abs) + z.neg = false + return z + } + z.abs = z.abs.mul(x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// MulRange sets z to the product of all integers +// in the range [a, b] inclusively and returns z. +// If a > b (empty range), the result is 1. +func (z *Int) MulRange(a, b int64) *Int { + switch { + case a > b: + return z.SetInt64(1) // empty range + case a <= 0 && b >= 0: + return z.SetInt64(0) // range includes 0 + } + // a <= b && (b < 0 || a > 0) + + neg := false + if a < 0 { + neg = (b-a)&1 == 0 + a, b = -b, -a + } + + z.abs = z.abs.mulRange(uint64(a), uint64(b)) + z.neg = neg + return z +} + +// Binomial sets z to the binomial coefficient of (n, k) and returns z. +func (z *Int) Binomial(n, k int64) *Int { + // reduce the number of multiplications by reducing k + if n/2 < k && k <= n { + k = n - k // Binomial(n, k) == Binomial(n, n-k) + } + var a, b Int + a.MulRange(n-k+1, n) + b.MulRange(1, k) + return z.Quo(&a, &b) +} + +// Quo sets z to the quotient x/y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Quo implements truncated division (like Go); see QuoRem for more details. +func (z *Int) Quo(x, y *Int) *Int { + z.abs, _ = z.abs.div(nil, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// Rem sets z to the remainder x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Rem implements truncated modulus (like Go); see QuoRem for more details. +func (z *Int) Rem(x, y *Int) *Int { + _, z.abs = nat(nil).div(z.abs, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg // 0 has no sign + return z +} + +// QuoRem sets z to the quotient x/y and r to the remainder x%y +// and returns the pair (z, r) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// QuoRem implements T-division and modulus (like Go): +// +// q = x/y with the result truncated to zero +// r = x - y*q +// +// (See Daan Leijen, ``Division and Modulus for Computer Scientists''.) +// See DivMod for Euclidean division and modulus (unlike Go). +// +func (z *Int) QuoRem(x, y, r *Int) (*Int, *Int) { + z.abs, r.abs = z.abs.div(r.abs, x.abs, y.abs) + z.neg, r.neg = len(z.abs) > 0 && x.neg != y.neg, len(r.abs) > 0 && x.neg // 0 has no sign + return z, r +} + +// Div sets z to the quotient x/y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Div implements Euclidean division (unlike Go); see DivMod for more details. +func (z *Int) Div(x, y *Int) *Int { + y_neg := y.neg // z may be an alias for y + var r Int + z.QuoRem(x, y, &r) + if r.neg { + if y_neg { + z.Add(z, intOne) + } else { + z.Sub(z, intOne) + } + } + return z +} + +// Mod sets z to the modulus x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Mod implements Euclidean modulus (unlike Go); see DivMod for more details. +func (z *Int) Mod(x, y *Int) *Int { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + var q Int + q.QuoRem(x, y, z) + if z.neg { + if y0.neg { + z.Sub(z, y0) + } else { + z.Add(z, y0) + } + } + return z +} + +// DivMod sets z to the quotient x div y and m to the modulus x mod y +// and returns the pair (z, m) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// DivMod implements Euclidean division and modulus (unlike Go): +// +// q = x div y such that +// m = x - y*q with 0 <= m < |y| +// +// (See Raymond T. Boute, ``The Euclidean definition of the functions +// div and mod''. ACM Transactions on Programming Languages and +// Systems (TOPLAS), 14(2):127-144, New York, NY, USA, 4/1992. +// ACM press.) +// See QuoRem for T-division and modulus (like Go). +// +func (z *Int) DivMod(x, y, m *Int) (*Int, *Int) { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + z.QuoRem(x, y, m) + if m.neg { + if y0.neg { + z.Add(z, intOne) + m.Sub(m, y0) + } else { + z.Sub(z, intOne) + m.Add(m, y0) + } + } + return z, m +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Int) Cmp(y *Int) (r int) { + // x cmp y == x cmp y + // x cmp (-y) == x + // (-x) cmp y == y + // (-x) cmp (-y) == -(x cmp y) + switch { + case x == y: + // nothing to do + case x.neg == y.neg: + r = x.abs.cmp(y.abs) + if x.neg { + r = -r + } + case x.neg: + r = -1 + default: + r = 1 + } + return +} + +// CmpAbs compares the absolute values of x and y and returns: +// +// -1 if |x| < |y| +// 0 if |x| == |y| +// +1 if |x| > |y| +// +func (x *Int) CmpAbs(y *Int) int { + return x.abs.cmp(y.abs) +} + +// low32 returns the least significant 32 bits of x. +func low32(x nat) uint32 { + if len(x) == 0 { + return 0 + } + return uint32(x[0]) +} + +// low64 returns the least significant 64 bits of x. +func low64(x nat) uint64 { + if len(x) == 0 { + return 0 + } + v := uint64(x[0]) + if _W == 32 && len(x) > 1 { + return uint64(x[1])<<32 | v + } + return v +} + +// Int64 returns the int64 representation of x. +// If x cannot be represented in an int64, the result is undefined. +func (x *Int) Int64() int64 { + v := int64(low64(x.abs)) + if x.neg { + v = -v + } + return v +} + +// Uint64 returns the uint64 representation of x. +// If x cannot be represented in a uint64, the result is undefined. +func (x *Int) Uint64() uint64 { + return low64(x.abs) +} + +// IsInt64 reports whether x can be represented as an int64. +func (x *Int) IsInt64() bool { + if len(x.abs) <= 64/_W { + w := int64(low64(x.abs)) + return w >= 0 || x.neg && w == -w + } + return false +} + +// IsUint64 reports whether x can be represented as a uint64. +func (x *Int) IsUint64() bool { + return !x.neg && len(x.abs) <= 64/_W +} + +// SetString sets z to the value of s, interpreted in the given base, +// and returns z and a boolean indicating success. The entire string +// (not just a prefix) must be valid for success. If SetString fails, +// the value of z is undefined but the returned value is nil. +// +// The base argument must be 0 or a value between 2 and MaxBase. +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0'', ``0o'' or ``0O'' selects base 8, +// and ``0x'' or ``0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For bases <= 36, lower and upper case letters are considered the same: +// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. +// For bases > 36, the upper case letters 'A' to 'Z' represent the digit +// values 36 to 61. +// +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as an error if there +// are no other errors. If base != 0, underscores are not recognized +// and act like any other character that is not a valid digit. +// +func (z *Int) SetString(s string, base int) (*Int, bool) { + return z.setFromScanner(strings.NewReader(s), base) +} + +// setFromScanner implements SetString given an io.ByteScanner. +// For documentation see comments of SetString. +func (z *Int) setFromScanner(r io.ByteScanner, base int) (*Int, bool) { + if _, _, err := z.scan(r, base); err != nil { + return nil, false + } + // entire content must have been consumed + if _, err := r.ReadByte(); err != io.EOF { + return nil, false + } + return z, true // err == io.EOF => scan consumed all content of r +} + +// SetBytes interprets buf as the bytes of a big-endian unsigned +// integer, sets z to that value, and returns z. +func (z *Int) SetBytes(buf []byte) *Int { + z.abs = z.abs.setBytes(buf) + z.neg = false + return z +} + +// Bytes returns the absolute value of x as a big-endian byte slice. +// +// To use a fixed length slice, or a preallocated one, use FillBytes. +func (x *Int) Bytes() []byte { + buf := make([]byte, len(x.abs)*_S) + return buf[x.abs.bytes(buf):] +} + +// FillBytes sets buf to the absolute value of x, storing it as a zero-extended +// big-endian byte slice, and returns buf. +// +// If the absolute value of x doesn't fit in buf, FillBytes will panic. +func (x *Int) FillBytes(buf []byte) []byte { + // Clear whole buffer. (This gets optimized into a memclr.) + for i := range buf { + buf[i] = 0 + } + x.abs.bytes(buf) + return buf +} + +// BitLen returns the length of the absolute value of x in bits. +// The bit length of 0 is 0. +func (x *Int) BitLen() int { + return x.abs.bitLen() +} + +// TrailingZeroBits returns the number of consecutive least significant zero +// bits of |x|. +func (x *Int) TrailingZeroBits() uint { + return x.abs.trailingZeroBits() +} + +// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z. +// If m == nil or m == 0, z = x**y unless y <= 0 then z = 1. If m != 0, y < 0, +// and x and m are not relatively prime, z is unchanged and nil is returned. +// +// Modular exponentiation of inputs of a particular size is not a +// cryptographically constant-time operation. +func (z *Int) Exp(x, y, m *Int) *Int { + // See Knuth, volume 2, section 4.6.3. + xWords := x.abs + if y.neg { + if m == nil || len(m.abs) == 0 { + return z.SetInt64(1) + } + // for y < 0: x**y mod m == (x**(-1))**|y| mod m + inverse := new(Int).ModInverse(x, m) + if inverse == nil { + return nil + } + xWords = inverse.abs + } + yWords := y.abs + + var mWords nat + if m != nil { + mWords = m.abs // m.abs may be nil for m == 0 + } + + z.abs = z.abs.expNN(xWords, yWords, mWords) + z.neg = len(z.abs) > 0 && x.neg && len(yWords) > 0 && yWords[0]&1 == 1 // 0 has no sign + if z.neg && len(mWords) > 0 { + // make modulus result positive + z.abs = z.abs.sub(mWords, z.abs) // z == x**y mod |m| && 0 <= z < |m| + z.neg = false + } + + return z +} + +// GCD sets z to the greatest common divisor of a and b and returns z. +// If x or y are not nil, GCD sets their value such that z = a*x + b*y. +// +// a and b may be positive, zero or negative. (Before Go 1.14 both had +// to be > 0.) Regardless of the signs of a and b, z is always >= 0. +// +// If a == b == 0, GCD sets z = x = y = 0. +// +// If a == 0 and b != 0, GCD sets z = |b|, x = 0, y = sign(b) * 1. +// +// If a != 0 and b == 0, GCD sets z = |a|, x = sign(a) * 1, y = 0. +func (z *Int) GCD(x, y, a, b *Int) *Int { + if len(a.abs) == 0 || len(b.abs) == 0 { + lenA, lenB, negA, negB := len(a.abs), len(b.abs), a.neg, b.neg + if lenA == 0 { + z.Set(b) + } else { + z.Set(a) + } + z.neg = false + if x != nil { + if lenA == 0 { + x.SetUint64(0) + } else { + x.SetUint64(1) + x.neg = negA + } + } + if y != nil { + if lenB == 0 { + y.SetUint64(0) + } else { + y.SetUint64(1) + y.neg = negB + } + } + return z + } + + return z.lehmerGCD(x, y, a, b) +} + +// lehmerSimulate attempts to simulate several Euclidean update steps +// using the leading digits of A and B. It returns u0, u1, v0, v1 +// such that A and B can be updated as: +// A = u0*A + v0*B +// B = u1*A + v1*B +// Requirements: A >= B and len(B.abs) >= 2 +// Since we are calculating with full words to avoid overflow, +// we use 'even' to track the sign of the cosequences. +// For even iterations: u0, v1 >= 0 && u1, v0 <= 0 +// For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 +func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) { + // initialize the digits + var a1, a2, u2, v2 Word + + m := len(B.abs) // m >= 2 + n := len(A.abs) // n >= m >= 2 + + // extract the top Word of bits from A and B + h := nlz(A.abs[n-1]) + a1 = A.abs[n-1]<<h | A.abs[n-2]>>(_W-h) + // B may have implicit zero words in the high bits if the lengths differ + switch { + case n == m: + a2 = B.abs[n-1]<<h | B.abs[n-2]>>(_W-h) + case n == m+1: + a2 = B.abs[n-2] >> (_W - h) + default: + a2 = 0 + } + + // Since we are calculating with full words to avoid overflow, + // we use 'even' to track the sign of the cosequences. + // For even iterations: u0, v1 >= 0 && u1, v0 <= 0 + // For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 + // The first iteration starts with k=1 (odd). + even = false + // variables to track the cosequences + u0, u1, u2 = 0, 1, 0 + v0, v1, v2 = 0, 0, 1 + + // Calculate the quotient and cosequences using Collins' stopping condition. + // Note that overflow of a Word is not possible when computing the remainder + // sequence and cosequences since the cosequence size is bounded by the input size. + // See section 4.2 of Jebelean for details. + for a2 >= v2 && a1-a2 >= v1+v2 { + q, r := a1/a2, a1%a2 + a1, a2 = a2, r + u0, u1, u2 = u1, u2, u1+q*u2 + v0, v1, v2 = v1, v2, v1+q*v2 + even = !even + } + return +} + +// lehmerUpdate updates the inputs A and B such that: +// A = u0*A + v0*B +// B = u1*A + v1*B +// where the signs of u0, u1, v0, v1 are given by even +// For even == true: u0, v1 >= 0 && u1, v0 <= 0 +// For even == false: u0, v1 <= 0 && u1, v0 >= 0 +// q, r, s, t are temporary variables to avoid allocations in the multiplication +func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) { + + t.abs = t.abs.setWord(u0) + s.abs = s.abs.setWord(v0) + t.neg = !even + s.neg = even + + t.Mul(A, t) + s.Mul(B, s) + + r.abs = r.abs.setWord(u1) + q.abs = q.abs.setWord(v1) + r.neg = even + q.neg = !even + + r.Mul(A, r) + q.Mul(B, q) + + A.Add(t, s) + B.Add(r, q) +} + +// euclidUpdate performs a single step of the Euclidean GCD algorithm +// if extended is true, it also updates the cosequence Ua, Ub +func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) { + q, r = q.QuoRem(A, B, r) + + *A, *B, *r = *B, *r, *A + + if extended { + // Ua, Ub = Ub, Ua - q*Ub + t.Set(Ub) + s.Mul(Ub, q) + Ub.Sub(Ua, s) + Ua.Set(t) + } +} + +// lehmerGCD sets z to the greatest common divisor of a and b, +// which both must be != 0, and returns z. +// If x or y are not nil, their values are set such that z = a*x + b*y. +// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L. +// This implementation uses the improved condition by Collins requiring only one +// quotient and avoiding the possibility of single Word overflow. +// See Jebelean, "Improving the multiprecision Euclidean algorithm", +// Design and Implementation of Symbolic Computation Systems, pp 45-58. +// The cosequences are updated according to Algorithm 10.45 from +// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192. +func (z *Int) lehmerGCD(x, y, a, b *Int) *Int { + var A, B, Ua, Ub *Int + + A = new(Int).Abs(a) + B = new(Int).Abs(b) + + extended := x != nil || y != nil + + if extended { + // Ua (Ub) tracks how many times input a has been accumulated into A (B). + Ua = new(Int).SetInt64(1) + Ub = new(Int) + } + + // temp variables for multiprecision update + q := new(Int) + r := new(Int) + s := new(Int) + t := new(Int) + + // ensure A >= B + if A.abs.cmp(B.abs) < 0 { + A, B = B, A + Ub, Ua = Ua, Ub + } + + // loop invariant A >= B + for len(B.abs) > 1 { + // Attempt to calculate in single-precision using leading words of A and B. + u0, u1, v0, v1, even := lehmerSimulate(A, B) + + // multiprecision Step + if v0 != 0 { + // Simulate the effect of the single-precision steps using the cosequences. + // A = u0*A + v0*B + // B = u1*A + v1*B + lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even) + + if extended { + // Ua = u0*Ua + v0*Ub + // Ub = u1*Ua + v1*Ub + lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even) + } + + } else { + // Single-digit calculations failed to simulate any quotients. + // Do a standard Euclidean step. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) + } + } + + if len(B.abs) > 0 { + // extended Euclidean algorithm base case if B is a single Word + if len(A.abs) > 1 { + // A is longer than a single Word, so one update is needed. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) + } + if len(B.abs) > 0 { + // A and B are both a single Word. + aWord, bWord := A.abs[0], B.abs[0] + if extended { + var ua, ub, va, vb Word + ua, ub = 1, 0 + va, vb = 0, 1 + even := true + for bWord != 0 { + q, r := aWord/bWord, aWord%bWord + aWord, bWord = bWord, r + ua, ub = ub, ua+q*ub + va, vb = vb, va+q*vb + even = !even + } + + t.abs = t.abs.setWord(ua) + s.abs = s.abs.setWord(va) + t.neg = !even + s.neg = even + + t.Mul(Ua, t) + s.Mul(Ub, s) + + Ua.Add(t, s) + } else { + for bWord != 0 { + aWord, bWord = bWord, aWord%bWord + } + } + A.abs[0] = aWord + } + } + negA := a.neg + if y != nil { + // avoid aliasing b needed in the division below + if y == b { + B.Set(b) + } else { + B = b + } + // y = (z - a*x)/b + y.Mul(a, Ua) // y can safely alias a + if negA { + y.neg = !y.neg + } + y.Sub(A, y) + y.Div(y, B) + } + + if x != nil { + *x = *Ua + if negA { + x.neg = !x.neg + } + } + + *z = *A + + return z +} + +// Rand sets z to a pseudo-random number in [0, n) and returns z. +// +// As this uses the math/rand package, it must not be used for +// security-sensitive work. Use crypto/rand.Int instead. +func (z *Int) Rand(rnd *rand.Rand, n *Int) *Int { + z.neg = false + if n.neg || len(n.abs) == 0 { + z.abs = nil + return z + } + z.abs = z.abs.random(rnd, n.abs, n.abs.bitLen()) + return z +} + +// ModInverse sets z to the multiplicative inverse of g in the ring ℤ/nℤ +// and returns z. If g and n are not relatively prime, g has no multiplicative +// inverse in the ring ℤ/nℤ. In this case, z is unchanged and the return value +// is nil. +func (z *Int) ModInverse(g, n *Int) *Int { + // GCD expects parameters a and b to be > 0. + if n.neg { + var n2 Int + n = n2.Neg(n) + } + if g.neg { + var g2 Int + g = g2.Mod(g, n) + } + var d, x Int + d.GCD(&x, nil, g, n) + + // if and only if d==1, g and n are relatively prime + if d.Cmp(intOne) != 0 { + return nil + } + + // x and y are such that g*x + n*y = 1, therefore x is the inverse element, + // but it may be negative, so convert to the range 0 <= z < |n| + if x.neg { + z.Add(&x, n) + } else { + z.Set(&x) + } + return z +} + +// Jacobi returns the Jacobi symbol (x/y), either +1, -1, or 0. +// The y argument must be an odd integer. +func Jacobi(x, y *Int) int { + if len(y.abs) == 0 || y.abs[0]&1 == 0 { + panic(fmt.Sprintf("big: invalid 2nd argument to Int.Jacobi: need odd integer but got %s", y)) + } + + // We use the formulation described in chapter 2, section 2.4, + // "The Yacas Book of Algorithms": + // http://yacas.sourceforge.net/Algo.book.pdf + + var a, b, c Int + a.Set(x) + b.Set(y) + j := 1 + + if b.neg { + if a.neg { + j = -1 + } + b.neg = false + } + + for { + if b.Cmp(intOne) == 0 { + return j + } + if len(a.abs) == 0 { + return 0 + } + a.Mod(&a, &b) + if len(a.abs) == 0 { + return 0 + } + // a > 0 + + // handle factors of 2 in 'a' + s := a.abs.trailingZeroBits() + if s&1 != 0 { + bmod8 := b.abs[0] & 7 + if bmod8 == 3 || bmod8 == 5 { + j = -j + } + } + c.Rsh(&a, s) // a = 2^s*c + + // swap numerator and denominator + if b.abs[0]&3 == 3 && c.abs[0]&3 == 3 { + j = -j + } + a.Set(&b) + b.Set(&c) + } +} + +// modSqrt3Mod4 uses the identity +// (a^((p+1)/4))^2 mod p +// == u^(p+1) mod p +// == u^2 mod p +// to calculate the square root of any quadratic residue mod p quickly for 3 +// mod 4 primes. +func (z *Int) modSqrt3Mod4Prime(x, p *Int) *Int { + e := new(Int).Add(p, intOne) // e = p + 1 + e.Rsh(e, 2) // e = (p + 1) / 4 + z.Exp(x, e, p) // z = x^e mod p + return z +} + +// modSqrt5Mod8 uses Atkin's observation that 2 is not a square mod p +// alpha == (2*a)^((p-5)/8) mod p +// beta == 2*a*alpha^2 mod p is a square root of -1 +// b == a*alpha*(beta-1) mod p is a square root of a +// to calculate the square root of any quadratic residue mod p quickly for 5 +// mod 8 primes. +func (z *Int) modSqrt5Mod8Prime(x, p *Int) *Int { + // p == 5 mod 8 implies p = e*8 + 5 + // e is the quotient and 5 the remainder on division by 8 + e := new(Int).Rsh(p, 3) // e = (p - 5) / 8 + tx := new(Int).Lsh(x, 1) // tx = 2*x + alpha := new(Int).Exp(tx, e, p) + beta := new(Int).Mul(alpha, alpha) + beta.Mod(beta, p) + beta.Mul(beta, tx) + beta.Mod(beta, p) + beta.Sub(beta, intOne) + beta.Mul(beta, x) + beta.Mod(beta, p) + beta.Mul(beta, alpha) + z.Mod(beta, p) + return z +} + +// modSqrtTonelliShanks uses the Tonelli-Shanks algorithm to find the square +// root of a quadratic residue modulo any prime. +func (z *Int) modSqrtTonelliShanks(x, p *Int) *Int { + // Break p-1 into s*2^e such that s is odd. + var s Int + s.Sub(p, intOne) + e := s.abs.trailingZeroBits() + s.Rsh(&s, e) + + // find some non-square n + var n Int + n.SetInt64(2) + for Jacobi(&n, p) != -1 { + n.Add(&n, intOne) + } + + // Core of the Tonelli-Shanks algorithm. Follows the description in + // section 6 of "Square roots from 1; 24, 51, 10 to Dan Shanks" by Ezra + // Brown: + // https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + var y, b, g, t Int + y.Add(&s, intOne) + y.Rsh(&y, 1) + y.Exp(x, &y, p) // y = x^((s+1)/2) + b.Exp(x, &s, p) // b = x^s + g.Exp(&n, &s, p) // g = n^s + r := e + for { + // find the least m such that ord_p(b) = 2^m + var m uint + t.Set(&b) + for t.Cmp(intOne) != 0 { + t.Mul(&t, &t).Mod(&t, p) + m++ + } + + if m == 0 { + return z.Set(&y) + } + + t.SetInt64(0).SetBit(&t, int(r-m-1), 1).Exp(&g, &t, p) + // t = g^(2^(r-m-1)) mod p + g.Mul(&t, &t).Mod(&g, p) // g = g^(2^(r-m)) mod p + y.Mul(&y, &t).Mod(&y, p) + b.Mul(&b, &g).Mod(&b, p) + r = m + } +} + +// ModSqrt sets z to a square root of x mod p if such a square root exists, and +// returns z. The modulus p must be an odd prime. If x is not a square mod p, +// ModSqrt leaves z unchanged and returns nil. This function panics if p is +// not an odd integer. +func (z *Int) ModSqrt(x, p *Int) *Int { + switch Jacobi(x, p) { + case -1: + return nil // x is not a square mod p + case 0: + return z.SetInt64(0) // sqrt(0) mod p = 0 + case 1: + break + } + if x.neg || x.Cmp(p) >= 0 { // ensure 0 <= x < p + x = new(Int).Mod(x, p) + } + + switch { + case p.abs[0]%4 == 3: + // Check whether p is 3 mod 4, and if so, use the faster algorithm. + return z.modSqrt3Mod4Prime(x, p) + case p.abs[0]%8 == 5: + // Check whether p is 5 mod 8, use Atkin's algorithm. + return z.modSqrt5Mod8Prime(x, p) + default: + // Otherwise, use Tonelli-Shanks. + return z.modSqrtTonelliShanks(x, p) + } +} + +// Lsh sets z = x << n and returns z. +func (z *Int) Lsh(x *Int, n uint) *Int { + z.abs = z.abs.shl(x.abs, n) + z.neg = x.neg + return z +} + +// Rsh sets z = x >> n and returns z. +func (z *Int) Rsh(x *Int, n uint) *Int { + if x.neg { + // (-x) >> s == ^(x-1) >> s == ^((x-1) >> s) == -(((x-1) >> s) + 1) + t := z.abs.sub(x.abs, natOne) // no underflow because |x| > 0 + t = t.shr(t, n) + z.abs = t.add(t, natOne) + z.neg = true // z cannot be zero if x is negative + return z + } + + z.abs = z.abs.shr(x.abs, n) + z.neg = false + return z +} + +// Bit returns the value of the i'th bit of x. That is, it +// returns (x>>i)&1. The bit index i must be >= 0. +func (x *Int) Bit(i int) uint { + if i == 0 { + // optimization for common case: odd/even test of x + if len(x.abs) > 0 { + return uint(x.abs[0] & 1) // bit 0 is same for -x + } + return 0 + } + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := nat(nil).sub(x.abs, natOne) + return t.bit(uint(i)) ^ 1 + } + + return x.abs.bit(uint(i)) +} + +// SetBit sets z to x, with x's i'th bit set to b (0 or 1). +// That is, if b is 1 SetBit sets z = x | (1 << i); +// if b is 0 SetBit sets z = x &^ (1 << i). If b is not 0 or 1, +// SetBit will panic. +func (z *Int) SetBit(x *Int, i int, b uint) *Int { + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := z.abs.sub(x.abs, natOne) + t = t.setBit(t, uint(i), b^1) + z.abs = t.add(t, natOne) + z.neg = len(z.abs) > 0 + return z + } + z.abs = z.abs.setBit(x.abs, uint(i), b) + z.neg = false + return z +} + +// And sets z = x & y and returns z. +func (z *Int) And(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) & (-y) == ^(x-1) & ^(y-1) == ^((x-1) | (y-1)) == -(((x-1) | (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x & y == x & y + z.abs = z.abs.and(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // & is symmetric + } + + // x & (-y) == x & ^(y-1) == x &^ (y-1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(x.abs, y1) + z.neg = false + return z +} + +// AndNot sets z = x &^ y and returns z. +func (z *Int) AndNot(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) &^ (-y) == ^(x-1) &^ ^(y-1) == ^(x-1) & (y-1) == (y-1) &^ (x-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(y1, x1) + z.neg = false + return z + } + + // x &^ y == x &^ y + z.abs = z.abs.andNot(x.abs, y.abs) + z.neg = false + return z + } + + if x.neg { + // (-x) &^ y == ^(x-1) &^ y == ^(x-1) & ^y == ^((x-1) | y) == -(((x-1) | y) + 1) + x1 := nat(nil).sub(x.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y.abs), natOne) + z.neg = true // z cannot be zero if x is negative and y is positive + return z + } + + // x &^ (-y) == x &^ ^(y-1) == x & (y-1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.and(x.abs, y1) + z.neg = false + return z +} + +// Or sets z = x | y and returns z. +func (z *Int) Or(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) | (-y) == ^(x-1) | ^(y-1) == ^((x-1) & (y-1)) == -(((x-1) & (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.and(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x | y == x | y + z.abs = z.abs.or(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // | is symmetric + } + + // x | (-y) == x | ^(y-1) == ^((y-1) &^ x) == -(^((y-1) &^ x) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.andNot(y1, x.abs), natOne) + z.neg = true // z cannot be zero if one of x or y is negative + return z +} + +// Xor sets z = x ^ y and returns z. +func (z *Int) Xor(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) ^ (-y) == ^(x-1) ^ ^(y-1) == (x-1) ^ (y-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.xor(x1, y1) + z.neg = false + return z + } + + // x ^ y == x ^ y + z.abs = z.abs.xor(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // ^ is symmetric + } + + // x ^ (-y) == x ^ ^(y-1) == ^(x ^ (y-1)) == -((x ^ (y-1)) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.xor(x.abs, y1), natOne) + z.neg = true // z cannot be zero if only one of x or y is negative + return z +} + +// Not sets z = ^x and returns z. +func (z *Int) Not(x *Int) *Int { + if x.neg { + // ^(-x) == ^(^(x-1)) == x-1 + z.abs = z.abs.sub(x.abs, natOne) + z.neg = false + return z + } + + // ^x == -x-1 == -(x+1) + z.abs = z.abs.add(x.abs, natOne) + z.neg = true // z cannot be zero if x is positive + return z +} + +// Sqrt sets z to ⌊√x⌋, the largest integer such that z² ≤ x, and returns z. +// It panics if x is negative. +func (z *Int) Sqrt(x *Int) *Int { + if x.neg { + panic("square root of negative number") + } + z.neg = false + z.abs = z.abs.sqrt(x.abs) + return z +} diff --git a/contrib/go/_std_1.18/src/math/big/intconv.go b/contrib/go/_std_1.18/src/math/big/intconv.go new file mode 100644 index 0000000000..0567284105 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/intconv.go @@ -0,0 +1,257 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements int-to-string conversion functions. + +package big + +import ( + "errors" + "fmt" + "io" +) + +// Text returns the string representation of x in the given base. +// Base must be between 2 and 62, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35, and +// the upper-case letters 'A' to 'Z' for digit values 36 to 61. +// No prefix (such as "0x") is added to the string. If x is a nil +// pointer it returns "<nil>". +func (x *Int) Text(base int) string { + if x == nil { + return "<nil>" + } + return string(x.abs.itoa(x.neg, base)) +} + +// Append appends the string representation of x, as generated by +// x.Text(base), to buf and returns the extended buffer. +func (x *Int) Append(buf []byte, base int) []byte { + if x == nil { + return append(buf, "<nil>"...) + } + return append(buf, x.abs.itoa(x.neg, base)...) +} + +// String returns the decimal representation of x as generated by +// x.Text(10). +func (x *Int) String() string { + return x.Text(10) +} + +// write count copies of text to s +func writeMultiple(s fmt.State, text string, count int) { + if len(text) > 0 { + b := []byte(text) + for ; count > 0; count-- { + s.Write(b) + } + } +} + +var _ fmt.Formatter = intOne // *Int must implement fmt.Formatter + +// Format implements fmt.Formatter. It accepts the formats +// 'b' (binary), 'o' (octal with 0 prefix), 'O' (octal with 0o prefix), +// 'd' (decimal), 'x' (lowercase hexadecimal), and +// 'X' (uppercase hexadecimal). +// Also supported are the full suite of package fmt's format +// flags for integral types, including '+' and ' ' for sign +// control, '#' for leading zero in octal and for hexadecimal, +// a leading "0x" or "0X" for "%#x" and "%#X" respectively, +// specification of minimum digits precision, output field +// width, space or zero padding, and '-' for left or right +// justification. +// +func (x *Int) Format(s fmt.State, ch rune) { + // determine base + var base int + switch ch { + case 'b': + base = 2 + case 'o', 'O': + base = 8 + case 'd', 's', 'v': + base = 10 + case 'x', 'X': + base = 16 + default: + // unknown format + fmt.Fprintf(s, "%%!%c(big.Int=%s)", ch, x.String()) + return + } + + if x == nil { + fmt.Fprint(s, "<nil>") + return + } + + // determine sign character + sign := "" + switch { + case x.neg: + sign = "-" + case s.Flag('+'): // supersedes ' ' when both specified + sign = "+" + case s.Flag(' '): + sign = " " + } + + // determine prefix characters for indicating output base + prefix := "" + if s.Flag('#') { + switch ch { + case 'b': // binary + prefix = "0b" + case 'o': // octal + prefix = "0" + case 'x': // hexadecimal + prefix = "0x" + case 'X': + prefix = "0X" + } + } + if ch == 'O' { + prefix = "0o" + } + + digits := x.abs.utoa(base) + if ch == 'X' { + // faster than bytes.ToUpper + for i, d := range digits { + if 'a' <= d && d <= 'z' { + digits[i] = 'A' + (d - 'a') + } + } + } + + // number of characters for the three classes of number padding + var left int // space characters to left of digits for right justification ("%8d") + var zeros int // zero characters (actually cs[0]) as left-most digits ("%.8d") + var right int // space characters to right of digits for left justification ("%-8d") + + // determine number padding from precision: the least number of digits to output + precision, precisionSet := s.Precision() + if precisionSet { + switch { + case len(digits) < precision: + zeros = precision - len(digits) // count of zero padding + case len(digits) == 1 && digits[0] == '0' && precision == 0: + return // print nothing if zero value (x == 0) and zero precision ("." or ".0") + } + } + + // determine field pad from width: the least number of characters to output + length := len(sign) + len(prefix) + zeros + len(digits) + if width, widthSet := s.Width(); widthSet && length < width { // pad as specified + switch d := width - length; { + case s.Flag('-'): + // pad on the right with spaces; supersedes '0' when both specified + right = d + case s.Flag('0') && !precisionSet: + // pad with zeros unless precision also specified + zeros = d + default: + // pad on the left with spaces + left = d + } + } + + // print number as [left pad][sign][prefix][zero pad][digits][right pad] + writeMultiple(s, " ", left) + writeMultiple(s, sign, 1) + writeMultiple(s, prefix, 1) + writeMultiple(s, "0", zeros) + s.Write(digits) + writeMultiple(s, " ", right) +} + +// scan sets z to the integer value corresponding to the longest possible prefix +// read from r representing a signed integer number in a given conversion base. +// It returns z, the actual conversion base used, and an error, if any. In the +// error case, the value of z is undefined but the returned value is nil. The +// syntax follows the syntax of integer literals in Go. +// +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0b'' or ``0B'' selects base 2; a ``0'', ``0o'', or ``0O'' prefix selects +// base 8, and a ``0x'' or ``0X'' prefix selects base 16. Otherwise the selected +// base is 10. +// +func (z *Int) scan(r io.ByteScanner, base int) (*Int, int, error) { + // determine sign + neg, err := scanSign(r) + if err != nil { + return nil, 0, err + } + + // determine mantissa + z.abs, base, _, err = z.abs.scan(r, base, false) + if err != nil { + return nil, base, err + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + + return z, base, nil +} + +func scanSign(r io.ByteScanner) (neg bool, err error) { + var ch byte + if ch, err = r.ReadByte(); err != nil { + return false, err + } + switch ch { + case '-': + neg = true + case '+': + // nothing to do + default: + r.UnreadByte() + } + return +} + +// byteReader is a local wrapper around fmt.ScanState; +// it implements the ByteReader interface. +type byteReader struct { + fmt.ScanState +} + +func (r byteReader) ReadByte() (byte, error) { + ch, size, err := r.ReadRune() + if size != 1 && err == nil { + err = fmt.Errorf("invalid rune %#U", ch) + } + return byte(ch), err +} + +func (r byteReader) UnreadByte() error { + return r.UnreadRune() +} + +var _ fmt.Scanner = intOne // *Int must implement fmt.Scanner + +// Scan is a support routine for fmt.Scanner; it sets z to the value of +// the scanned number. It accepts the formats 'b' (binary), 'o' (octal), +// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal). +func (z *Int) Scan(s fmt.ScanState, ch rune) error { + s.SkipSpace() // skip leading space characters + base := 0 + switch ch { + case 'b': + base = 2 + case 'o': + base = 8 + case 'd': + base = 10 + case 'x', 'X': + base = 16 + case 's', 'v': + // let scan determine the base + default: + return errors.New("Int.Scan: invalid verb") + } + _, _, err := z.scan(byteReader{s}, base) + return err +} diff --git a/contrib/go/_std_1.18/src/math/big/intmarsh.go b/contrib/go/_std_1.18/src/math/big/intmarsh.go new file mode 100644 index 0000000000..c1422e2710 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/intmarsh.go @@ -0,0 +1,80 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Ints. + +package big + +import ( + "bytes" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. +const intGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +func (x *Int) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit + i := x.abs.bytes(buf) - 1 // i >= 0 + b := intGobVersion << 1 // make space for sign bit + if x.neg { + b |= 1 + } + buf[i] = b + return buf[i:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Int) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Int{} + return nil + } + b := buf[0] + if b>>1 != intGobVersion { + return fmt.Errorf("Int.GobDecode: encoding version %d not supported", b>>1) + } + z.neg = b&1 != 0 + z.abs = z.abs.setBytes(buf[1:]) + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (x *Int) MarshalText() (text []byte, err error) { + if x == nil { + return []byte("<nil>"), nil + } + return x.abs.itoa(x.neg, 10), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +func (z *Int) UnmarshalText(text []byte) error { + if _, ok := z.setFromScanner(bytes.NewReader(text), 0); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text) + } + return nil +} + +// The JSON marshalers are only here for API backward compatibility +// (programs that explicitly look for these two methods). JSON works +// fine with the TextMarshaler only. + +// MarshalJSON implements the json.Marshaler interface. +func (x *Int) MarshalJSON() ([]byte, error) { + return x.MarshalText() +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (z *Int) UnmarshalJSON(text []byte) error { + // Ignore null, like in the main JSON package. + if string(text) == "null" { + return nil + } + return z.UnmarshalText(text) +} diff --git a/contrib/go/_std_1.18/src/math/big/nat.go b/contrib/go/_std_1.18/src/math/big/nat.go new file mode 100644 index 0000000000..140c619c8c --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/nat.go @@ -0,0 +1,1244 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements unsigned multi-precision integers (natural +// numbers). They are the building blocks for the implementation +// of signed integers, rationals, and floating-point numbers. +// +// Caution: This implementation relies on the function "alias" +// which assumes that (nat) slice capacities are never +// changed (no 3-operand slice expressions). If that +// changes, alias needs to be updated for correctness. + +package big + +import ( + "encoding/binary" + "math/bits" + "math/rand" + "sync" +) + +// An unsigned integer x of the form +// +// x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] +// +// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, +// with the digits x[i] as the slice elements. +// +// A number is normalized if the slice contains no leading 0 digits. +// During arithmetic operations, denormalized values may occur but are +// always normalized before returning the final result. The normalized +// representation of 0 is the empty or nil slice (length = 0). +// +type nat []Word + +var ( + natOne = nat{1} + natTwo = nat{2} + natFive = nat{5} + natTen = nat{10} +) + +func (z nat) clear() { + for i := range z { + z[i] = 0 + } +} + +func (z nat) norm() nat { + i := len(z) + for i > 0 && z[i-1] == 0 { + i-- + } + return z[0:i] +} + +func (z nat) make(n int) nat { + if n <= cap(z) { + return z[:n] // reuse z + } + if n == 1 { + // Most nats start small and stay that way; don't over-allocate. + return make(nat, 1) + } + // Choosing a good value for e has significant performance impact + // because it increases the chance that a value can be reused. + const e = 4 // extra capacity + return make(nat, n, n+e) +} + +func (z nat) setWord(x Word) nat { + if x == 0 { + return z[:0] + } + z = z.make(1) + z[0] = x + return z +} + +func (z nat) setUint64(x uint64) nat { + // single-word value + if w := Word(x); uint64(w) == x { + return z.setWord(w) + } + // 2-word value + z = z.make(2) + z[1] = Word(x >> 32) + z[0] = Word(x) + return z +} + +func (z nat) set(x nat) nat { + z = z.make(len(x)) + copy(z, x) + return z +} + +func (z nat) add(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.add(y, x) + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m + 1) + c := addVV(z[0:n], x, y) + if m > n { + c = addVW(z[n:m], x[n:], c) + } + z[m] = c + + return z.norm() +} + +func (z nat) sub(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + panic("underflow") + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m) + c := subVV(z[0:n], x, y) + if m > n { + c = subVW(z[n:], x[n:], c) + } + if c != 0 { + panic("underflow") + } + + return z.norm() +} + +func (x nat) cmp(y nat) (r int) { + m := len(x) + n := len(y) + if m != n || m == 0 { + switch { + case m < n: + r = -1 + case m > n: + r = 1 + } + return + } + + i := m - 1 + for i > 0 && x[i] == y[i] { + i-- + } + + switch { + case x[i] < y[i]: + r = -1 + case x[i] > y[i]: + r = 1 + } + return +} + +func (z nat) mulAddWW(x nat, y, r Word) nat { + m := len(x) + if m == 0 || y == 0 { + return z.setWord(r) // result is r + } + // m > 0 + + z = z.make(m + 1) + z[m] = mulAddVWW(z[0:m], x, y, r) + + return z.norm() +} + +// basicMul multiplies x and y and leaves the result in z. +// The (non-normalized) result is placed in z[0 : len(x) + len(y)]. +func basicMul(z, x, y nat) { + z[0 : len(x)+len(y)].clear() // initialize z + for i, d := range y { + if d != 0 { + z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) + } + } +} + +// montgomery computes z mod m = x*y*2**(-n*_W) mod m, +// assuming k = -1/m mod 2**_W. +// z is used for storing the result which is returned; +// z must not alias x, y or m. +// See Gueron, "Efficient Software Implementations of Modular Exponentiation". +// https://eprint.iacr.org/2011/239.pdf +// In the terminology of that paper, this is an "Almost Montgomery Multiplication": +// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result +// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. +func (z nat) montgomery(x, y, m nat, k Word, n int) nat { + // This code assumes x, y, m are all the same length, n. + // (required by addMulVVW and the for loop). + // It also assumes that x, y are already reduced mod m, + // or else the result will not be properly reduced. + if len(x) != n || len(y) != n || len(m) != n { + panic("math/big: mismatched montgomery number lengths") + } + z = z.make(n * 2) + z.clear() + var c Word + for i := 0; i < n; i++ { + d := y[i] + c2 := addMulVVW(z[i:n+i], x, d) + t := z[i] * k + c3 := addMulVVW(z[i:n+i], m, t) + cx := c + c2 + cy := cx + c3 + z[n+i] = cy + if cx < c2 || cy < c3 { + c = 1 + } else { + c = 0 + } + } + if c != 0 { + subVV(z[:n], z[n:], m) + } else { + copy(z[:n], z[n:]) + } + return z[:n] +} + +// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. +// Factored out for readability - do not use outside karatsuba. +func karatsubaAdd(z, x nat, n int) { + if c := addVV(z[0:n], z, x); c != 0 { + addVW(z[n:n+n>>1], z[n:], c) + } +} + +// Like karatsubaAdd, but does subtract. +func karatsubaSub(z, x nat, n int) { + if c := subVV(z[0:n], z, x); c != 0 { + subVW(z[n:n+n>>1], z[n:], c) + } +} + +// Operands that are shorter than karatsubaThreshold are multiplied using +// "grade school" multiplication; for longer operands the Karatsuba algorithm +// is used. +var karatsubaThreshold = 40 // computed by calibrate_test.go + +// karatsuba multiplies x and y and leaves the result in z. +// Both x and y must have the same length n and n must be a +// power of 2. The result vector z must have len(z) >= 6*n. +// The (non-normalized) result is placed in z[0 : 2*n]. +func karatsuba(z, x, y nat) { + n := len(y) + + // Switch to basic multiplication if numbers are odd or small. + // (n is always even if karatsubaThreshold is even, but be + // conservative) + if n&1 != 0 || n < karatsubaThreshold || n < 2 { + basicMul(z, x, y) + return + } + // n&1 == 0 && n >= karatsubaThreshold && n >= 2 + + // Karatsuba multiplication is based on the observation that + // for two numbers x and y with: + // + // x = x1*b + x0 + // y = y1*b + y0 + // + // the product x*y can be obtained with 3 products z2, z1, z0 + // instead of 4: + // + // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 + // = z2*b*b + z1*b + z0 + // + // with: + // + // xd = x1 - x0 + // yd = y0 - y1 + // + // z1 = xd*yd + z2 + z0 + // = (x1-x0)*(y0 - y1) + z2 + z0 + // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 + // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 + // = x1*y0 + x0*y1 + + // split x, y into "digits" + n2 := n >> 1 // n2 >= 1 + x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 + y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 + + // z is used for the result and temporary storage: + // + // 6*n 5*n 4*n 3*n 2*n 1*n 0*n + // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] + // + // For each recursive call of karatsuba, an unused slice of + // z is passed in that has (at least) half the length of the + // caller's z. + + // compute z0 and z2 with the result "in place" in z + karatsuba(z, x0, y0) // z0 = x0*y0 + karatsuba(z[n:], x1, y1) // z2 = x1*y1 + + // compute xd (or the negative value if underflow occurs) + s := 1 // sign of product xd*yd + xd := z[2*n : 2*n+n2] + if subVV(xd, x1, x0) != 0 { // x1-x0 + s = -s + subVV(xd, x0, x1) // x0-x1 + } + + // compute yd (or the negative value if underflow occurs) + yd := z[2*n+n2 : 3*n] + if subVV(yd, y0, y1) != 0 { // y0-y1 + s = -s + subVV(yd, y1, y0) // y1-y0 + } + + // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 + // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 + p := z[n*3:] + karatsuba(p, xd, yd) + + // save original z2:z0 + // (ok to use upper half of z since we're done recursing) + r := z[n*4:] + copy(r, z[:n*2]) + + // add up all partial products + // + // 2*n n 0 + // z = [ z2 | z0 ] + // + [ z0 ] + // + [ z2 ] + // + [ p ] + // + karatsubaAdd(z[n2:], r, n) + karatsubaAdd(z[n2:], r[n:], n) + if s > 0 { + karatsubaAdd(z[n2:], p, n) + } else { + karatsubaSub(z[n2:], p, n) + } +} + +// alias reports whether x and y share the same base array. +// Note: alias assumes that the capacity of underlying arrays +// is never changed for nat values; i.e. that there are +// no 3-operand slice expressions in this code (or worse, +// reflect-based operations to the same effect). +func alias(x, y nat) bool { + return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] +} + +// addAt implements z += x<<(_W*i); z must be long enough. +// (we don't use nat.add because we need z to stay the same +// slice, and we don't need to normalize z after each addition) +func addAt(z, x nat, i int) { + if n := len(x); n > 0 { + if c := addVV(z[i:i+n], z[i:], x); c != 0 { + j := i + n + if j < len(z) { + addVW(z[j:], z[j:], c) + } + } + } +} + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +// karatsubaLen computes an approximation to the maximum k <= n such that +// k = p<<i for a number p <= threshold and an i >= 0. Thus, the +// result is the largest number that can be divided repeatedly by 2 before +// becoming about the value of threshold. +func karatsubaLen(n, threshold int) int { + i := uint(0) + for n > threshold { + n >>= 1 + i++ + } + return n << i +} + +func (z nat) mul(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.mul(y, x) + case m == 0 || n == 0: + return z[:0] + case n == 1: + return z.mulAddWW(x, y[0], 0) + } + // m >= n > 1 + + // determine if z can be reused + if alias(z, x) || alias(z, y) { + z = nil // z is an alias for x or y - cannot reuse + } + + // use basic multiplication if the numbers are small + if n < karatsubaThreshold { + z = z.make(m + n) + basicMul(z, x, y) + return z.norm() + } + // m >= n && n >= karatsubaThreshold && n >= 2 + + // determine Karatsuba length k such that + // + // x = xh*b + x0 (0 <= x0 < b) + // y = yh*b + y0 (0 <= y0 < b) + // b = 1<<(_W*k) ("base" of digits xi, yi) + // + k := karatsubaLen(n, karatsubaThreshold) + // k <= n + + // multiply x0 and y0 via Karatsuba + x0 := x[0:k] // x0 is not normalized + y0 := y[0:k] // y0 is not normalized + z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y + karatsuba(z, x0, y0) + z = z[0 : m+n] // z has final length but may be incomplete + z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) + + // If xh != 0 or yh != 0, add the missing terms to z. For + // + // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) + // yh = y1*b (0 <= y1 < b) + // + // the missing terms are + // + // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 + // + // since all the yi for i > 1 are 0 by choice of k: If any of them + // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would + // be a larger valid threshold contradicting the assumption about k. + // + if k < n || m != n { + tp := getNat(3 * k) + t := *tp + + // add x0*y1*b + x0 := x0.norm() + y1 := y[k:] // y1 is normalized because y is + t = t.mul(x0, y1) // update t so we don't lose t's underlying array + addAt(z, t, k) + + // add xi*y0<<i, xi*y1*b<<(i+k) + y0 := y0.norm() + for i := k; i < len(x); i += k { + xi := x[i:] + if len(xi) > k { + xi = xi[:k] + } + xi = xi.norm() + t = t.mul(xi, y0) + addAt(z, t, i) + t = t.mul(xi, y1) + addAt(z, t, i+k) + } + + putNat(tp) + } + + return z.norm() +} + +// basicSqr sets z = x*x and is asymptotically faster than basicMul +// by about a factor of 2, but slower for small arguments due to overhead. +// Requirements: len(x) > 0, len(z) == 2*len(x) +// The (non-normalized) result is placed in z. +func basicSqr(z, x nat) { + n := len(x) + tp := getNat(2 * n) + t := *tp // temporary variable to hold the products + t.clear() + z[1], z[0] = mulWW(x[0], x[0]) // the initial square + for i := 1; i < n; i++ { + d := x[i] + // z collects the squares x[i] * x[i] + z[2*i+1], z[2*i] = mulWW(d, d) + // t collects the products x[i] * x[j] where j < i + t[2*i] = addMulVVW(t[i:2*i], x[0:i], d) + } + t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products + addVV(z, z, t) // combine the result + putNat(tp) +} + +// karatsubaSqr squares x and leaves the result in z. +// len(x) must be a power of 2 and len(z) >= 6*len(x). +// The (non-normalized) result is placed in z[0 : 2*len(x)]. +// +// The algorithm and the layout of z are the same as for karatsuba. +func karatsubaSqr(z, x nat) { + n := len(x) + + if n&1 != 0 || n < karatsubaSqrThreshold || n < 2 { + basicSqr(z[:2*n], x) + return + } + + n2 := n >> 1 + x1, x0 := x[n2:], x[0:n2] + + karatsubaSqr(z, x0) + karatsubaSqr(z[n:], x1) + + // s = sign(xd*yd) == -1 for xd != 0; s == 1 for xd == 0 + xd := z[2*n : 2*n+n2] + if subVV(xd, x1, x0) != 0 { + subVV(xd, x0, x1) + } + + p := z[n*3:] + karatsubaSqr(p, xd) + + r := z[n*4:] + copy(r, z[:n*2]) + + karatsubaAdd(z[n2:], r, n) + karatsubaAdd(z[n2:], r[n:], n) + karatsubaSub(z[n2:], p, n) // s == -1 for p != 0; s == 1 for p == 0 +} + +// Operands that are shorter than basicSqrThreshold are squared using +// "grade school" multiplication; for operands longer than karatsubaSqrThreshold +// we use the Karatsuba algorithm optimized for x == y. +var basicSqrThreshold = 20 // computed by calibrate_test.go +var karatsubaSqrThreshold = 260 // computed by calibrate_test.go + +// z = x*x +func (z nat) sqr(x nat) nat { + n := len(x) + switch { + case n == 0: + return z[:0] + case n == 1: + d := x[0] + z = z.make(2) + z[1], z[0] = mulWW(d, d) + return z.norm() + } + + if alias(z, x) { + z = nil // z is an alias for x - cannot reuse + } + + if n < basicSqrThreshold { + z = z.make(2 * n) + basicMul(z, x, x) + return z.norm() + } + if n < karatsubaSqrThreshold { + z = z.make(2 * n) + basicSqr(z, x) + return z.norm() + } + + // Use Karatsuba multiplication optimized for x == y. + // The algorithm and layout of z are the same as for mul. + + // z = (x1*b + x0)^2 = x1^2*b^2 + 2*x1*x0*b + x0^2 + + k := karatsubaLen(n, karatsubaSqrThreshold) + + x0 := x[0:k] + z = z.make(max(6*k, 2*n)) + karatsubaSqr(z, x0) // z = x0^2 + z = z[0 : 2*n] + z[2*k:].clear() + + if k < n { + tp := getNat(2 * k) + t := *tp + x0 := x0.norm() + x1 := x[k:] + t = t.mul(x0, x1) + addAt(z, t, k) + addAt(z, t, k) // z = 2*x1*x0*b + x0^2 + t = t.sqr(x1) + addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2 + putNat(tp) + } + + return z.norm() +} + +// mulRange computes the product of all the unsigned integers in the +// range [a, b] inclusively. If a > b (empty range), the result is 1. +func (z nat) mulRange(a, b uint64) nat { + switch { + case a == 0: + // cut long ranges short (optimization) + return z.setUint64(0) + case a > b: + return z.setUint64(1) + case a == b: + return z.setUint64(a) + case a+1 == b: + return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) + } + m := (a + b) / 2 + return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) +} + +// getNat returns a *nat of len n. The contents may not be zero. +// The pool holds *nat to avoid allocation when converting to interface{}. +func getNat(n int) *nat { + var z *nat + if v := natPool.Get(); v != nil { + z = v.(*nat) + } + if z == nil { + z = new(nat) + } + *z = z.make(n) + return z +} + +func putNat(x *nat) { + natPool.Put(x) +} + +var natPool sync.Pool + +// Length of x in bits. x must be normalized. +func (x nat) bitLen() int { + if i := len(x) - 1; i >= 0 { + return i*_W + bits.Len(uint(x[i])) + } + return 0 +} + +// trailingZeroBits returns the number of consecutive least significant zero +// bits of x. +func (x nat) trailingZeroBits() uint { + if len(x) == 0 { + return 0 + } + var i uint + for x[i] == 0 { + i++ + } + // x[i] != 0 + return i*_W + uint(bits.TrailingZeros(uint(x[i]))) +} + +func same(x, y nat) bool { + return len(x) == len(y) && len(x) > 0 && &x[0] == &y[0] +} + +// z = x << s +func (z nat) shl(x nat, s uint) nat { + if s == 0 { + if same(z, x) { + return z + } + if !alias(z, x) { + return z.set(x) + } + } + + m := len(x) + if m == 0 { + return z[:0] + } + // m > 0 + + n := m + int(s/_W) + z = z.make(n + 1) + z[n] = shlVU(z[n-m:n], x, s%_W) + z[0 : n-m].clear() + + return z.norm() +} + +// z = x >> s +func (z nat) shr(x nat, s uint) nat { + if s == 0 { + if same(z, x) { + return z + } + if !alias(z, x) { + return z.set(x) + } + } + + m := len(x) + n := m - int(s/_W) + if n <= 0 { + return z[:0] + } + // n > 0 + + z = z.make(n) + shrVU(z, x[m-n:], s%_W) + + return z.norm() +} + +func (z nat) setBit(x nat, i uint, b uint) nat { + j := int(i / _W) + m := Word(1) << (i % _W) + n := len(x) + switch b { + case 0: + z = z.make(n) + copy(z, x) + if j >= n { + // no need to grow + return z + } + z[j] &^= m + return z.norm() + case 1: + if j >= n { + z = z.make(j + 1) + z[n:].clear() + } else { + z = z.make(n) + } + copy(z, x) + z[j] |= m + // no need to normalize + return z + } + panic("set bit is not 0 or 1") +} + +// bit returns the value of the i'th bit, with lsb == bit 0. +func (x nat) bit(i uint) uint { + j := i / _W + if j >= uint(len(x)) { + return 0 + } + // 0 <= j < len(x) + return uint(x[j] >> (i % _W) & 1) +} + +// sticky returns 1 if there's a 1 bit within the +// i least significant bits, otherwise it returns 0. +func (x nat) sticky(i uint) uint { + j := i / _W + if j >= uint(len(x)) { + if len(x) == 0 { + return 0 + } + return 1 + } + // 0 <= j < len(x) + for _, x := range x[:j] { + if x != 0 { + return 1 + } + } + if x[j]<<(_W-i%_W) != 0 { + return 1 + } + return 0 +} + +func (z nat) and(x, y nat) nat { + m := len(x) + n := len(y) + if m > n { + m = n + } + // m <= n + + z = z.make(m) + for i := 0; i < m; i++ { + z[i] = x[i] & y[i] + } + + return z.norm() +} + +func (z nat) andNot(x, y nat) nat { + m := len(x) + n := len(y) + if n > m { + n = m + } + // m >= n + + z = z.make(m) + for i := 0; i < n; i++ { + z[i] = x[i] &^ y[i] + } + copy(z[n:m], x[n:m]) + + return z.norm() +} + +func (z nat) or(x, y nat) nat { + m := len(x) + n := len(y) + s := x + if m < n { + n, m = m, n + s = y + } + // m >= n + + z = z.make(m) + for i := 0; i < n; i++ { + z[i] = x[i] | y[i] + } + copy(z[n:m], s[n:m]) + + return z.norm() +} + +func (z nat) xor(x, y nat) nat { + m := len(x) + n := len(y) + s := x + if m < n { + n, m = m, n + s = y + } + // m >= n + + z = z.make(m) + for i := 0; i < n; i++ { + z[i] = x[i] ^ y[i] + } + copy(z[n:m], s[n:m]) + + return z.norm() +} + +// random creates a random integer in [0..limit), using the space in z if +// possible. n is the bit length of limit. +func (z nat) random(rand *rand.Rand, limit nat, n int) nat { + if alias(z, limit) { + z = nil // z is an alias for limit - cannot reuse + } + z = z.make(len(limit)) + + bitLengthOfMSW := uint(n % _W) + if bitLengthOfMSW == 0 { + bitLengthOfMSW = _W + } + mask := Word((1 << bitLengthOfMSW) - 1) + + for { + switch _W { + case 32: + for i := range z { + z[i] = Word(rand.Uint32()) + } + case 64: + for i := range z { + z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 + } + default: + panic("unknown word size") + } + z[len(limit)-1] &= mask + if z.cmp(limit) < 0 { + break + } + } + + return z.norm() +} + +// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; +// otherwise it sets z to x**y. The result is the value of z. +func (z nat) expNN(x, y, m nat) nat { + if alias(z, x) || alias(z, y) { + // We cannot allow in-place modification of x or y. + z = nil + } + + // x**y mod 1 == 0 + if len(m) == 1 && m[0] == 1 { + return z.setWord(0) + } + // m == 0 || m > 1 + + // x**0 == 1 + if len(y) == 0 { + return z.setWord(1) + } + // y > 0 + + // x**1 mod m == x mod m + if len(y) == 1 && y[0] == 1 && len(m) != 0 { + _, z = nat(nil).div(z, x, m) + return z + } + // y > 1 + + if len(m) != 0 { + // We likely end up being as long as the modulus. + z = z.make(len(m)) + } + z = z.set(x) + + // If the base is non-trivial and the exponent is large, we use + // 4-bit, windowed exponentiation. This involves precomputing 14 values + // (x^2...x^15) but then reduces the number of multiply-reduces by a + // third. Even for a 32-bit exponent, this reduces the number of + // operations. Uses Montgomery method for odd moduli. + if x.cmp(natOne) > 0 && len(y) > 1 && len(m) > 0 { + if m[0]&1 == 1 { + return z.expNNMontgomery(x, y, m) + } + return z.expNNWindowed(x, y, m) + } + + v := y[len(y)-1] // v > 0 because y is normalized and y > 0 + shift := nlz(v) + 1 + v <<= shift + var q nat + + const mask = 1 << (_W - 1) + + // We walk through the bits of the exponent one by one. Each time we + // see a bit, we square, thus doubling the power. If the bit is a one, + // we also multiply by x, thus adding one to the power. + + w := _W - int(shift) + // zz and r are used to avoid allocating in mul and div as + // otherwise the arguments would alias. + var zz, r nat + for j := 0; j < w; j++ { + zz = zz.sqr(z) + zz, z = z, zz + + if v&mask != 0 { + zz = zz.mul(z, x) + zz, z = z, zz + } + + if len(m) != 0 { + zz, r = zz.div(r, z, m) + zz, r, q, z = q, z, zz, r + } + + v <<= 1 + } + + for i := len(y) - 2; i >= 0; i-- { + v = y[i] + + for j := 0; j < _W; j++ { + zz = zz.sqr(z) + zz, z = z, zz + + if v&mask != 0 { + zz = zz.mul(z, x) + zz, z = z, zz + } + + if len(m) != 0 { + zz, r = zz.div(r, z, m) + zz, r, q, z = q, z, zz, r + } + + v <<= 1 + } + } + + return z.norm() +} + +// expNNWindowed calculates x**y mod m using a fixed, 4-bit window. +func (z nat) expNNWindowed(x, y, m nat) nat { + // zz and r are used to avoid allocating in mul and div as otherwise + // the arguments would alias. + var zz, r nat + + const n = 4 + // powers[i] contains x^i. + var powers [1 << n]nat + powers[0] = natOne + powers[1] = x + for i := 2; i < 1<<n; i += 2 { + p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] + *p = p.sqr(*p2) + zz, r = zz.div(r, *p, m) + *p, r = r, *p + *p1 = p1.mul(*p, x) + zz, r = zz.div(r, *p1, m) + *p1, r = r, *p1 + } + + z = z.setWord(1) + + for i := len(y) - 1; i >= 0; i-- { + yi := y[i] + for j := 0; j < _W; j += n { + if i != len(y)-1 || j != 0 { + // Unrolled loop for significant performance + // gain. Use go test -bench=".*" in crypto/rsa + // to check performance before making changes. + zz = zz.sqr(z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.sqr(z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.sqr(z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.sqr(z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + } + + zz = zz.mul(z, powers[yi>>(_W-n)]) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + yi <<= n + } + } + + return z.norm() +} + +// expNNMontgomery calculates x**y mod m using a fixed, 4-bit window. +// Uses Montgomery representation. +func (z nat) expNNMontgomery(x, y, m nat) nat { + numWords := len(m) + + // We want the lengths of x and m to be equal. + // It is OK if x >= m as long as len(x) == len(m). + if len(x) > numWords { + _, x = nat(nil).div(nil, x, m) + // Note: now len(x) <= numWords, not guaranteed ==. + } + if len(x) < numWords { + rr := make(nat, numWords) + copy(rr, x) + x = rr + } + + // Ideally the precomputations would be performed outside, and reused + // k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson + // Iteration for Multiplicative Inverses Modulo Prime Powers". + k0 := 2 - m[0] + t := m[0] - 1 + for i := 1; i < _W; i <<= 1 { + t *= t + k0 *= (t + 1) + } + k0 = -k0 + + // RR = 2**(2*_W*len(m)) mod m + RR := nat(nil).setWord(1) + zz := nat(nil).shl(RR, uint(2*numWords*_W)) + _, RR = nat(nil).div(RR, zz, m) + if len(RR) < numWords { + zz = zz.make(numWords) + copy(zz, RR) + RR = zz + } + // one = 1, with equal length to that of m + one := make(nat, numWords) + one[0] = 1 + + const n = 4 + // powers[i] contains x^i + var powers [1 << n]nat + powers[0] = powers[0].montgomery(one, RR, m, k0, numWords) + powers[1] = powers[1].montgomery(x, RR, m, k0, numWords) + for i := 2; i < 1<<n; i++ { + powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords) + } + + // initialize z = 1 (Montgomery 1) + z = z.make(numWords) + copy(z, powers[0]) + + zz = zz.make(numWords) + + // same windowed exponent, but with Montgomery multiplications + for i := len(y) - 1; i >= 0; i-- { + yi := y[i] + for j := 0; j < _W; j += n { + if i != len(y)-1 || j != 0 { + zz = zz.montgomery(z, z, m, k0, numWords) + z = z.montgomery(zz, zz, m, k0, numWords) + zz = zz.montgomery(z, z, m, k0, numWords) + z = z.montgomery(zz, zz, m, k0, numWords) + } + zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords) + z, zz = zz, z + yi <<= n + } + } + // convert to regular number + zz = zz.montgomery(z, one, m, k0, numWords) + + // One last reduction, just in case. + // See golang.org/issue/13907. + if zz.cmp(m) >= 0 { + // Common case is m has high bit set; in that case, + // since zz is the same length as m, there can be just + // one multiple of m to remove. Just subtract. + // We think that the subtract should be sufficient in general, + // so do that unconditionally, but double-check, + // in case our beliefs are wrong. + // The div is not expected to be reached. + zz = zz.sub(zz, m) + if zz.cmp(m) >= 0 { + _, zz = nat(nil).div(nil, zz, m) + } + } + + return zz.norm() +} + +// bytes writes the value of z into buf using big-endian encoding. +// The value of z is encoded in the slice buf[i:]. If the value of z +// cannot be represented in buf, bytes panics. The number i of unused +// bytes at the beginning of buf is returned as result. +func (z nat) bytes(buf []byte) (i int) { + i = len(buf) + for _, d := range z { + for j := 0; j < _S; j++ { + i-- + if i >= 0 { + buf[i] = byte(d) + } else if byte(d) != 0 { + panic("math/big: buffer too small to fit value") + } + d >>= 8 + } + } + + if i < 0 { + i = 0 + } + for i < len(buf) && buf[i] == 0 { + i++ + } + + return +} + +// bigEndianWord returns the contents of buf interpreted as a big-endian encoded Word value. +func bigEndianWord(buf []byte) Word { + if _W == 64 { + return Word(binary.BigEndian.Uint64(buf)) + } + return Word(binary.BigEndian.Uint32(buf)) +} + +// setBytes interprets buf as the bytes of a big-endian unsigned +// integer, sets z to that value, and returns z. +func (z nat) setBytes(buf []byte) nat { + z = z.make((len(buf) + _S - 1) / _S) + + i := len(buf) + for k := 0; i >= _S; k++ { + z[k] = bigEndianWord(buf[i-_S : i]) + i -= _S + } + if i > 0 { + var d Word + for s := uint(0); i > 0; s += 8 { + d |= Word(buf[i-1]) << s + i-- + } + z[len(z)-1] = d + } + + return z.norm() +} + +// sqrt sets z = ⌊√x⌋ +func (z nat) sqrt(x nat) nat { + if x.cmp(natOne) <= 0 { + return z.set(x) + } + if alias(z, x) { + z = nil + } + + // Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller. + // See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt). + // https://members.loria.fr/PZimmermann/mca/pub226.html + // If x is one less than a perfect square, the sequence oscillates between the correct z and z+1; + // otherwise it converges to the correct z and stays there. + var z1, z2 nat + z1 = z + z1 = z1.setUint64(1) + z1 = z1.shl(z1, uint(x.bitLen()+1)/2) // must be ≥ √x + for n := 0; ; n++ { + z2, _ = z2.div(nil, x, z1) + z2 = z2.add(z2, z1) + z2 = z2.shr(z2, 1) + if z2.cmp(z1) >= 0 { + // z1 is answer. + // Figure out whether z1 or z2 is currently aliased to z by looking at loop count. + if n&1 == 0 { + return z1 + } + return z.set(z1) + } + z1, z2 = z2, z1 + } +} diff --git a/contrib/go/_std_1.18/src/math/big/natconv.go b/contrib/go/_std_1.18/src/math/big/natconv.go new file mode 100644 index 0000000000..42d1cccf6f --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/natconv.go @@ -0,0 +1,512 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements nat-to-string conversion functions. + +package big + +import ( + "errors" + "fmt" + "io" + "math" + "math/bits" + "sync" +) + +const digits = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +// Note: MaxBase = len(digits), but it must remain an untyped rune constant +// for API compatibility. + +// MaxBase is the largest number base accepted for string conversions. +const MaxBase = 10 + ('z' - 'a' + 1) + ('Z' - 'A' + 1) +const maxBaseSmall = 10 + ('z' - 'a' + 1) + +// maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M. +// For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word. +// In other words, at most n digits in base b fit into a Word. +// TODO(gri) replace this with a table, generated at build time. +func maxPow(b Word) (p Word, n int) { + p, n = b, 1 // assuming b <= _M + for max := _M / b; p <= max; { + // p == b**n && p <= max + p *= b + n++ + } + // p == b**n && p <= _M + return +} + +// pow returns x**n for n > 0, and 1 otherwise. +func pow(x Word, n int) (p Word) { + // n == sum of bi * 2**i, for 0 <= i < imax, and bi is 0 or 1 + // thus x**n == product of x**(2**i) for all i where bi == 1 + // (Russian Peasant Method for exponentiation) + p = 1 + for n > 0 { + if n&1 != 0 { + p *= x + } + x *= x + n >>= 1 + } + return +} + +// scan errors +var ( + errNoDigits = errors.New("number has no digits") + errInvalSep = errors.New("'_' must separate successive digits") +) + +// scan scans the number corresponding to the longest possible prefix +// from r representing an unsigned number in a given conversion base. +// scan returns the corresponding natural number res, the actual base b, +// a digit count, and a read or syntax error err, if any. +// +// For base 0, an underscore character ``_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number, or the returned +// digit count. Incorrect placement of underscores is reported as an +// error if there are no other errors. If base != 0, underscores are +// not recognized and thus terminate scanning like any other character +// that is not a valid radix point or digit. +// +// number = mantissa | prefix pmantissa . +// prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] . +// mantissa = digits "." [ digits ] | digits | "." digits . +// pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits . +// digits = digit { [ "_" ] digit } . +// digit = "0" ... "9" | "a" ... "z" | "A" ... "Z" . +// +// Unless fracOk is set, the base argument must be 0 or a value between +// 2 and MaxBase. If fracOk is set, the base argument must be one of +// 0, 2, 8, 10, or 16. Providing an invalid base argument leads to a run- +// time panic. +// +// For base 0, the number prefix determines the actual base: A prefix of +// ``0b'' or ``0B'' selects base 2, ``0o'' or ``0O'' selects base 8, and +// ``0x'' or ``0X'' selects base 16. If fracOk is false, a ``0'' prefix +// (immediately followed by digits) selects base 8 as well. Otherwise, +// the selected base is 10 and no prefix is accepted. +// +// If fracOk is set, a period followed by a fractional part is permitted. +// The result value is computed as if there were no period present; and +// the count value is used to determine the fractional part. +// +// For bases <= 36, lower and upper case letters are considered the same: +// The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35. +// For bases > 36, the upper case letters 'A' to 'Z' represent the digit +// values 36 to 61. +// +// A result digit count > 0 corresponds to the number of (non-prefix) digits +// parsed. A digit count <= 0 indicates the presence of a period (if fracOk +// is set, only), and -count is the number of fractional digits found. +// In this case, the actual value of the scanned number is res * b**count. +// +func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) { + // reject invalid bases + baseOk := base == 0 || + !fracOk && 2 <= base && base <= MaxBase || + fracOk && (base == 2 || base == 8 || base == 10 || base == 16) + if !baseOk { + panic(fmt.Sprintf("invalid number base %d", base)) + } + + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit + // and if base == 0. + prev := '.' + invalSep := false + + // one char look-ahead + ch, err := r.ReadByte() + + // determine actual base + b, prefix := base, 0 + if base == 0 { + // actual base is 10 unless there's a base prefix + b = 10 + if err == nil && ch == '0' { + prev = '0' + count = 1 + ch, err = r.ReadByte() + if err == nil { + // possibly one of 0b, 0B, 0o, 0O, 0x, 0X + switch ch { + case 'b', 'B': + b, prefix = 2, 'b' + case 'o', 'O': + b, prefix = 8, 'o' + case 'x', 'X': + b, prefix = 16, 'x' + default: + if !fracOk { + b, prefix = 8, '0' + } + } + if prefix != 0 { + count = 0 // prefix is not counted + if prefix != '0' { + ch, err = r.ReadByte() + } + } + } + } + } + + // convert string + // Algorithm: Collect digits in groups of at most n digits in di + // and then use mulAddWW for every such group to add them to the + // result. + z = z[:0] + b1 := Word(b) + bn, n := maxPow(b1) // at most n digits in base b1 fit into Word + di := Word(0) // 0 <= di < b1**i < bn + i := 0 // 0 <= i < n + dp := -1 // position of decimal point + for err == nil { + if ch == '.' && fracOk { + fracOk = false + if prev == '_' { + invalSep = true + } + prev = '.' + dp = count + } else if ch == '_' && base == 0 { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + // convert rune into digit value d1 + var d1 Word + switch { + case '0' <= ch && ch <= '9': + d1 = Word(ch - '0') + case 'a' <= ch && ch <= 'z': + d1 = Word(ch - 'a' + 10) + case 'A' <= ch && ch <= 'Z': + if b <= maxBaseSmall { + d1 = Word(ch - 'A' + 10) + } else { + d1 = Word(ch - 'A' + maxBaseSmall) + } + default: + d1 = MaxBase + 1 + } + if d1 >= b1 { + r.UnreadByte() // ch does not belong to number anymore + break + } + prev = '0' + count++ + + // collect d1 in di + di = di*b1 + d1 + i++ + + // if di is "full", add it to the result + if i == n { + z = z.mulAddWW(z, bn, di) + di = 0 + i = 0 + } + } + + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep + } + + if count == 0 { + // no digits found + if prefix == '0' { + // there was only the octal prefix 0 (possibly followed by separators and digits > 7); + // interpret as decimal 0 + return z[:0], 10, 1, err + } + err = errNoDigits // fall through; result will be 0 + } + + // add remaining digits to result + if i > 0 { + z = z.mulAddWW(z, pow(b1, i), di) + } + res = z.norm() + + // adjust count for fraction, if any + if dp >= 0 { + // 0 <= dp <= count + count = dp - count + } + + return +} + +// utoa converts x to an ASCII representation in the given base; +// base must be between 2 and MaxBase, inclusive. +func (x nat) utoa(base int) []byte { + return x.itoa(false, base) +} + +// itoa is like utoa but it prepends a '-' if neg && x != 0. +func (x nat) itoa(neg bool, base int) []byte { + if base < 2 || base > MaxBase { + panic("invalid base") + } + + // x == 0 + if len(x) == 0 { + return []byte("0") + } + // len(x) > 0 + + // allocate buffer for conversion + i := int(float64(x.bitLen())/math.Log2(float64(base))) + 1 // off by 1 at most + if neg { + i++ + } + s := make([]byte, i) + + // convert power of two and non power of two bases separately + if b := Word(base); b == b&-b { + // shift is base b digit size in bits + shift := uint(bits.TrailingZeros(uint(b))) // shift > 0 because b >= 2 + mask := Word(1<<shift - 1) + w := x[0] // current word + nbits := uint(_W) // number of unprocessed bits in w + + // convert less-significant words (include leading zeros) + for k := 1; k < len(x); k++ { + // convert full digits + for nbits >= shift { + i-- + s[i] = digits[w&mask] + w >>= shift + nbits -= shift + } + + // convert any partial leading digit and advance to next word + if nbits == 0 { + // no partial digit remaining, just advance + w = x[k] + nbits = _W + } else { + // partial digit in current word w (== x[k-1]) and next word x[k] + w |= x[k] << nbits + i-- + s[i] = digits[w&mask] + + // advance + w = x[k] >> (shift - nbits) + nbits = _W - (shift - nbits) + } + } + + // convert digits of most-significant word w (omit leading zeros) + for w != 0 { + i-- + s[i] = digits[w&mask] + w >>= shift + } + + } else { + bb, ndigits := maxPow(b) + + // construct table of successive squares of bb*leafSize to use in subdivisions + // result (table != nil) <=> (len(x) > leafSize > 0) + table := divisors(len(x), b, ndigits, bb) + + // preserve x, create local copy for use by convertWords + q := nat(nil).set(x) + + // convert q to string s in base b + q.convertWords(s, b, ndigits, bb, table) + + // strip leading zeros + // (x != 0; thus s must contain at least one non-zero digit + // and the loop will terminate) + i = 0 + for s[i] == '0' { + i++ + } + } + + if neg { + i-- + s[i] = '-' + } + + return s[i:] +} + +// Convert words of q to base b digits in s. If q is large, it is recursively "split in half" +// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using +// repeated nat/Word division. +// +// The iterative method processes n Words by n divW() calls, each of which visits every Word in the +// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. +// Recursive conversion divides q by its approximate square root, yielding two parts, each half +// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s +// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and +// is made better by splitting the subblocks recursively. Best is to split blocks until one more +// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the +// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the +// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and +// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for +// specific hardware. +// +func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []divisor) { + // split larger blocks recursively + if table != nil { + // len(q) > leafSize > 0 + var r nat + index := len(table) - 1 + for len(q) > leafSize { + // find divisor close to sqrt(q) if possible, but in any case < q + maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length + minLength := maxLength >> 1 // ~= log2 sqrt(q) + for index > 0 && table[index-1].nbits > minLength { + index-- // desired + } + if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 { + index-- + if index < 0 { + panic("internal inconsistency") + } + } + + // split q into the two digit number (q'*bbb + r) to form independent subblocks + q, r = q.div(r, q, table[index].bbb) + + // convert subblocks and collect results in s[:h] and s[h:] + h := len(s) - table[index].ndigits + r.convertWords(s[h:], b, ndigits, bb, table[0:index]) + s = s[:h] // == q.convertWords(s, b, ndigits, bb, table[0:index+1]) + } + } + + // having split any large blocks now process the remaining (small) block iteratively + i := len(s) + var r Word + if b == 10 { + // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants) + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + // avoid % computation since r%10 == r - int(r/10)*10; + // this appears to be faster for BenchmarkString10000Base10 + // and smaller strings (but a bit slower for larger ones) + t := r / 10 + s[i] = '0' + byte(r-t*10) + r = t + } + } + } else { + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + s[i] = digits[r%b] + r /= b + } + } + } + + // prepend high-order zeros + for i > 0 { // while need more leading zeros + i-- + s[i] = '0' + } +} + +// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion) +// Benchmark and configure leafSize using: go test -bench="Leaf" +// 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines) +// 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU +var leafSize int = 8 // number of Word-size binary values treat as a monolithic block + +type divisor struct { + bbb nat // divisor + nbits int // bit length of divisor (discounting leading zeros) ~= log2(bbb) + ndigits int // digit length of divisor in terms of output base digits +} + +var cacheBase10 struct { + sync.Mutex + table [64]divisor // cached divisors for base 10 +} + +// expWW computes x**y +func (z nat) expWW(x, y Word) nat { + return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil) +} + +// construct table of powers of bb*leafSize to use in subdivisions +func divisors(m int, b Word, ndigits int, bb Word) []divisor { + // only compute table when recursive conversion is enabled and x is large + if leafSize == 0 || m <= leafSize { + return nil + } + + // determine k where (bb**leafSize)**(2**k) >= sqrt(x) + k := 1 + for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 { + k++ + } + + // reuse and extend existing table of divisors or create new table as appropriate + var table []divisor // for b == 10, table overlaps with cacheBase10.table + if b == 10 { + cacheBase10.Lock() + table = cacheBase10.table[0:k] // reuse old table for this conversion + } else { + table = make([]divisor, k) // create new table for this conversion + } + + // extend table + if table[k-1].ndigits == 0 { + // add new entries as needed + var larger nat + for i := 0; i < k; i++ { + if table[i].ndigits == 0 { + if i == 0 { + table[0].bbb = nat(nil).expWW(bb, Word(leafSize)) + table[0].ndigits = ndigits * leafSize + } else { + table[i].bbb = nat(nil).sqr(table[i-1].bbb) + table[i].ndigits = 2 * table[i-1].ndigits + } + + // optimization: exploit aggregated extra bits in macro blocks + larger = nat(nil).set(table[i].bbb) + for mulAddVWW(larger, larger, b, 0) == 0 { + table[i].bbb = table[i].bbb.set(larger) + table[i].ndigits++ + } + + table[i].nbits = table[i].bbb.bitLen() + } + } + } + + if b == 10 { + cacheBase10.Unlock() + } + + return table +} diff --git a/contrib/go/_std_1.18/src/math/big/natdiv.go b/contrib/go/_std_1.18/src/math/big/natdiv.go new file mode 100644 index 0000000000..882bb6d3ba --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/natdiv.go @@ -0,0 +1,884 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + +Multi-precision division. Here be dragons. + +Given u and v, where u is n+m digits, and v is n digits (with no leading zeros), +the goal is to return quo, rem such that u = quo*v + rem, where 0 ≤ rem < v. +That is, quo = ⌊u/v⌋ where ⌊x⌋ denotes the floor (truncation to integer) of x, +and rem = u - quo·v. + + +Long Division + +Division in a computer proceeds the same as long division in elementary school, +but computers are not as good as schoolchildren at following vague directions, +so we have to be much more precise about the actual steps and what can happen. + +We work from most to least significant digit of the quotient, doing: + + • Guess a digit q, the number of v to subtract from the current + section of u to zero out the topmost digit. + • Check the guess by multiplying q·v and comparing it against + the current section of u, adjusting the guess as needed. + • Subtract q·v from the current section of u. + • Add q to the corresponding section of the result quo. + +When all digits have been processed, the final remainder is left in u +and returned as rem. + +For example, here is a sketch of dividing 5 digits by 3 digits (n=3, m=2). + + q₂ q₁ q₀ + _________________ + v₂ v₁ v₀ ) u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ | | + [u₄ u₃ u₂]| | + - [ q₂·v ]| | + ----------- ↓ | + [ rem | u₁]| + - [ q₁·v ]| + ----------- ↓ + [ rem | u₀] + - [ q₀·v ] + ------------ + [ rem ] + +Instead of creating new storage for the remainders and copying digits from u +as indicated by the arrows, we use u's storage directly as both the source +and destination of the subtractions, so that the remainders overwrite +successive overlapping sections of u as the division proceeds, using a slice +of u to identify the current section. This avoids all the copying as well as +shifting of remainders. + +Division of u with n+m digits by v with n digits (in base B) can in general +produce at most m+1 digits, because: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^(n-1) [B^(n-1) is the smallest n-digit number] + • u/v < B^(n+m) / B^(n-1) [divide bounds for u, v] + • u/v < B^(m+1) [simplify] + +The first step is special: it takes the top n digits of u and divides them by +the n digits of v, producing the first quotient digit and an n-digit remainder. +In the example, q₂ = ⌊u₄u₃u₂ / v⌋. + +The first step divides n digits by n digits to ensure that it produces only a +single digit. + +Each subsequent step appends the next digit from u to the remainder and divides +those n+1 digits by the n digits of v, producing another quotient digit and a +new n-digit remainder. + +Subsequent steps divide n+1 digits by n digits, an operation that in general +might produce two digits. However, as used in the algorithm, that division is +guaranteed to produce only a single digit. The dividend is of the form +rem·B + d, where rem is a remainder from the previous step and d is a single +digit, so: + + • rem ≤ v - 1 [rem is a remainder from dividing by v] + • rem·B ≤ v·B - B [multiply by B] + • d ≤ B - 1 [d is a single digit] + • rem·B + d ≤ v·B - 1 [add] + • rem·B + d < v·B [change ≤ to <] + • (rem·B + d)/v < B [divide by v] + + +Guess and Check + +At each step we need to divide n+1 digits by n digits, but this is for the +implementation of division by n digits, so we can't just invoke a division +routine: we _are_ the division routine. Instead, we guess at the answer and +then check it using multiplication. If the guess is wrong, we correct it. + +How can this guessing possibly be efficient? It turns out that the following +statement (let's call it the Good Guess Guarantee) is true. + +If + + • q = ⌊u/v⌋ where u is n+1 digits and v is n digits, + • q < B, and + • the topmost digit of v = vₙ₋₁ ≥ B/2, + +then q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ satisfies q ≤ q̂ ≤ q+2. (Proof below.) + +That is, if we know the answer has only a single digit and we guess an answer +by ignoring the bottom n-1 digits of u and v, using a 2-by-1-digit division, +then that guess is at least as large as the correct answer. It is also not +too much larger: it is off by at most two from the correct answer. + +Note that in the first step of the overall division, which is an n-by-n-digit +division, the 2-by-1 guess uses an implicit uₙ = 0. + +Note that using a 2-by-1-digit division here does not mean calling ourselves +recursively. Instead, we use an efficient direct hardware implementation of +that operation. + +Note that because q is u/v rounded down, q·v must not exceed u: u ≥ q·v. +If a guess q̂ is too big, it will not satisfy this test. Viewed a different way, +the remainder r̂ for a given q̂ is u - q̂·v, which must be positive. If it is +negative, then the guess q̂ is too big. + +This gives us a way to compute q. First compute q̂ with 2-by-1-digit division. +Then, while u < q̂·v, decrement q̂; this loop executes at most twice, because +q̂ ≤ q+2. + + +Scaling Inputs + +The Good Guess Guarantee requires that the top digit of v (vₙ₋₁) be at least B/2. +For example in base 10, ⌊172/19⌋ = 9, but ⌊18/1⌋ = 18: the guess is wildly off +because the first digit 1 is smaller than B/2 = 5. + +We can ensure that v has a large top digit by multiplying both u and v by the +right amount. Continuing the example, if we multiply both 172 and 19 by 3, we +now have ⌊516/57⌋, the leading digit of v is now ≥ 5, and sure enough +⌊51/5⌋ = 10 is much closer to the correct answer 9. It would be easier here +to multiply by 4, because that can be done with a shift. Specifically, we can +always count the number of leading zeros i in the first digit of v and then +shift both u and v left by i bits. + +Having scaled u and v, the value ⌊u/v⌋ is unchanged, but the remainder will +be scaled: 172 mod 19 is 1, but 516 mod 57 is 3. We have to divide the remainder +by the scaling factor (shifting right i bits) when we finish. + +Note that these shifts happen before and after the entire division algorithm, +not at each step in the per-digit iteration. + +Note the effect of scaling inputs on the size of the possible quotient. +In the scaled u/v, u can gain a digit from scaling; v never does, because we +pick the scaling factor to make v's top digit larger but without overflowing. +If u and v have n+m and n digits after scaling, then: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^n / 2 [vₙ₋₁ ≥ B/2, so vₙ₋₁·B^(n-1) ≥ B^n/2] + • u/v < B^(n+m) / (B^n / 2) [divide bounds for u, v] + • u/v < 2 B^m [simplify] + +The quotient can still have m+1 significant digits, but if so the top digit +must be a 1. This provides a different way to handle the first digit of the +result: compare the top n digits of u against v and fill in either a 0 or a 1. + + +Refining Guesses + +Before we check whether u < q̂·v, we can adjust our guess to change it from +q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ into the refined guess ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋. +Although not mentioned above, the Good Guess Guarantee also promises that this +3-by-2-digit division guess is more precise and at most one away from the real +answer q. The improvement from the 2-by-1 to the 3-by-2 guess can also be done +without n-digit math. + +If we have a guess q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ and we want to see if it also equal to +⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, we can use the same check we would for the full division: +if uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂, then the guess is too large and should be reduced. + +Checking uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ < 0, +and + + uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ = (uₙuₙ₋₁·B + uₙ₋₂) - q̂·(vₙ₋₁·B + vₙ₋₂) + [splitting off the bottom digit] + = (uₙuₙ₋₁ - q̂·vₙ₋₁)·B + uₙ₋₂ - q̂·vₙ₋₂ + [regrouping] + +The expression (uₙuₙ₋₁ - q̂·vₙ₋₁) is the remainder of uₙuₙ₋₁ / vₙ₋₁. +If the initial guess returns both q̂ and its remainder r̂, then checking +whether uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as checking r̂·B + uₙ₋₂ < q̂·vₙ₋₂. + +If we find that r̂·B + uₙ₋₂ < q̂·vₙ₋₂, then we can adjust the guess by +decrementing q̂ and adding vₙ₋₁ to r̂. We repeat until r̂·B + uₙ₋₂ ≥ q̂·vₙ₋₂. +(As before, this fixup is only needed at most twice.) + +Now that q̂ = ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, as mentioned above it is at most one +away from the correct q, and we've avoided doing any n-digit math. +(If we need the new remainder, it can be computed as r̂·B + uₙ₋₂ - q̂·vₙ₋₂.) + +The final check u < q̂·v and the possible fixup must be done at full precision. +For random inputs, a fixup at this step is exceedingly rare: the 3-by-2 guess +is not often wrong at all. But still we must do the check. Note that since the +3-by-2 guess is off by at most 1, it can be convenient to perform the final +u < q̂·v as part of the computation of the remainder r = u - q̂·v. If the +subtraction underflows, decremeting q̂ and adding one v back to r is enough to +arrive at the final q, r. + +That's the entirety of long division: scale the inputs, and then loop over +each output position, guessing, checking, and correcting the next output digit. + +For a 2n-digit number divided by an n-digit number (the worst size-n case for +division complexity), this algorithm uses n+1 iterations, each of which must do +at least the 1-by-n-digit multiplication q̂·v. That's O(n) iterations of +O(n) time each, so O(n²) time overall. + + +Recursive Division + +For very large inputs, it is possible to improve on the O(n²) algorithm. +Let's call a group of n/2 real digits a (very) “wide digit”. We can run the +standard long division algorithm explained above over the wide digits instead of +the actual digits. This will result in many fewer steps, but the math involved in +each step is more work. + +Where basic long division uses a 2-by-1-digit division to guess the initial q̂, +the new algorithm must use a 2-by-1-wide-digit division, which is of course +really an n-by-n/2-digit division. That's OK: if we implement n-digit division +in terms of n/2-digit division, the recursion will terminate when the divisor +becomes small enough to handle with standard long division or even with the +2-by-1 hardware instruction. + +For example, here is a sketch of dividing 10 digits by 4, proceeding with +wide digits corresponding to two regular digits. The first step, still special, +must leave off a (regular) digit, dividing 5 by 4 and producing a 4-digit +remainder less than v. The middle steps divide 6 digits by 4, guaranteed to +produce two output digits each (one wide digit) with 4-digit remainders. +The final step must use what it has: the 4-digit remainder plus one more, +5 digits to divide by 4. + + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ ↓ | | | | | + [u₉ u₈ u₇ u₆ u₅]| | | | | + - [ q₆q₅·v ]| | | | | + ----------------- ↓ ↓ | | | + [ rem |u₄ u₃]| | | + - [ q₄q₃·v ]| | | + -------------------- ↓ ↓ | + [ rem |u₂ u₁]| + - [ q₂q₁·v ]| + -------------------- ↓ + [ rem |u₀] + - [ q₀·v ] + ------------------ + [ rem ] + +An alternative would be to look ahead to how well n/2 divides into n+m and +adjust the first step to use fewer digits as needed, making the first step +more special to make the last step not special at all. For example, using the +same input, we could choose to use only 4 digits in the first step, leaving +a full wide digit for the last step: + + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ | | | | | | + [u₉ u₈ u₇ u₆]| | | | | | + - [ q₆·v ]| | | | | | + -------------- ↓ ↓ | | | | + [ rem |u₅ u₄]| | | | + - [ q₅q₄·v ]| | | | + -------------------- ↓ ↓ | | + [ rem |u₃ u₂]| | + - [ q₃q₂·v ]| | + -------------------- ↓ ↓ + [ rem |u₁ u₀] + - [ q₁q₀·v ] + --------------------- + [ rem ] + +Today, the code in divRecursiveStep works like the first example. Perhaps in +the future we will make it work like the alternative, to avoid a special case +in the final iteration. + +Either way, each step is a 3-by-2-wide-digit division approximated first by +a 2-by-1-wide-digit division, just as we did for regular digits in long division. +Because the actual answer we want is a 3-by-2-wide-digit division, instead of +multiplying q̂·v directly during the fixup, we can use the quick refinement +from long division (an n/2-by-n/2 multiply) to correct q to its actual value +and also compute the remainder (as mentioned above), and then stop after that, +never doing a full n-by-n multiply. + +Instead of using an n-by-n/2-digit division to produce n/2 digits, we can add +(not discard) one more real digit, doing an (n+1)-by-(n/2+1)-digit division that +produces n/2+1 digits. That single extra digit tightens the Good Guess Guarantee +to q ≤ q̂ ≤ q+1 and lets us drop long division's special treatment of the first +digit. These benefits are discussed more after the Good Guess Guarantee proof +below. + + +How Fast is Recursive Division? + +For a 2n-by-n-digit division, this algorithm runs a 4-by-2 long division over +wide digits, producing two wide digits plus a possible leading regular digit 1, +which can be handled without a recursive call. That is, the algorithm uses two +full iterations, each using an n-by-n/2-digit division and an n/2-by-n/2-digit +multiplication, along with a few n-digit additions and subtractions. The standard +n-by-n-digit multiplication algorithm requires O(n²) time, making the overall +algorithm require time T(n) where + + T(n) = 2T(n/2) + O(n) + O(n²) + +which, by the Bentley-Haken-Saxe theorem, ends up reducing to T(n) = O(n²). +This is not an improvement over regular long division. + +When the number of digits n becomes large enough, Karatsuba's algorithm for +multiplication can be used instead, which takes O(n^log₂3) = O(n^1.6) time. +(Karatsuba multiplication is implemented in func karatsuba in nat.go.) +That makes the overall recursive division algorithm take O(n^1.6) time as well, +which is an improvement, but again only for large enough numbers. + +It is not critical to make sure that every recursion does only two recursive +calls. While in general the number of recursive calls can change the time +analysis, in this case doing three calls does not change the analysis: + + T(n) = 3T(n/2) + O(n) + O(n^log₂3) + +ends up being T(n) = O(n^log₂3). Because the Karatsuba multiplication taking +time O(n^log₂3) is itself doing 3 half-sized recursions, doing three for the +division does not hurt the asymptotic performance. Of course, it is likely +still faster in practice to do two. + + +Proof of the Good Guess Guarantee + +Given numbers x, y, let us break them into the quotients and remainders when +divided by some scaling factor S, with the added constraints that the quotient +x/y and the high part of y are both less than some limit T, and that the high +part of y is at least half as big as T. + + x₁ = ⌊x/S⌋ y₁ = ⌊y/S⌋ + x₀ = x mod S y₀ = y mod S + + x = x₁·S + x₀ 0 ≤ x₀ < S x/y < T + y = y₁·S + y₀ 0 ≤ y₀ < S T/2 ≤ y₁ < T + +And consider the two truncated quotients: + + q = ⌊x/y⌋ + q̂ = ⌊x₁/y₁⌋ + +We will prove that q ≤ q̂ ≤ q+2. + +The guarantee makes no real demands on the scaling factor S: it is simply the +magnitude of the digits cut from both x and y to produce x₁ and y₁. +The guarantee makes only limited demands on T: it must be large enough to hold +the quotient x/y, and y₁ must have roughly the same size. + +To apply to the earlier discussion of 2-by-1 guesses in long division, +we would choose: + + S = Bⁿ⁻¹ + T = B + x = u + x₁ = uₙuₙ₋₁ + x₀ = uₙ₋₂...u₀ + y = v + y₁ = vₙ₋₁ + y₀ = vₙ₋₂...u₀ + +These simpler variables avoid repeating those longer expressions in the proof. + +Note also that, by definition, truncating division ⌊x/y⌋ satisfies + + x/y - 1 < ⌊x/y⌋ ≤ x/y. + +This fact will be used a few times in the proofs. + +Proof that q ≤ q̂: + + q̂·y₁ = ⌊x₁/y₁⌋·y₁ [by definition, q̂ = ⌊x₁/y₁⌋] + > (x₁/y₁ - 1)·y₁ [x₁/y₁ - 1 < ⌊x₁/y₁⌋] + = x₁ - y₁ [distribute y₁] + + So q̂·y₁ > x₁ - y₁. + Since q̂·y₁ is an integer, q̂·y₁ ≥ x₁ - y₁ + 1. + + q̂ - q = q̂ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≥ q̂ - x/y [⌊x/y⌋ < x/y] + = (1/y)·(q̂·y - x) [factor out 1/y] + ≥ (1/y)·(q̂·y₁·S - x) [y = y₁·S + y₀ ≥ y₁·S] + ≥ (1/y)·((x₁ - y₁ + 1)·S - x) [above: q̂·y₁ ≥ x₁ - y₁ + 1] + = (1/y)·(x₁·S - y₁·S + S - x) [distribute S] + = (1/y)·(S - x₀ - y₁·S) [-x = -x₁·S - x₀] + > -y₁·S / y [x₀ < S, so S - x₀ < 0; drop it] + ≥ -1 [y₁·S ≤ y] + + So q̂ - q > -1. + Since q̂ - q is an integer, q̂ - q ≥ 0, or equivalently q ≤ q̂. + +Proof that q̂ ≤ q+2: + + x₁/y₁ - x/y = x₁·S/y₁·S - x/y [multiply left term by S/S] + ≤ x/y₁·S - x/y [x₁S ≤ x] + = (x/y)·(y/y₁·S - 1) [factor out x/y] + = (x/y)·((y - y₁·S)/y₁·S) [move -1 into y/y₁·S fraction] + = (x/y)·(y₀/y₁·S) [y - y₁·S = y₀] + = (x/y)·(1/y₁)·(y₀/S) [factor out 1/y₁] + < (x/y)·(1/y₁) [y₀ < S, so y₀/S < 1] + ≤ (x/y)·(2/T) [y₁ ≥ T/2, so 1/y₁ ≤ 2/T] + < T·(2/T) [x/y < T] + = 2 [T·(2/T) = 2] + + So x₁/y₁ - x/y < 2. + + q̂ - q = ⌊x₁/y₁⌋ - q [by definition, q̂ = ⌊x₁/y₁⌋] + = ⌊x₁/y₁⌋ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≤ x₁/y₁ - ⌊x/y⌋ [⌊x₁/y₁⌋ ≤ x₁/y₁] + < x₁/y₁ - (x/y - 1) [⌊x/y⌋ > x/y - 1] + = (x₁/y₁ - x/y) + 1 [regrouping] + < 2 + 1 [above: x₁/y₁ - x/y < 2] + = 3 + + So q̂ - q < 3. + Since q̂ - q is an integer, q̂ - q ≤ 2. + +Note that when x/y < T/2, the bounds tighten to x₁/y₁ - x/y < 1 and therefore +q̂ - q ≤ 1. + +Note also that in the general case 2n-by-n division where we don't know that +x/y < T, we do know that x/y < 2T, yielding the bound q̂ - q ≤ 4. So we could +remove the special case first step of long division as long as we allow the +first fixup loop to run up to four times. (Using a simple comparison to decide +whether the first digit is 0 or 1 is still more efficient, though.) + +Finally, note that when dividing three leading base-B digits by two (scaled), +we have T = B² and x/y < B = T/B, a much tighter bound than x/y < T. +This in turn yields the much tighter bound x₁/y₁ - x/y < 2/B. This means that +⌊x₁/y₁⌋ and ⌊x/y⌋ can only differ when x/y is less than 2/B greater than an +integer. For random x and y, the chance of this is 2/B, or, for large B, +approximately zero. This means that after we produce the 3-by-2 guess in the +long division algorithm, the fixup loop essentially never runs. + +In the recursive algorithm, the extra digit in (2·⌊n/2⌋+1)-by-(⌊n/2⌋+1)-digit +division has exactly the same effect: the probability of needing a fixup is the +same 2/B. Even better, we can allow the general case x/y < 2T and the fixup +probability only grows to 4/B, still essentially zero. + + +References + +There are no great references for implementing long division; thus this comment. +Here are some notes about what to expect from the obvious references. + +Knuth Volume 2 (Seminumerical Algorithms) section 4.3.1 is the usual canonical +reference for long division, but that entire series is highly compressed, never +repeating a necessary fact and leaving important insights to the exercises. +For example, no rationale whatsoever is given for the calculation that extends +q̂ from a 2-by-1 to a 3-by-2 guess, nor why it reduces the error bound. +The proof that the calculation even has the desired effect is left to exercises. +The solutions to those exercises provided at the back of the book are entirely +calculations, still with no explanation as to what is going on or how you would +arrive at the idea of doing those exact calculations. Nowhere is it mentioned +that this test extends the 2-by-1 guess into a 3-by-2 guess. The proof of the +Good Guess Guarantee is only for the 2-by-1 guess and argues by contradiction, +making it difficult to understand how modifications like adding another digit +or adjusting the quotient range affects the overall bound. + +All that said, Knuth remains the canonical reference. It is dense but packed +full of information and references, and the proofs are simpler than many other +presentations. The proofs above are reworkings of Knuth's to remove the +arguments by contradiction and add explanations or steps that Knuth omitted. +But beware of errors in older printings. Take the published errata with you. + +Brinch Hansen's “Multiple-length Division Revisited: a Tour of the Minefield” +starts with a blunt critique of Knuth's presentation (among others) and then +presents a more detailed and easier to follow treatment of long division, +including an implementation in Pascal. But the algorithm and implementation +work entirely in terms of 3-by-2 division, which is much less useful on modern +hardware than an algorithm using 2-by-1 division. The proofs are a bit too +focused on digit counting and seem needlessly complex, especially compared to +the ones given above. + +Burnikel and Ziegler's “Fast Recursive Division” introduced the key insight of +implementing division by an n-digit divisor using recursive calls to division +by an n/2-digit divisor, relying on Karatsuba multiplication to yield a +sub-quadratic run time. However, the presentation decisions are made almost +entirely for the purpose of simplifying the run-time analysis, rather than +simplifying the presentation. Instead of a single algorithm that loops over +quotient digits, the paper presents two mutually-recursive algorithms, for +2n-by-n and 3n-by-2n. The paper also does not present any general (n+m)-by-n +algorithm. + +The proofs in the paper are remarkably complex, especially considering that +the algorithm is at its core just long division on wide digits, so that the +usual long division proofs apply essentially unaltered. +*/ + +package big + +import "math/bits" + +// div returns q, r such that q = ⌊u/v⌋ and r = u%v = u - q·v. +// It uses z and z2 as the storage for q and r. +func (z nat) div(z2, u, v nat) (q, r nat) { + if len(v) == 0 { + panic("division by zero") + } + + if u.cmp(v) < 0 { + q = z[:0] + r = z2.set(u) + return + } + + if len(v) == 1 { + // Short division: long optimized for a single-word divisor. + // In that case, the 2-by-1 guess is all we need at each step. + var r2 Word + q, r2 = z.divW(u, v[0]) + r = z2.setWord(r2) + return + } + + q, r = z.divLarge(z2, u, v) + return +} + +// divW returns q, r such that q = ⌊x/y⌋ and r = x%y = x - q·y. +// It uses z as the storage for q. +// Note that y is a single digit (Word), not a big number. +func (z nat) divW(x nat, y Word) (q nat, r Word) { + m := len(x) + switch { + case y == 0: + panic("division by zero") + case y == 1: + q = z.set(x) // result is x + return + case m == 0: + q = z[:0] // result is 0 + return + } + // m > 0 + z = z.make(m) + r = divWVW(z, 0, x, y) + q = z.norm() + return +} + +// modW returns x % d. +func (x nat) modW(d Word) (r Word) { + // TODO(agl): we don't actually need to store the q value. + var q nat + q = q.make(len(x)) + return divWVW(q, 0, x, d) +} + +// divWVW overwrites z with ⌊x/y⌋, returning the remainder r. +// The caller must ensure that len(z) = len(x). +func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { + r = xn + if len(x) == 1 { + qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) + z[0] = Word(qq) + return Word(rr) + } + rec := reciprocalWord(y) + for i := len(z) - 1; i >= 0; i-- { + z[i], r = divWW(r, x[i], y, rec) + } + return r +} + +// div returns q, r such that q = ⌊uIn/vIn⌋ and r = uIn%vIn = uIn - q·vIn. +// It uses z and u as the storage for q and r. +// The caller must ensure that len(vIn) ≥ 2 (use divW otherwise) +// and that len(uIn) ≥ len(vIn) (the answer is 0, uIn otherwise). +func (z nat) divLarge(u, uIn, vIn nat) (q, r nat) { + n := len(vIn) + m := len(uIn) - n + + // Scale the inputs so vIn's top bit is 1 (see “Scaling Inputs” above). + // vIn is treated as a read-only input (it may be in use by another + // goroutine), so we must make a copy. + // uIn is copied to u. + shift := nlz(vIn[n-1]) + vp := getNat(n) + v := *vp + shlVU(v, vIn, shift) + u = u.make(len(uIn) + 1) + u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) + + // The caller should not pass aliased z and u, since those are + // the two different outputs, but correct just in case. + if alias(z, u) { + z = nil + } + q = z.make(m + 1) + + // Use basic or recursive long division depending on size. + if n < divRecursiveThreshold { + q.divBasic(u, v) + } else { + q.divRecursive(u, v) + } + putNat(vp) + + q = q.norm() + + // Undo scaling of remainder. + shrVU(u, u, shift) + r = u.norm() + + return q, r +} + +// divBasic implements long division as described above. +// It overwrites q with ⌊u/v⌋ and overwrites u with the remainder r. +// q must be large enough to hold ⌊u/v⌋. +func (q nat) divBasic(u, v nat) { + n := len(v) + m := len(u) - n + + qhatvp := getNat(n + 1) + qhatv := *qhatvp + + // Set up for divWW below, precomputing reciprocal argument. + vn1 := v[n-1] + rec := reciprocalWord(vn1) + + // Compute each digit of quotient. + for j := m; j >= 0; j-- { + // Compute the 2-by-1 guess q̂. + // The first iteration must invent a leading 0 for u. + qhat := Word(_M) + var ujn Word + if j+n < len(u) { + ujn = u[j+n] + } + + // ujn ≤ vn1, or else q̂ would be more than one digit. + // For ujn == vn1, we set q̂ to the max digit M above. + // Otherwise, we compute the 2-by-1 guess. + if ujn != vn1 { + var rhat Word + qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec) + + // Refine q̂ to a 3-by-2 guess. See “Refining Guesses” above. + vn2 := v[n-2] + x1, x2 := mulWW(qhat, vn2) + ujn2 := u[j+n-2] + for greaterThan(x1, x2, rhat, ujn2) { // x1x2 > r̂ u[j+n-2] + qhat-- + prevRhat := rhat + rhat += vn1 + // If r̂ overflows, then + // r̂ u[j+n-2]v[n-1] is now definitely > x1 x2. + if rhat < prevRhat { + break + } + // TODO(rsc): No need for a full mulWW. + // x2 += vn2; if x2 overflows, x1++ + x1, x2 = mulWW(qhat, vn2) + } + } + + // Compute q̂·v. + qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) + qhl := len(qhatv) + if j+qhl > len(u) && qhatv[n] == 0 { + qhl-- + } + + // Subtract q̂·v from the current section of u. + // If it underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back. + c := subVV(u[j:j+qhl], u[j:], qhatv) + if c != 0 { + c := addVV(u[j:j+n], u[j:], v) + // If n == qhl, the carry from subVV and the carry from addVV + // cancel out and don't affect u[j+n]. + if n < qhl { + u[j+n] += c + } + qhat-- + } + + // Save quotient digit. + // Caller may know the top digit is zero and not leave room for it. + if j == m && m == len(q) && qhat == 0 { + continue + } + q[j] = qhat + } + + putNat(qhatvp) +} + +// greaterThan reports whether the two digit numbers x1 x2 > y1 y2. +// TODO(rsc): In contradiction to most of this file, x1 is the high +// digit and x2 is the low digit. This should be fixed. +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} + +// divRecursiveThreshold is the number of divisor digits +// at which point divRecursive is faster than divBasic. +const divRecursiveThreshold = 100 + +// divRecursive implements recursive division as described above. +// It overwrites z with ⌊u/v⌋ and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// This function is just for allocating and freeing temporaries +// around divRecursiveStep, the real implementation. +func (z nat) divRecursive(u, v nat) { + // Recursion depth is (much) less than 2 log₂(len(v)). + // Allocate a slice of temporaries to be reused across recursion, + // plus one extra temporary not live across the recursion. + recDepth := 2 * bits.Len(uint(len(v))) + tmp := getNat(3 * len(v)) + temps := make([]*nat, recDepth) + + z.clear() + z.divRecursiveStep(u, v, 0, tmp, temps) + + // Free temporaries. + for _, n := range temps { + if n != nil { + putNat(n) + } + } + putNat(tmp) +} + +// divRecursiveStep is the actual implementation of recursive division. +// It adds ⌊u/v⌋ to z and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// It uses temps[depth] (allocating if needed) as a temporary live across +// the recursive call. It also uses tmp, but not live across the recursion. +func (z nat) divRecursiveStep(u, v nat, depth int, tmp *nat, temps []*nat) { + // u is a subsection of the original and may have leading zeros. + // TODO(rsc): The v = v.norm() is useless and should be removed. + // We know (and require) that v's top digit is ≥ B/2. + u = u.norm() + v = v.norm() + if len(u) == 0 { + z.clear() + return + } + + // Fall back to basic division if the problem is now small enough. + n := len(v) + if n < divRecursiveThreshold { + z.divBasic(u, v) + return + } + + // Nothing to do if u is shorter than v (implies u < v). + m := len(u) - n + if m < 0 { + return + } + + // We consider B digits in a row as a single wide digit. + // (See “Recursive Division” above.) + // + // TODO(rsc): rename B to Wide, to avoid confusion with _B, + // which is something entirely different. + // TODO(rsc): Look into whether using ⌈n/2⌉ is better than ⌊n/2⌋. + B := n / 2 + + // Allocate a nat for qhat below. + if temps[depth] == nil { + temps[depth] = getNat(n) // TODO(rsc): Can be just B+1. + } else { + *temps[depth] = temps[depth].make(B + 1) + } + + // Compute each wide digit of the quotient. + // + // TODO(rsc): Change the loop to be + // for j := (m+B-1)/B*B; j > 0; j -= B { + // which will make the final step a regular step, letting us + // delete what amounts to an extra copy of the loop body below. + j := m + for j > B { + // Divide u[j-B:j+n] (3 wide digits) by v (2 wide digits). + // First make the 2-by-1-wide-digit guess using a recursive call. + // Then extend the guess to the full 3-by-2 (see “Refining Guesses”). + // + // For the 2-by-1-wide-digit guess, instead of doing 2B-by-B-digit, + // we use a (2B+1)-by-(B+1) digit, which handles the possibility that + // the result has an extra leading 1 digit as well as guaranteeing + // that the computed q̂ will be off by at most 1 instead of 2. + + // s is the number of digits to drop from the 3B- and 2B-digit chunks. + // We drop B-1 to be left with 2B+1 and B+1. + s := (B - 1) + + // uu is the up-to-3B-digit section of u we are working on. + uu := u[j-B:] + + // Compute the 2-by-1 guess q̂, leaving r̂ in uu[s:B+n]. + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + + // Extend to a 3-by-2 quotient and remainder. + // Because divRecursiveStep overwrote the top part of uu with + // the remainder r̂, the full uu already contains the equivalent + // of r̂·B + uₙ₋₂ from the “Refining Guesses” discussion. + // Subtracting q̂·vₙ₋₂ from it will compute the full-length remainder. + // If that subtraction underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back, same as in long division. + + // TODO(rsc): Instead of subtract and fix-up, this code is computing + // q̂·vₙ₋₂ and decrementing q̂ until that product is ≤ u. + // But we can do the subtraction directly, as in the comment above + // and in long division, because we know that q̂ is wrong by at most one. + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + for i := 0; i < 2; i++ { + e := qhatv.cmp(uu.norm()) + if e <= 0 { + break + } + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(uu[s:], v[s:], 0) + } + if qhatv.cmp(uu.norm()) > 0 { + panic("impossible") + } + c := subVV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv) + if c > 0 { + subVW(uu[len(qhatv):], uu[len(qhatv):], c) + } + addAt(z, qhat, j-B) + j -= B + } + + // TODO(rsc): Rewrite loop as described above and delete all this code. + + // Now u < (v<<B), compute lower bits in the same way. + // Choose shift = B-1 again. + s := B - 1 + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(u[s:].norm(), v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + // Set the correct remainder as before. + for i := 0; i < 2; i++ { + if e := qhatv.cmp(u.norm()); e > 0 { + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(u[s:], v[s:], 0) + } + } + if qhatv.cmp(u.norm()) > 0 { + panic("impossible") + } + c := subVV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv) + if c > 0 { + c = subVW(u[len(qhatv):], u[len(qhatv):], c) + } + if c > 0 { + panic("impossible") + } + + // Done! + addAt(z, qhat.norm(), 0) +} diff --git a/contrib/go/_std_1.18/src/math/big/prime.go b/contrib/go/_std_1.18/src/math/big/prime.go new file mode 100644 index 0000000000..d9a5f1ec96 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/prime.go @@ -0,0 +1,320 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import "math/rand" + +// ProbablyPrime reports whether x is probably prime, +// applying the Miller-Rabin test with n pseudorandomly chosen bases +// as well as a Baillie-PSW test. +// +// If x is prime, ProbablyPrime returns true. +// If x is chosen randomly and not prime, ProbablyPrime probably returns false. +// The probability of returning true for a randomly chosen non-prime is at most ¼ⁿ. +// +// ProbablyPrime is 100% accurate for inputs less than 2⁶⁴. +// See Menezes et al., Handbook of Applied Cryptography, 1997, pp. 145-149, +// and FIPS 186-4 Appendix F for further discussion of the error probabilities. +// +// ProbablyPrime is not suitable for judging primes that an adversary may +// have crafted to fool the test. +// +// As of Go 1.8, ProbablyPrime(0) is allowed and applies only a Baillie-PSW test. +// Before Go 1.8, ProbablyPrime applied only the Miller-Rabin tests, and ProbablyPrime(0) panicked. +func (x *Int) ProbablyPrime(n int) bool { + // Note regarding the doc comment above: + // It would be more precise to say that the Baillie-PSW test uses the + // extra strong Lucas test as its Lucas test, but since no one knows + // how to tell any of the Lucas tests apart inside a Baillie-PSW test + // (they all work equally well empirically), that detail need not be + // documented or implicitly guaranteed. + // The comment does avoid saying "the" Baillie-PSW test + // because of this general ambiguity. + + if n < 0 { + panic("negative n for ProbablyPrime") + } + if x.neg || len(x.abs) == 0 { + return false + } + + // primeBitMask records the primes < 64. + const primeBitMask uint64 = 1<<2 | 1<<3 | 1<<5 | 1<<7 | + 1<<11 | 1<<13 | 1<<17 | 1<<19 | 1<<23 | 1<<29 | 1<<31 | + 1<<37 | 1<<41 | 1<<43 | 1<<47 | 1<<53 | 1<<59 | 1<<61 + + w := x.abs[0] + if len(x.abs) == 1 && w < 64 { + return primeBitMask&(1<<w) != 0 + } + + if w&1 == 0 { + return false // x is even + } + + const primesA = 3 * 5 * 7 * 11 * 13 * 17 * 19 * 23 * 37 + const primesB = 29 * 31 * 41 * 43 * 47 * 53 + + var rA, rB uint32 + switch _W { + case 32: + rA = uint32(x.abs.modW(primesA)) + rB = uint32(x.abs.modW(primesB)) + case 64: + r := x.abs.modW((primesA * primesB) & _M) + rA = uint32(r % primesA) + rB = uint32(r % primesB) + default: + panic("math/big: invalid word size") + } + + if rA%3 == 0 || rA%5 == 0 || rA%7 == 0 || rA%11 == 0 || rA%13 == 0 || rA%17 == 0 || rA%19 == 0 || rA%23 == 0 || rA%37 == 0 || + rB%29 == 0 || rB%31 == 0 || rB%41 == 0 || rB%43 == 0 || rB%47 == 0 || rB%53 == 0 { + return false + } + + return x.abs.probablyPrimeMillerRabin(n+1, true) && x.abs.probablyPrimeLucas() +} + +// probablyPrimeMillerRabin reports whether n passes reps rounds of the +// Miller-Rabin primality test, using pseudo-randomly chosen bases. +// If force2 is true, one of the rounds is forced to use base 2. +// See Handbook of Applied Cryptography, p. 139, Algorithm 4.24. +// The number n is known to be non-zero. +func (n nat) probablyPrimeMillerRabin(reps int, force2 bool) bool { + nm1 := nat(nil).sub(n, natOne) + // determine q, k such that nm1 = q << k + k := nm1.trailingZeroBits() + q := nat(nil).shr(nm1, k) + + nm3 := nat(nil).sub(nm1, natTwo) + rand := rand.New(rand.NewSource(int64(n[0]))) + + var x, y, quotient nat + nm3Len := nm3.bitLen() + +NextRandom: + for i := 0; i < reps; i++ { + if i == reps-1 && force2 { + x = x.set(natTwo) + } else { + x = x.random(rand, nm3, nm3Len) + x = x.add(x, natTwo) + } + y = y.expNN(x, q, n) + if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { + continue + } + for j := uint(1); j < k; j++ { + y = y.sqr(y) + quotient, y = quotient.div(y, y, n) + if y.cmp(nm1) == 0 { + continue NextRandom + } + if y.cmp(natOne) == 0 { + return false + } + } + return false + } + + return true +} + +// probablyPrimeLucas reports whether n passes the "almost extra strong" Lucas probable prime test, +// using Baillie-OEIS parameter selection. This corresponds to "AESLPSP" on Jacobsen's tables (link below). +// The combination of this test and a Miller-Rabin/Fermat test with base 2 gives a Baillie-PSW test. +// +// References: +// +// Baillie and Wagstaff, "Lucas Pseudoprimes", Mathematics of Computation 35(152), +// October 1980, pp. 1391-1417, especially page 1401. +// https://www.ams.org/journals/mcom/1980-35-152/S0025-5718-1980-0583518-6/S0025-5718-1980-0583518-6.pdf +// +// Grantham, "Frobenius Pseudoprimes", Mathematics of Computation 70(234), +// March 2000, pp. 873-891. +// https://www.ams.org/journals/mcom/2001-70-234/S0025-5718-00-01197-2/S0025-5718-00-01197-2.pdf +// +// Baillie, "Extra strong Lucas pseudoprimes", OEIS A217719, https://oeis.org/A217719. +// +// Jacobsen, "Pseudoprime Statistics, Tables, and Data", http://ntheory.org/pseudoprimes.html. +// +// Nicely, "The Baillie-PSW Primality Test", http://www.trnicely.net/misc/bpsw.html. +// (Note that Nicely's definition of the "extra strong" test gives the wrong Jacobi condition, +// as pointed out by Jacobsen.) +// +// Crandall and Pomerance, Prime Numbers: A Computational Perspective, 2nd ed. +// Springer, 2005. +func (n nat) probablyPrimeLucas() bool { + // Discard 0, 1. + if len(n) == 0 || n.cmp(natOne) == 0 { + return false + } + // Two is the only even prime. + // Already checked by caller, but here to allow testing in isolation. + if n[0]&1 == 0 { + return n.cmp(natTwo) == 0 + } + + // Baillie-OEIS "method C" for choosing D, P, Q, + // as in https://oeis.org/A217719/a217719.txt: + // try increasing P ≥ 3 such that D = P² - 4 (so Q = 1) + // until Jacobi(D, n) = -1. + // The search is expected to succeed for non-square n after just a few trials. + // After more than expected failures, check whether n is square + // (which would cause Jacobi(D, n) = 1 for all D not dividing n). + p := Word(3) + d := nat{1} + t1 := nat(nil) // temp + intD := &Int{abs: d} + intN := &Int{abs: n} + for ; ; p++ { + if p > 10000 { + // This is widely believed to be impossible. + // If we get a report, we'll want the exact number n. + panic("math/big: internal error: cannot find (D/n) = -1 for " + intN.String()) + } + d[0] = p*p - 4 + j := Jacobi(intD, intN) + if j == -1 { + break + } + if j == 0 { + // d = p²-4 = (p-2)(p+2). + // If (d/n) == 0 then d shares a prime factor with n. + // Since the loop proceeds in increasing p and starts with p-2==1, + // the shared prime factor must be p+2. + // If p+2 == n, then n is prime; otherwise p+2 is a proper factor of n. + return len(n) == 1 && n[0] == p+2 + } + if p == 40 { + // We'll never find (d/n) = -1 if n is a square. + // If n is a non-square we expect to find a d in just a few attempts on average. + // After 40 attempts, take a moment to check if n is indeed a square. + t1 = t1.sqrt(n) + t1 = t1.sqr(t1) + if t1.cmp(n) == 0 { + return false + } + } + } + + // Grantham definition of "extra strong Lucas pseudoprime", after Thm 2.3 on p. 876 + // (D, P, Q above have become Δ, b, 1): + // + // Let U_n = U_n(b, 1), V_n = V_n(b, 1), and Δ = b²-4. + // An extra strong Lucas pseudoprime to base b is a composite n = 2^r s + Jacobi(Δ, n), + // where s is odd and gcd(n, 2*Δ) = 1, such that either (i) U_s ≡ 0 mod n and V_s ≡ ±2 mod n, + // or (ii) V_{2^t s} ≡ 0 mod n for some 0 ≤ t < r-1. + // + // We know gcd(n, Δ) = 1 or else we'd have found Jacobi(d, n) == 0 above. + // We know gcd(n, 2) = 1 because n is odd. + // + // Arrange s = (n - Jacobi(Δ, n)) / 2^r = (n+1) / 2^r. + s := nat(nil).add(n, natOne) + r := int(s.trailingZeroBits()) + s = s.shr(s, uint(r)) + nm2 := nat(nil).sub(n, natTwo) // n-2 + + // We apply the "almost extra strong" test, which checks the above conditions + // except for U_s ≡ 0 mod n, which allows us to avoid computing any U_k values. + // Jacobsen points out that maybe we should just do the full extra strong test: + // "It is also possible to recover U_n using Crandall and Pomerance equation 3.13: + // U_n = D^-1 (2V_{n+1} - PV_n) allowing us to run the full extra-strong test + // at the cost of a single modular inversion. This computation is easy and fast in GMP, + // so we can get the full extra-strong test at essentially the same performance as the + // almost extra strong test." + + // Compute Lucas sequence V_s(b, 1), where: + // + // V(0) = 2 + // V(1) = P + // V(k) = P V(k-1) - Q V(k-2). + // + // (Remember that due to method C above, P = b, Q = 1.) + // + // In general V(k) = α^k + β^k, where α and β are roots of x² - Px + Q. + // Crandall and Pomerance (p.147) observe that for 0 ≤ j ≤ k, + // + // V(j+k) = V(j)V(k) - V(k-j). + // + // So in particular, to quickly double the subscript: + // + // V(2k) = V(k)² - 2 + // V(2k+1) = V(k) V(k+1) - P + // + // We can therefore start with k=0 and build up to k=s in log₂(s) steps. + natP := nat(nil).setWord(p) + vk := nat(nil).setWord(2) + vk1 := nat(nil).setWord(p) + t2 := nat(nil) // temp + for i := int(s.bitLen()); i >= 0; i-- { + if s.bit(uint(i)) != 0 { + // k' = 2k+1 + // V(k') = V(2k+1) = V(k) V(k+1) - P. + t1 = t1.mul(vk, vk1) + t1 = t1.add(t1, n) + t1 = t1.sub(t1, natP) + t2, vk = t2.div(vk, t1, n) + // V(k'+1) = V(2k+2) = V(k+1)² - 2. + t1 = t1.sqr(vk1) + t1 = t1.add(t1, nm2) + t2, vk1 = t2.div(vk1, t1, n) + } else { + // k' = 2k + // V(k'+1) = V(2k+1) = V(k) V(k+1) - P. + t1 = t1.mul(vk, vk1) + t1 = t1.add(t1, n) + t1 = t1.sub(t1, natP) + t2, vk1 = t2.div(vk1, t1, n) + // V(k') = V(2k) = V(k)² - 2 + t1 = t1.sqr(vk) + t1 = t1.add(t1, nm2) + t2, vk = t2.div(vk, t1, n) + } + } + + // Now k=s, so vk = V(s). Check V(s) ≡ ±2 (mod n). + if vk.cmp(natTwo) == 0 || vk.cmp(nm2) == 0 { + // Check U(s) ≡ 0. + // As suggested by Jacobsen, apply Crandall and Pomerance equation 3.13: + // + // U(k) = D⁻¹ (2 V(k+1) - P V(k)) + // + // Since we are checking for U(k) == 0 it suffices to check 2 V(k+1) == P V(k) mod n, + // or P V(k) - 2 V(k+1) == 0 mod n. + t1 := t1.mul(vk, natP) + t2 := t2.shl(vk1, 1) + if t1.cmp(t2) < 0 { + t1, t2 = t2, t1 + } + t1 = t1.sub(t1, t2) + t3 := vk1 // steal vk1, no longer needed below + vk1 = nil + _ = vk1 + t2, t3 = t2.div(t3, t1, n) + if len(t3) == 0 { + return true + } + } + + // Check V(2^t s) ≡ 0 mod n for some 0 ≤ t < r-1. + for t := 0; t < r-1; t++ { + if len(vk) == 0 { // vk == 0 + return true + } + // Optimization: V(k) = 2 is a fixed point for V(k') = V(k)² - 2, + // so if V(k) = 2, we can stop: we will never find a future V(k) == 0. + if len(vk) == 1 && vk[0] == 2 { // vk == 2 + return false + } + // k' = 2k + // V(k') = V(2k) = V(k)² - 2 + t1 = t1.sqr(vk) + t1 = t1.sub(t1, natTwo) + t2, vk = t2.div(vk, t1, n) + } + return false +} diff --git a/contrib/go/_std_1.18/src/math/big/rat.go b/contrib/go/_std_1.18/src/math/big/rat.go new file mode 100644 index 0000000000..731a979ff7 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/rat.go @@ -0,0 +1,544 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision rational numbers. + +package big + +import ( + "fmt" + "math" +) + +// A Rat represents a quotient a/b of arbitrary precision. +// The zero value for a Rat represents the value 0. +// +// Operations always take pointer arguments (*Rat) rather +// than Rat values, and each unique Rat value requires +// its own unique *Rat pointer. To "copy" a Rat value, +// an existing (or newly allocated) Rat must be set to +// a new value using the Rat.Set method; shallow copies +// of Rats are not supported and may lead to errors. +type Rat struct { + // To make zero values for Rat work w/o initialization, + // a zero value of b (len(b) == 0) acts like b == 1. At + // the earliest opportunity (when an assignment to the Rat + // is made), such uninitialized denominators are set to 1. + // a.neg determines the sign of the Rat, b.neg is ignored. + a, b Int +} + +// NewRat creates a new Rat with numerator a and denominator b. +func NewRat(a, b int64) *Rat { + return new(Rat).SetFrac64(a, b) +} + +// SetFloat64 sets z to exactly f and returns z. +// If f is not finite, SetFloat returns nil. +func (z *Rat) SetFloat64(f float64) *Rat { + const expMask = 1<<11 - 1 + bits := math.Float64bits(f) + mantissa := bits & (1<<52 - 1) + exp := int((bits >> 52) & expMask) + switch exp { + case expMask: // non-finite + return nil + case 0: // denormal + exp -= 1022 + default: // normal + mantissa |= 1 << 52 + exp -= 1023 + } + + shift := 52 - exp + + // Optimization (?): partially pre-normalise. + for mantissa&1 == 0 && shift > 0 { + mantissa >>= 1 + shift-- + } + + z.a.SetUint64(mantissa) + z.a.neg = f < 0 + z.b.Set(intOne) + if shift > 0 { + z.b.Lsh(&z.b, uint(shift)) + } else { + z.a.Lsh(&z.a, uint(-shift)) + } + return z.norm() +} + +// quotToFloat32 returns the non-negative float32 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat32(a, b nat) (f float32, exact bool) { + const ( + // float size in bits + Fsize = 32 + + // mantissa + Msize = 23 + Msize1 = Msize + 1 // incl. implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float32 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low32(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Esize1) + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = float32(math.Ldexp(float64(mantissa), exp-Msize1)) + if math.IsInf(float64(f), 0) { + exact = false + } + return +} + +// quotToFloat64 returns the non-negative float64 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat64(a, b nat) (f float64, exact bool) { + const ( + // float size in bits + Fsize = 64 + + // mantissa + Msize = 52 + Msize1 = Msize + 1 // incl. implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float64 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low64(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Esize1) + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = math.Ldexp(float64(mantissa), exp-Msize1) + if math.IsInf(f, 0) { + exact = false + } + return +} + +// Float32 returns the nearest float32 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float32, f is an infinity and exact is false. +// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float32() (f float32, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = natOne + } + f, exact = quotToFloat32(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// Float64 returns the nearest float64 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float64, f is an infinity and exact is false. +// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float64() (f float64, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = natOne + } + f, exact = quotToFloat64(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// SetFrac sets z to a/b and returns z. +// If b == 0, SetFrac panics. +func (z *Rat) SetFrac(a, b *Int) *Rat { + z.a.neg = a.neg != b.neg + babs := b.abs + if len(babs) == 0 { + panic("division by zero") + } + if &z.a == b || alias(z.a.abs, babs) { + babs = nat(nil).set(babs) // make a copy + } + z.a.abs = z.a.abs.set(a.abs) + z.b.abs = z.b.abs.set(babs) + return z.norm() +} + +// SetFrac64 sets z to a/b and returns z. +// If b == 0, SetFrac64 panics. +func (z *Rat) SetFrac64(a, b int64) *Rat { + if b == 0 { + panic("division by zero") + } + z.a.SetInt64(a) + if b < 0 { + b = -b + z.a.neg = !z.a.neg + } + z.b.abs = z.b.abs.setUint64(uint64(b)) + return z.norm() +} + +// SetInt sets z to x (by making a copy of x) and returns z. +func (z *Rat) SetInt(x *Int) *Rat { + z.a.Set(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// SetInt64 sets z to x and returns z. +func (z *Rat) SetInt64(x int64) *Rat { + z.a.SetInt64(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// SetUint64 sets z to x and returns z. +func (z *Rat) SetUint64(x uint64) *Rat { + z.a.SetUint64(x) + z.b.abs = z.b.abs.setWord(1) + return z +} + +// Set sets z to x (by making a copy of x) and returns z. +func (z *Rat) Set(x *Rat) *Rat { + if z != x { + z.a.Set(&x.a) + z.b.Set(&x.b) + } + if len(z.b.abs) == 0 { + z.b.abs = z.b.abs.setWord(1) + } + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. +func (z *Rat) Abs(x *Rat) *Rat { + z.Set(x) + z.a.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Rat) Neg(x *Rat) *Rat { + z.Set(x) + z.a.neg = len(z.a.abs) > 0 && !z.a.neg // 0 has no sign + return z +} + +// Inv sets z to 1/x and returns z. +// If x == 0, Inv panics. +func (z *Rat) Inv(x *Rat) *Rat { + if len(x.a.abs) == 0 { + panic("division by zero") + } + z.Set(x) + z.a.abs, z.b.abs = z.b.abs, z.a.abs + return z +} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Rat) Sign() int { + return x.a.Sign() +} + +// IsInt reports whether the denominator of x is 1. +func (x *Rat) IsInt() bool { + return len(x.b.abs) == 0 || x.b.abs.cmp(natOne) == 0 +} + +// Num returns the numerator of x; it may be <= 0. +// The result is a reference to x's numerator; it +// may change if a new value is assigned to x, and vice versa. +// The sign of the numerator corresponds to the sign of x. +func (x *Rat) Num() *Int { + return &x.a +} + +// Denom returns the denominator of x; it is always > 0. +// The result is a reference to x's denominator, unless +// x is an uninitialized (zero value) Rat, in which case +// the result is a new Int of value 1. (To initialize x, +// any operation that sets x will do, including x.Set(x).) +// If the result is a reference to x's denominator it +// may change if a new value is assigned to x, and vice versa. +func (x *Rat) Denom() *Int { + // Note that x.b.neg is guaranteed false. + if len(x.b.abs) == 0 { + // Note: If this proves problematic, we could + // panic instead and require the Rat to + // be explicitly initialized. + return &Int{abs: nat{1}} + } + return &x.b +} + +func (z *Rat) norm() *Rat { + switch { + case len(z.a.abs) == 0: + // z == 0; normalize sign and denominator + z.a.neg = false + fallthrough + case len(z.b.abs) == 0: + // z is integer; normalize denominator + z.b.abs = z.b.abs.setWord(1) + default: + // z is fraction; normalize numerator and denominator + neg := z.a.neg + z.a.neg = false + z.b.neg = false + if f := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b); f.Cmp(intOne) != 0 { + z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) + z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) + } + z.a.neg = neg + } + return z +} + +// mulDenom sets z to the denominator product x*y (by taking into +// account that 0 values for x or y must be interpreted as 1) and +// returns z. +func mulDenom(z, x, y nat) nat { + switch { + case len(x) == 0 && len(y) == 0: + return z.setWord(1) + case len(x) == 0: + return z.set(y) + case len(y) == 0: + return z.set(x) + } + return z.mul(x, y) +} + +// scaleDenom sets z to the product x*f. +// If f == 0 (zero value of denominator), z is set to (a copy of) x. +func (z *Int) scaleDenom(x *Int, f nat) { + if len(f) == 0 { + z.Set(x) + return + } + z.abs = z.abs.mul(x.abs, f) + z.neg = x.neg +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Rat) Cmp(y *Rat) int { + var a, b Int + a.scaleDenom(&x.a, y.b.abs) + b.scaleDenom(&y.a, x.b.abs) + return a.Cmp(&b) +} + +// Add sets z to the sum x+y and returns z. +func (z *Rat) Add(x, y *Rat) *Rat { + var a1, a2 Int + a1.scaleDenom(&x.a, y.b.abs) + a2.scaleDenom(&y.a, x.b.abs) + z.a.Add(&a1, &a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Sub sets z to the difference x-y and returns z. +func (z *Rat) Sub(x, y *Rat) *Rat { + var a1, a2 Int + a1.scaleDenom(&x.a, y.b.abs) + a2.scaleDenom(&y.a, x.b.abs) + z.a.Sub(&a1, &a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Mul sets z to the product x*y and returns z. +func (z *Rat) Mul(x, y *Rat) *Rat { + if x == y { + // a squared Rat is positive and can't be reduced (no need to call norm()) + z.a.neg = false + z.a.abs = z.a.abs.sqr(x.a.abs) + if len(x.b.abs) == 0 { + z.b.abs = z.b.abs.setWord(1) + } else { + z.b.abs = z.b.abs.sqr(x.b.abs) + } + return z + } + z.a.Mul(&x.a, &y.a) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Quo sets z to the quotient x/y and returns z. +// If y == 0, Quo panics. +func (z *Rat) Quo(x, y *Rat) *Rat { + if len(y.a.abs) == 0 { + panic("division by zero") + } + var a, b Int + a.scaleDenom(&x.a, y.b.abs) + b.scaleDenom(&y.a, x.b.abs) + z.a.abs = a.abs + z.b.abs = b.abs + z.a.neg = a.neg != b.neg + return z.norm() +} diff --git a/contrib/go/_std_1.18/src/math/big/ratconv.go b/contrib/go/_std_1.18/src/math/big/ratconv.go new file mode 100644 index 0000000000..90053a9c81 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/ratconv.go @@ -0,0 +1,380 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements rat-to-string conversion functions. + +package big + +import ( + "errors" + "fmt" + "io" + "strconv" + "strings" +) + +func ratTok(ch rune) bool { + return strings.ContainsRune("+-/0123456789.eE", ch) +} + +var ratZero Rat +var _ fmt.Scanner = &ratZero // *Rat must implement fmt.Scanner + +// Scan is a support routine for fmt.Scanner. It accepts the formats +// 'e', 'E', 'f', 'F', 'g', 'G', and 'v'. All formats are equivalent. +func (z *Rat) Scan(s fmt.ScanState, ch rune) error { + tok, err := s.Token(true, ratTok) + if err != nil { + return err + } + if !strings.ContainsRune("efgEFGv", ch) { + return errors.New("Rat.Scan: invalid verb") + } + if _, ok := z.SetString(string(tok)); !ok { + return errors.New("Rat.Scan: invalid syntax") + } + return nil +} + +// SetString sets z to the value of s and returns z and a boolean indicating +// success. s can be given as a (possibly signed) fraction "a/b", or as a +// floating-point number optionally followed by an exponent. +// If a fraction is provided, both the dividend and the divisor may be a +// decimal integer or independently use a prefix of ``0b'', ``0'' or ``0o'', +// or ``0x'' (or their upper-case variants) to denote a binary, octal, or +// hexadecimal integer, respectively. The divisor may not be signed. +// If a floating-point number is provided, it may be in decimal form or +// use any of the same prefixes as above but for ``0'' to denote a non-decimal +// mantissa. A leading ``0'' is considered a decimal leading 0; it does not +// indicate octal representation in this case. +// An optional base-10 ``e'' or base-2 ``p'' (or their upper-case variants) +// exponent may be provided as well, except for hexadecimal floats which +// only accept an (optional) ``p'' exponent (because an ``e'' or ``E'' cannot +// be distinguished from a mantissa digit). If the exponent's absolute value +// is too large, the operation may fail. +// The entire string, not just a prefix, must be valid for success. If the +// operation failed, the value of z is undefined but the returned value is nil. +func (z *Rat) SetString(s string) (*Rat, bool) { + if len(s) == 0 { + return nil, false + } + // len(s) > 0 + + // parse fraction a/b, if any + if sep := strings.Index(s, "/"); sep >= 0 { + if _, ok := z.a.SetString(s[:sep], 0); !ok { + return nil, false + } + r := strings.NewReader(s[sep+1:]) + var err error + if z.b.abs, _, _, err = z.b.abs.scan(r, 0, false); err != nil { + return nil, false + } + // entire string must have been consumed + if _, err = r.ReadByte(); err != io.EOF { + return nil, false + } + if len(z.b.abs) == 0 { + return nil, false + } + return z.norm(), true + } + + // parse floating-point number + r := strings.NewReader(s) + + // sign + neg, err := scanSign(r) + if err != nil { + return nil, false + } + + // mantissa + var base int + var fcount int // fractional digit count; valid if <= 0 + z.a.abs, base, fcount, err = z.a.abs.scan(r, 0, true) + if err != nil { + return nil, false + } + + // exponent + var exp int64 + var ebase int + exp, ebase, err = scanExponent(r, true, true) + if err != nil { + return nil, false + } + + // there should be no unread characters left + if _, err = r.ReadByte(); err != io.EOF { + return nil, false + } + + // special-case 0 (see also issue #16176) + if len(z.a.abs) == 0 { + return z, true + } + // len(z.a.abs) > 0 + + // The mantissa may have a radix point (fcount <= 0) and there + // may be a nonzero exponent exp. The radix point amounts to a + // division by base**(-fcount), which equals a multiplication by + // base**fcount. An exponent means multiplication by ebase**exp. + // Multiplications are commutative, so we can apply them in any + // order. We only have powers of 2 and 10, and we split powers + // of 10 into the product of the same powers of 2 and 5. This + // may reduce the size of shift/multiplication factors or + // divisors required to create the final fraction, depending + // on the actual floating-point value. + + // determine binary or decimal exponent contribution of radix point + var exp2, exp5 int64 + if fcount < 0 { + // The mantissa has a radix point ddd.dddd; and + // -fcount is the number of digits to the right + // of '.'. Adjust relevant exponent accordingly. + d := int64(fcount) + switch base { + case 10: + exp5 = d + fallthrough // 10**e == 5**e * 2**e + case 2: + exp2 = d + case 8: + exp2 = d * 3 // octal digits are 3 bits each + case 16: + exp2 = d * 4 // hexadecimal digits are 4 bits each + default: + panic("unexpected mantissa base") + } + // fcount consumed - not needed anymore + } + + // take actual exponent into account + switch ebase { + case 10: + exp5 += exp + fallthrough // see fallthrough above + case 2: + exp2 += exp + default: + panic("unexpected exponent base") + } + // exp consumed - not needed anymore + + // apply exp5 contributions + // (start with exp5 so the numbers to multiply are smaller) + if exp5 != 0 { + n := exp5 + if n < 0 { + n = -n + if n < 0 { + // This can occur if -n overflows. -(-1 << 63) would become + // -1 << 63, which is still negative. + return nil, false + } + } + if n > 1e6 { + return nil, false // avoid excessively large exponents + } + pow5 := z.b.abs.expNN(natFive, nat(nil).setWord(Word(n)), nil) // use underlying array of z.b.abs + if exp5 > 0 { + z.a.abs = z.a.abs.mul(z.a.abs, pow5) + z.b.abs = z.b.abs.setWord(1) + } else { + z.b.abs = pow5 + } + } else { + z.b.abs = z.b.abs.setWord(1) + } + + // apply exp2 contributions + if exp2 < -1e7 || exp2 > 1e7 { + return nil, false // avoid excessively large exponents + } + if exp2 > 0 { + z.a.abs = z.a.abs.shl(z.a.abs, uint(exp2)) + } else if exp2 < 0 { + z.b.abs = z.b.abs.shl(z.b.abs, uint(-exp2)) + } + + z.a.neg = neg && len(z.a.abs) > 0 // 0 has no sign + + return z.norm(), true +} + +// scanExponent scans the longest possible prefix of r representing a base 10 +// (``e'', ``E'') or a base 2 (``p'', ``P'') exponent, if any. It returns the +// exponent, the exponent base (10 or 2), or a read or syntax error, if any. +// +// If sepOk is set, an underscore character ``_'' may appear between successive +// exponent digits; such underscores do not change the value of the exponent. +// Incorrect placement of underscores is reported as an error if there are no +// other errors. If sepOk is not set, underscores are not recognized and thus +// terminate scanning like any other character that is not a valid digit. +// +// exponent = ( "e" | "E" | "p" | "P" ) [ sign ] digits . +// sign = "+" | "-" . +// digits = digit { [ '_' ] digit } . +// digit = "0" ... "9" . +// +// A base 2 exponent is only permitted if base2ok is set. +func scanExponent(r io.ByteScanner, base2ok, sepOk bool) (exp int64, base int, err error) { + // one char look-ahead + ch, err := r.ReadByte() + if err != nil { + if err == io.EOF { + err = nil + } + return 0, 10, err + } + + // exponent char + switch ch { + case 'e', 'E': + base = 10 + case 'p', 'P': + if base2ok { + base = 2 + break // ok + } + fallthrough // binary exponent not permitted + default: + r.UnreadByte() // ch does not belong to exponent anymore + return 0, 10, nil + } + + // sign + var digits []byte + ch, err = r.ReadByte() + if err == nil && (ch == '+' || ch == '-') { + if ch == '-' { + digits = append(digits, '-') + } + ch, err = r.ReadByte() + } + + // prev encodes the previously seen char: it is one + // of '_', '0' (a digit), or '.' (anything else). A + // valid separator '_' may only occur after a digit. + prev := '.' + invalSep := false + + // exponent value + hasDigits := false + for err == nil { + if '0' <= ch && ch <= '9' { + digits = append(digits, ch) + prev = '0' + hasDigits = true + } else if ch == '_' && sepOk { + if prev != '0' { + invalSep = true + } + prev = '_' + } else { + r.UnreadByte() // ch does not belong to number anymore + break + } + ch, err = r.ReadByte() + } + + if err == io.EOF { + err = nil + } + if err == nil && !hasDigits { + err = errNoDigits + } + if err == nil { + exp, err = strconv.ParseInt(string(digits), 10, 64) + } + // other errors take precedence over invalid separators + if err == nil && (invalSep || prev == '_') { + err = errInvalSep + } + + return +} + +// String returns a string representation of x in the form "a/b" (even if b == 1). +func (x *Rat) String() string { + return string(x.marshal()) +} + +// marshal implements String returning a slice of bytes +func (x *Rat) marshal() []byte { + var buf []byte + buf = x.a.Append(buf, 10) + buf = append(buf, '/') + if len(x.b.abs) != 0 { + buf = x.b.Append(buf, 10) + } else { + buf = append(buf, '1') + } + return buf +} + +// RatString returns a string representation of x in the form "a/b" if b != 1, +// and in the form "a" if b == 1. +func (x *Rat) RatString() string { + if x.IsInt() { + return x.a.String() + } + return x.String() +} + +// FloatString returns a string representation of x in decimal form with prec +// digits of precision after the radix point. The last digit is rounded to +// nearest, with halves rounded away from zero. +func (x *Rat) FloatString(prec int) string { + var buf []byte + + if x.IsInt() { + buf = x.a.Append(buf, 10) + if prec > 0 { + buf = append(buf, '.') + for i := prec; i > 0; i-- { + buf = append(buf, '0') + } + } + return string(buf) + } + // x.b.abs != 0 + + q, r := nat(nil).div(nat(nil), x.a.abs, x.b.abs) + + p := natOne + if prec > 0 { + p = nat(nil).expNN(natTen, nat(nil).setUint64(uint64(prec)), nil) + } + + r = r.mul(r, p) + r, r2 := r.div(nat(nil), r, x.b.abs) + + // see if we need to round up + r2 = r2.add(r2, r2) + if x.b.abs.cmp(r2) <= 0 { + r = r.add(r, natOne) + if r.cmp(p) >= 0 { + q = nat(nil).add(q, natOne) + r = nat(nil).sub(r, p) + } + } + + if x.a.neg { + buf = append(buf, '-') + } + buf = append(buf, q.utoa(10)...) // itoa ignores sign if q == 0 + + if prec > 0 { + buf = append(buf, '.') + rs := r.utoa(10) + for i := prec - len(rs); i > 0; i-- { + buf = append(buf, '0') + } + buf = append(buf, rs...) + } + + return string(buf) +} diff --git a/contrib/go/_std_1.18/src/math/big/ratmarsh.go b/contrib/go/_std_1.18/src/math/big/ratmarsh.go new file mode 100644 index 0000000000..fbc7b6002d --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/ratmarsh.go @@ -0,0 +1,75 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements encoding/decoding of Rats. + +package big + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// Gob codec version. Permits backward-compatible changes to the encoding. +const ratGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +func (x *Rat) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4) + i := x.b.abs.bytes(buf) + j := x.a.abs.bytes(buf[:i]) + n := i - j + if int(uint32(n)) != n { + // this should never happen + return nil, errors.New("Rat.GobEncode: numerator too large") + } + binary.BigEndian.PutUint32(buf[j-4:j], uint32(n)) + j -= 1 + 4 + b := ratGobVersion << 1 // make space for sign bit + if x.a.neg { + b |= 1 + } + buf[j] = b + return buf[j:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Rat) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Rat{} + return nil + } + b := buf[0] + if b>>1 != ratGobVersion { + return fmt.Errorf("Rat.GobDecode: encoding version %d not supported", b>>1) + } + const j = 1 + 4 + i := j + binary.BigEndian.Uint32(buf[j-4:j]) + z.a.neg = b&1 != 0 + z.a.abs = z.a.abs.setBytes(buf[j:i]) + z.b.abs = z.b.abs.setBytes(buf[i:]) + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (x *Rat) MarshalText() (text []byte, err error) { + if x.IsInt() { + return x.a.MarshalText() + } + return x.marshal(), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +func (z *Rat) UnmarshalText(text []byte) error { + // TODO(gri): get rid of the []byte/string conversion + if _, ok := z.SetString(string(text)); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text) + } + return nil +} diff --git a/contrib/go/_std_1.18/src/math/big/roundingmode_string.go b/contrib/go/_std_1.18/src/math/big/roundingmode_string.go new file mode 100644 index 0000000000..c7629eb98b --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/roundingmode_string.go @@ -0,0 +1,16 @@ +// Code generated by "stringer -type=RoundingMode"; DO NOT EDIT. + +package big + +import "strconv" + +const _RoundingMode_name = "ToNearestEvenToNearestAwayToZeroAwayFromZeroToNegativeInfToPositiveInf" + +var _RoundingMode_index = [...]uint8{0, 13, 26, 32, 44, 57, 70} + +func (i RoundingMode) String() string { + if i >= RoundingMode(len(_RoundingMode_index)-1) { + return "RoundingMode(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _RoundingMode_name[_RoundingMode_index[i]:_RoundingMode_index[i+1]] +} diff --git a/contrib/go/_std_1.18/src/math/big/sqrt.go b/contrib/go/_std_1.18/src/math/big/sqrt.go new file mode 100644 index 0000000000..0d50164557 --- /dev/null +++ b/contrib/go/_std_1.18/src/math/big/sqrt.go @@ -0,0 +1,128 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "math" + "sync" +) + +var threeOnce struct { + sync.Once + v *Float +} + +func three() *Float { + threeOnce.Do(func() { + threeOnce.v = NewFloat(3.0) + }) + return threeOnce.v +} + +// Sqrt sets z to the rounded square root of x, and returns it. +// +// If z's precision is 0, it is changed to x's precision before the +// operation. Rounding is performed according to z's precision and +// rounding mode, but z's accuracy is not computed. Specifically, the +// result of z.Acc() is undefined. +// +// The function panics if z < 0. The value of z is undefined in that +// case. +func (z *Float) Sqrt(x *Float) *Float { + if debugFloat { + x.validate() + } + + if z.prec == 0 { + z.prec = x.prec + } + + if x.Sign() == -1 { + // following IEEE754-2008 (section 7.2) + panic(ErrNaN{"square root of negative operand"}) + } + + // handle ±0 and +∞ + if x.form != finite { + z.acc = Exact + z.form = x.form + z.neg = x.neg // IEEE754-2008 requires √±0 = ±0 + return z + } + + // MantExp sets the argument's precision to the receiver's, and + // when z.prec > x.prec this will lower z.prec. Restore it after + // the MantExp call. + prec := z.prec + b := x.MantExp(z) + z.prec = prec + + // Compute √(z·2**b) as + // √( z)·2**(½b) if b is even + // √(2z)·2**(⌊½b⌋) if b > 0 is odd + // √(½z)·2**(⌈½b⌉) if b < 0 is odd + switch b % 2 { + case 0: + // nothing to do + case 1: + z.exp++ + case -1: + z.exp-- + } + // 0.25 <= z < 2.0 + + // Solving 1/x² - z = 0 avoids Quo calls and is faster, especially + // for high precisions. + z.sqrtInverse(z) + + // re-attach halved exponent + return z.SetMantExp(z, b/2) +} + +// Compute √x (to z.prec precision) by solving +// 1/t² - x = 0 +// for t (using Newton's method), and then inverting. +func (z *Float) sqrtInverse(x *Float) { + // let + // f(t) = 1/t² - x + // then + // g(t) = f(t)/f'(t) = -½t(1 - xt²) + // and the next guess is given by + // t2 = t - g(t) = ½t(3 - xt²) + u := newFloat(z.prec) + v := newFloat(z.prec) + three := three() + ng := func(t *Float) *Float { + u.prec = t.prec + v.prec = t.prec + u.Mul(t, t) // u = t² + u.Mul(x, u) // = xt² + v.Sub(three, u) // v = 3 - xt² + u.Mul(t, v) // u = t(3 - xt²) + u.exp-- // = ½t(3 - xt²) + return t.Set(u) + } + + xf, _ := x.Float64() + sqi := newFloat(z.prec) + sqi.SetFloat64(1 / math.Sqrt(xf)) + for prec := z.prec + 32; sqi.prec < prec; { + sqi.prec *= 2 + sqi = ng(sqi) + } + // sqi = 1/√x + + // x/√x = √x + z.Mul(x, sqi) +} + +// newFloat returns a new *Float with space for twice the given +// precision. +func newFloat(prec2 uint32) *Float { + z := new(Float) + // nat.make ensures the slice length is > 0 + z.mant = z.mant.make(int(prec2/_W) * 2) + return z +} |