From 1a57267a17c2fc17fb6e104846fabc3e363c326c Mon Sep 17 00:00:00 2001 From: Emile Date: Fri, 16 Aug 2024 19:50:26 +0200 Subject: initial commit --- vendor/github.com/remyoudompheng/bigfft/LICENSE | 27 ++ vendor/github.com/remyoudompheng/bigfft/README | 54 +++ .../github.com/remyoudompheng/bigfft/arith_decl.go | 33 ++ vendor/github.com/remyoudompheng/bigfft/fermat.go | 216 ++++++++++++ vendor/github.com/remyoudompheng/bigfft/fft.go | 370 +++++++++++++++++++++ vendor/github.com/remyoudompheng/bigfft/scan.go | 70 ++++ 6 files changed, 770 insertions(+) create mode 100644 vendor/github.com/remyoudompheng/bigfft/LICENSE create mode 100644 vendor/github.com/remyoudompheng/bigfft/README create mode 100644 vendor/github.com/remyoudompheng/bigfft/arith_decl.go create mode 100644 vendor/github.com/remyoudompheng/bigfft/fermat.go create mode 100644 vendor/github.com/remyoudompheng/bigfft/fft.go create mode 100644 vendor/github.com/remyoudompheng/bigfft/scan.go (limited to 'vendor/github.com/remyoudompheng') diff --git a/vendor/github.com/remyoudompheng/bigfft/LICENSE b/vendor/github.com/remyoudompheng/bigfft/LICENSE new file mode 100644 index 0000000..7448756 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/remyoudompheng/bigfft/README b/vendor/github.com/remyoudompheng/bigfft/README new file mode 100644 index 0000000..0fcd39d --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/README @@ -0,0 +1,54 @@ +This library is a toy proof-of-concept implementation of the +well-known Schönhage-Strassen method for multiplying integers. +It is not expected to have a real-life use case outside number +theory computations, nor is it expected to be used in any production +system. + +If you are using it in your project, you may want to carefully +examine the actual requirement or problem you are trying to solve. + +# Comparison with the standard library and GMP + +Benchmarking math/big vs. 
bigfft + +Number size old ns/op new ns/op delta + 1kb 1599 1640 +2.56% + 10kb 61533 62170 +1.04% + 50kb 833693 831051 -0.32% +100kb 2567995 2693864 +4.90% + 1Mb 105237800 28446400 -72.97% + 5Mb 1272947000 168554600 -86.76% + 10Mb 3834354000 405120200 -89.43% + 20Mb 11514488000 845081600 -92.66% + 50Mb 49199945000 2893950000 -94.12% +100Mb 147599836000 5921594000 -95.99% + +Benchmarking GMP vs bigfft + +Number size GMP ns/op Go ns/op delta + 1kb 536 1500 +179.85% + 10kb 26669 50777 +90.40% + 50kb 252270 658534 +161.04% +100kb 686813 2127534 +209.77% + 1Mb 12100000 22391830 +85.06% + 5Mb 111731843 133550600 +19.53% + 10Mb 212314000 318595800 +50.06% + 20Mb 490196000 671512800 +36.99% + 50Mb 1280000000 2451476000 +91.52% +100Mb 2673000000 5228991000 +95.62% + +Benchmarks were run on a Core 2 Quad Q8200 (2.33GHz). +FFT is enabled when input numbers are over 200kbits. + +Scanning large decimal numbers from strings. +(math/big [n^2 complexity] vs bigfft [n^1.6 complexity], Core i5-4590) + +Digits old ns/op new ns/op delta +1e3 9995 10876 +8.81% +1e4 175356 243806 +39.03% +1e5 9427422 6780545 -28.08% +1e6 1776707489 144867502 -91.85% +2e6 6865499995 346540778 -94.95% +5e6 42641034189 1069878799 -97.49% +10e6 151975273589 2693328580 -98.23% + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_decl.go b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go new file mode 100644 index 0000000..96937df --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go @@ -0,0 +1,33 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package bigfft + +import ( + "math/big" + _ "unsafe" +) + +type Word = big.Word + +//go:linkname addVV math/big.addVV +func addVV(z, x, y []Word) (c Word) + +//go:linkname subVV math/big.subVV +func subVV(z, x, y []Word) (c Word) + +//go:linkname addVW math/big.addVW +func addVW(z, x []Word, y Word) (c Word) + +//go:linkname subVW math/big.subVW +func subVW(z, x []Word, y Word) (c Word) + +//go:linkname shlVU math/big.shlVU +func shlVU(z, x []Word, s uint) (c Word) + +//go:linkname mulAddVWW math/big.mulAddVWW +func mulAddVWW(z, x []Word, y, r Word) (c Word) + +//go:linkname addMulVVW math/big.addMulVVW +func addMulVVW(z, x []Word, y Word) (c Word) diff --git a/vendor/github.com/remyoudompheng/bigfft/fermat.go b/vendor/github.com/remyoudompheng/bigfft/fermat.go new file mode 100644 index 0000000..200ee57 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/fermat.go @@ -0,0 +1,216 @@ +package bigfft + +import ( + "math/big" +) + +// Arithmetic modulo 2^n+1. + +// A fermat of length w+1 represents a number modulo 2^(w*_W) + 1. The last +// word is zero or one. A number has at most two representatives satisfying the +// 0-1 last word constraint. +type fermat nat + +func (n fermat) String() string { return nat(n).String() } + +func (z fermat) norm() { + n := len(z) - 1 + c := z[n] + if c == 0 { + return + } + if z[0] >= c { + z[n] = 0 + z[0] -= c + return + } + // z[0] < z[n]. + subVW(z, z, c) // Subtract c + if c > 1 { + z[n] -= c - 1 + c = 1 + } + // Add back c. + if z[n] == 1 { + z[n] = 0 + return + } else { + addVW(z, z, 1) + } +} + +// Shift computes (x << k) mod (2^n+1). +func (z fermat) Shift(x fermat, k int) { + if len(z) != len(x) { + panic("len(z) != len(x) in Shift") + } + n := len(x) - 1 + // Shifting by n*_W bits is the same as negating the value. 
+ k %= 2 * n * _W + if k < 0 { + k += 2 * n * _W + } + neg := false + if k >= n*_W { + k -= n * _W + neg = true + } + + kw, kb := k/_W, k%_W + + z[n] = 1 // Add (-1) + if !neg { + for i := 0; i < kw; i++ { + z[i] = 0 + } + // Shift left by kw words. + // x = a·2^(n-k) + b + // x< 0 { + z[kw+1] -= b + } else { + subVW(z[kw+1:], z[kw+1:], b) + } + } else { + for i := kw + 1; i < n; i++ { + z[i] = 0 + } + // Shift left and negate, by kw words. + copy(z[:kw+1], x[n-kw:n+1]) // z_low = x_high + b := subVV(z[kw:n], z[kw:n], x[:n-kw]) // z_high -= x_low + z[n] -= b + } + // Add back 1. + if z[n] > 0 { + z[n]-- + } else if z[0] < ^big.Word(0) { + z[0]++ + } else { + addVW(z, z, 1) + } + // Shift left by kb bits + shlVU(z, z, uint(kb)) + z.norm() +} + +// ShiftHalf shifts x by k/2 bits to the left. Shifting by 1/2 bit +// is multiplication by sqrt(2) mod 2^n+1 which is 2^(3n/4) - 2^(n/4). +// A temporary buffer must be provided in tmp. +func (z fermat) ShiftHalf(x fermat, k int, tmp fermat) { + n := len(z) - 1 + if k%2 == 0 { + z.Shift(x, k/2) + return + } + u := (k - 1) / 2 + a := u + (3*_W/4)*n + b := u + (_W/4)*n + z.Shift(x, a) + tmp.Shift(x, b) + z.Sub(z, tmp) +} + +// Add computes addition mod 2^n+1. +func (z fermat) Add(x, y fermat) fermat { + if len(z) != len(x) { + panic("Add: len(z) != len(x)") + } + addVV(z, x, y) // there cannot be a carry here. + z.norm() + return z +} + +// Sub computes subtraction mod 2^n+1. 
+func (z fermat) Sub(x, y fermat) fermat { + if len(z) != len(x) { + panic("Add: len(z) != len(x)") + } + n := len(y) - 1 + b := subVV(z[:n], x[:n], y[:n]) + b += y[n] + // If b > 0, we need to subtract b< 2*n+1 { + panic("len(z) > 2n+1") + } + // We now have + // z = z[:n] + 1<<(n*_W) * z[n:2n+1] + // which normalizes to: + // z = z[:n] - z[n:2n] + z[2n] + c1 := big.Word(0) + if len(z) > 2*n { + c1 = addVW(z[:n], z[:n], z[2*n]) + } + c2 := big.Word(0) + if len(z) >= 2*n { + c2 = subVV(z[:n], z[:n], z[n:2*n]) + } else { + m := len(z) - n + c2 = subVV(z[:m], z[:m], z[n:]) + c2 = subVW(z[m:n], z[m:n], c2) + } + // Restore carries. + // Subtracting z[n] -= c2 is the same + // as z[0] += c2 + z = z[:n+1] + z[n] = c1 + c := addVW(z, z, c2) + if c != 0 { + panic("impossible") + } + z.norm() + return z +} + +// copied from math/big +// +// basicMul multiplies x and y and leaves the result in z. +// The (non-normalized) result is placed in z[0 : len(x) + len(y)]. +func basicMul(z, x, y fermat) { + // initialize z + for i := 0; i < len(z); i++ { + z[i] = 0 + } + for i, d := range y { + if d != 0 { + z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) + } + } +} diff --git a/vendor/github.com/remyoudompheng/bigfft/fft.go b/vendor/github.com/remyoudompheng/bigfft/fft.go new file mode 100644 index 0000000..2d4c1e7 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/fft.go @@ -0,0 +1,370 @@ +// Package bigfft implements multiplication of big.Int using FFT. +// +// The implementation is based on the Schönhage-Strassen method +// using integer FFT modulo 2^n+1. +package bigfft + +import ( + "math/big" + "unsafe" +) + +const _W = int(unsafe.Sizeof(big.Word(0)) * 8) + +type nat []big.Word + +func (n nat) String() string { + v := new(big.Int) + v.SetBits(n) + return v.String() +} + +// fftThreshold is the size (in words) above which FFT is used over +// Karatsuba from math/big. 
+// +// TestCalibrate seems to indicate a threshold of 60kbits on 32-bit +// arches and 110kbits on 64-bit arches. +var fftThreshold = 1800 + +// Mul computes the product x*y and returns z. +// It can be used instead of the Mul method of +// *big.Int from math/big package. +func Mul(x, y *big.Int) *big.Int { + xwords := len(x.Bits()) + ywords := len(y.Bits()) + if xwords > fftThreshold && ywords > fftThreshold { + return mulFFT(x, y) + } + return new(big.Int).Mul(x, y) +} + +func mulFFT(x, y *big.Int) *big.Int { + var xb, yb nat = x.Bits(), y.Bits() + zb := fftmul(xb, yb) + z := new(big.Int) + z.SetBits(zb) + if x.Sign()*y.Sign() < 0 { + z.Neg(z) + } + return z +} + +// A FFT size of K=1< bits { + k = uint(i) + break + } + } + // The 1< words + m = words>>k + 1 + return +} + +// valueSize returns the length (in words) to use for polynomial +// coefficients, to compute a correct product of polynomials P*Q +// where deg(P*Q) < K (== 1<= 2*m*W+K + n := 2*m*_W + int(k) // necessary bits + K := 1 << (k - extra) + if K < _W { + K = _W + } + n = ((n / K) + 1) * K // round to a multiple of K + return n / _W +} + +// poly represents an integer via a polynomial in Z[x]/(x^K+1) +// where K is the FFT length and b^m is the computation basis 1<<(m*_W). +// If P = a[0] + a[1] x + ... a[n] x^(K-1), the associated natural number +// is P(b^m). +type poly struct { + k uint // k is such that K = 1< 0 { + length += len(p.a[na-1]) + } + n := make(nat, length) + m := p.m + np := n + for i := range p.a { + l := len(p.a[i]) + c := addVV(np[:l], np[:l], p.a[i]) + if np[l] < ^big.Word(0) { + np[l] += c + } else { + addVW(np[l:], np[l:], c) + } + np = np[m:] + } + n = trim(n) + return n +} + +func trim(n nat) nat { + for i := range n { + if n[len(n)-1-i] != 0 { + return n[:len(n)-i] + } + } + return nil +} + +// Mul multiplies p and q modulo X^K-1, where K = 1<= 1<= 1<> k + // p(x) = a_0 + a_1 x + ... + a_{K-1} x^(K-1) + // p(θx) = q(x) where + // q(x) = a_0 + θa_1 x + ... 
+ θ^(K-1) a_{K-1} x^(K-1) + // + // Twist p by θ to obtain q. + tbits := make([]big.Word, (n+1)<> k + + // Perform an inverse Fourier transform to recover q. + qbits := make([]big.Word, (n+1)<> size + if backward { + ω2shift = -ω2shift + } + + // Easy cases. + if len(src[0]) != n+1 || len(dst[0]) != n+1 { + panic("len(src[0]) != n+1 || len(dst[0]) != n+1") + } + switch size { + case 0: + copy(dst[0], src[0]) + return + case 1: + dst[0].Add(src[0], src[1< quadraticScanThreshold; n /= 2 { + pow++ + } + // threshold * 2^(pow-1) <= size < threshold * 2^pow + return quadraticScanThreshold << (pow - 1), s.power(pow - 1) +} + +func (s *scanner) power(k uint) *big.Int { + for i := len(s.powers); i <= int(k); i++ { + z := new(big.Int) + if i == 0 { + if quadraticScanThreshold%14 != 0 { + panic("quadraticScanThreshold % 14 != 0") + } + z.Exp(big.NewInt(1e14), big.NewInt(quadraticScanThreshold/14), nil) + } else { + z.Mul(s.powers[i-1], s.powers[i-1]) + } + s.powers = append(s.powers, z) + } + return s.powers[k] +} + +func (s *scanner) scan(z *big.Int, str string) { + if len(str) <= quadraticScanThreshold { + z.SetString(str, 10) + return + } + sz, pow := s.chunkSize(len(str)) + // Scan the left half. + s.scan(z, str[:len(str)-sz]) + // FIXME: reuse temporaries. + left := Mul(z, pow) + // Scan the right half + s.scan(z, str[len(str)-sz:]) + z.Add(z, left) +} + +// quadraticScanThreshold is the number of digits +// below which big.Int.SetString is more efficient +// than subquadratic algorithms. +// 1232 digits fit in 4096 bits. +const quadraticScanThreshold = 1232 -- cgit 1.4.1