about summary refs log tree commit diff
path: root/vendor/github.com/remyoudompheng
diff options
context:
space:
mode:
authorEmile <git@emile.space>2024-08-16 19:50:26 +0200
committerEmile <git@emile.space>2024-08-16 19:50:26 +0200
commit1a57267a17c2fc17fb6e104846fabc3e363c326c (patch)
tree1e574e3a80622086dc3c81ff9cba65ef7049b1a9 /vendor/github.com/remyoudompheng
initial commit
Diffstat (limited to 'vendor/github.com/remyoudompheng')
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/LICENSE27
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/README54
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/arith_decl.go33
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/fermat.go216
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/fft.go370
-rw-r--r--vendor/github.com/remyoudompheng/bigfft/scan.go70
6 files changed, 770 insertions, 0 deletions
diff --git a/vendor/github.com/remyoudompheng/bigfft/LICENSE b/vendor/github.com/remyoudompheng/bigfft/LICENSE
new file mode 100644
index 0000000..7448756
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/remyoudompheng/bigfft/README b/vendor/github.com/remyoudompheng/bigfft/README
new file mode 100644
index 0000000..0fcd39d
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/README
@@ -0,0 +1,54 @@
+This library is a toy proof-of-concept implementation of the
+well-known Schönhage-Strassen method for multiplying integers.
+It is not expected to have a real-life use case outside number
+theory computations, nor is it expected to be used in any production
+system.
+
+If you are using it in your project, you may want to carefully
+examine the actual requirement or problem you are trying to solve.
+
+# Comparison with the standard library and GMP
+
+Benchmarking math/big vs. bigfft
+
+Number size    old ns/op    new ns/op    delta
+  1kb               1599         1640   +2.56%
+ 10kb              61533        62170   +1.04%
+ 50kb             833693       831051   -0.32%
+100kb            2567995      2693864   +4.90%
+  1Mb          105237800     28446400  -72.97%
+  5Mb         1272947000    168554600  -86.76%
+ 10Mb         3834354000    405120200  -89.43%
+ 20Mb        11514488000    845081600  -92.66%
+ 50Mb        49199945000   2893950000  -94.12%
+100Mb       147599836000   5921594000  -95.99%
+
+Benchmarking GMP vs bigfft
+
+Number size   GMP ns/op     Go ns/op    delta
+  1kb                536         1500  +179.85%
+ 10kb              26669        50777  +90.40%
+ 50kb             252270       658534  +161.04%
+100kb             686813      2127534  +209.77%
+  1Mb           12100000     22391830  +85.06%
+  5Mb          111731843    133550600  +19.53%
+ 10Mb          212314000    318595800  +50.06%
+ 20Mb          490196000    671512800  +36.99%
+ 50Mb         1280000000   2451476000  +91.52%
+100Mb         2673000000   5228991000  +95.62%
+
+Benchmarks were run on a Core 2 Quad Q8200 (2.33GHz).
+FFT is enabled when input numbers are over 200kbits.
+
+Scanning large decimal numbers from strings.
+(math/big [n^2 complexity] vs bigfft [n^1.6 complexity], Core i5-4590)
+
+Digits    old ns/op      new ns/op      delta
+1e3            9995          10876     +8.81%
+1e4          175356         243806    +39.03%
+1e5         9427422        6780545    -28.08%
+1e6      1776707489      144867502    -91.85%
+2e6      6865499995      346540778    -94.95%
+5e6     42641034189     1069878799    -97.49%
+10e6   151975273589     2693328580    -98.23%
+
diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_decl.go b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go
new file mode 100644
index 0000000..96937df
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go
@@ -0,0 +1,33 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bigfft
+
+import (
+	"math/big"
+	_ "unsafe"
+)
+
+type Word = big.Word // alias of math/big's word type, used by the signatures below
+
+//go:linkname addVV math/big.addVV
+func addVV(z, x, y []Word) (c Word) // presumably z = x + y with c the carry out — confirm against math/big
+
+//go:linkname subVV math/big.subVV
+func subVV(z, x, y []Word) (c Word) // presumably z = x - y with c the borrow out — confirm against math/big
+
+//go:linkname addVW math/big.addVW
+func addVW(z, x []Word, y Word) (c Word) // presumably z = x + y for a single word y, c the carry out — confirm
+
+//go:linkname subVW math/big.subVW
+func subVW(z, x []Word, y Word) (c Word) // presumably z = x - y for a single word y, c the borrow out — confirm
+
+//go:linkname shlVU math/big.shlVU
+func shlVU(z, x []Word, s uint) (c Word) // presumably z = x << s with c the bits shifted out — confirm
+
+//go:linkname mulAddVWW math/big.mulAddVWW
+func mulAddVWW(z, x []Word, y, r Word) (c Word) // presumably z = x*y + r with c the high word — confirm
+
+//go:linkname addMulVVW math/big.addMulVVW
+func addMulVVW(z, x []Word, y Word) (c Word) // presumably z += x*y with c the carry out — confirm against math/big
diff --git a/vendor/github.com/remyoudompheng/bigfft/fermat.go b/vendor/github.com/remyoudompheng/bigfft/fermat.go
new file mode 100644
index 0000000..200ee57
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/fermat.go
@@ -0,0 +1,216 @@
+package bigfft
+
+import (
+	"math/big"
+)
+
+// Arithmetic modulo 2^n+1.
+
+// A fermat of length w+1 represents a number modulo 2^(w*_W) + 1. The last
+// word is zero or one. A number has at most two representatives satisfying the
+// 0-1 last word constraint.
+type fermat nat
+
+func (n fermat) String() string { return nat(n).String() } // formats n through nat.String
+
+// norm reduces z in place modulo 2^(n*_W)+1 so that its last word is zero or one.
+func (z fermat) norm() {
+	n := len(z) - 1
+	c := z[n]
+	if c == 0 {
+		return
+	}
+	if z[0] >= c {
+		z[n] = 0
+		z[0] -= c
+		return
+	}
+	// z[0] < z[n].
+	subVW(z, z, c) // Subtract c
+	if c > 1 {
+		z[n] -= c - 1
+		c = 1
+	}
+	// Add back c.
+	if z[n] == 1 {
+		z[n] = 0
+		return
+	}
+	addVW(z, z, 1)
+}
+
+// Shift sets z = (x << k) mod 2^(n*_W)+1 with n = len(x)-1. k may be negative. z must not alias x.
+func (z fermat) Shift(x fermat, k int) {
+	if len(z) != len(x) {
+		panic("len(z) != len(x) in Shift")
+	}
+	n := len(x) - 1
+	// Shift by n*_W is taking the opposite.
+	k %= 2 * n * _W // hence a shift by 2*n*_W is the identity
+	if k < 0 {
+		k += 2 * n * _W // Go's % keeps the sign of k; make it non-negative
+	}
+	neg := false
+	if k >= n*_W {
+		k -= n * _W
+		neg = true
+	}
+
+	kw, kb := k/_W, k%_W // whole words and leftover bits of the shift
+
+	z[n] = 1 // Add (-1)
+	if !neg {
+		for i := 0; i < kw; i++ {
+			z[i] = 0
+		}
+		// Shift left by kw words.
+		// x = a·2^(n-k) + b
+		// x<<k = (b<<k) - a
+		copy(z[kw:], x[:n-kw])
+		b := subVV(z[:kw+1], z[:kw+1], x[n-kw:])
+		if z[kw+1] > 0 {
+			z[kw+1] -= b
+		} else {
+			subVW(z[kw+1:], z[kw+1:], b)
+		}
+	} else {
+		for i := kw + 1; i < n; i++ {
+			z[i] = 0
+		}
+		// Shift left and negate, by kw words.
+		copy(z[:kw+1], x[n-kw:n+1])            // z_low = x_high
+		b := subVV(z[kw:n], z[kw:n], x[:n-kw]) // z_high -= x_low
+		z[n] -= b
+	}
+	// Add back 1.
+	if z[n] > 0 {
+		z[n]--
+	} else if z[0] < ^big.Word(0) {
+		z[0]++
+	} else {
+		addVW(z, z, 1)
+	}
+	// Shift left by the remaining kb bits, then renormalize.
+	shlVU(z, z, uint(kb))
+	z.norm()
+}
+
+// ShiftHalf shifts x by k/2 bits to the left. Shifting by 1/2 bit
+// is multiplication by sqrt(2) mod 2^n+1 which is 2^(3n/4) - 2^(n/4).
+// A temporary buffer must be provided in tmp.
+func (z fermat) ShiftHalf(x fermat, k int, tmp fermat) {
+	n := len(z) - 1
+	if k%2 == 0 {
+		z.Shift(x, k/2) // even k: a plain shift by whole bits
+		return
+	}
+	u := (k - 1) / 2 // whole-bit part of the shift; the leftover half bit is the sqrt(2) factor
+	a := u + (3*_W/4)*n
+	b := u + (_W/4)*n
+	z.Shift(x, a)
+	tmp.Shift(x, b)
+	z.Sub(z, tmp) // z = x·2^a - x·2^b
+}
+
+// Add computes addition mod 2^n+1: it sets z = x + y and returns z.
+func (z fermat) Add(x, y fermat) fermat {
+	if len(z) != len(x) {
+		panic("Add: len(z) != len(x)")
+	}
+	addVV(z, x, y) // there cannot be a carry here.
+	z.norm()       // bring the last word back to zero or one
+	return z
+}
+
+// Sub computes subtraction mod 2^n+1: it sets z = x - y and returns z.
+func (z fermat) Sub(x, y fermat) fermat {
+	if len(z) != len(x) {
+		panic("Sub: len(z) != len(x)")
+	}
+	n := len(y) - 1
+	b := subVV(z[:n], x[:n], y[:n]) // b is the borrow out of the low n words
+	b += y[n]                       // y's last word is subtracted at the same weight as the borrow
+	// If b > 0, we need to subtract b<<(n*_W), which is the same as adding b.
+	z[n] = x[n]
+	if z[0] <= ^big.Word(0)-b {
+		z[0] += b // z[0]+b cannot overflow, so no carry chain is needed
+	} else {
+		addVW(z, z, b)
+	}
+	z.norm()
+	return z
+}
+
+func (z fermat) Mul(x, y fermat) fermat { // computes x*y mod 2^(n*_W)+1; z is scratch, use the returned slice
+	if len(x) != len(y) {
+		panic("Mul: len(x) != len(y)")
+	}
+	n := len(x) - 1
+	if n < 30 {
+		z = z[:2*n+2] // requires cap(z) >= 2n+2
+		basicMul(z, x, y)
+		z = z[:2*n+1]
+	} else {
+		var xi, yi, zi big.Int
+		xi.SetBits(x)
+		yi.SetBits(y)
+		zi.SetBits(z)
+		zb := zi.Mul(&xi, &yi).Bits()
+		if len(zb) <= n {
+			// Short product.
+			copy(z, zb)
+			for i := len(zb); i < len(z); i++ {
+				z[i] = 0
+			}
+			return z
+		}
+		z = zb // from here on z is the big.Int result, which may not be the receiver's storage
+	}
+	// len(z) is at most 2n+1.
+	if len(z) > 2*n+1 {
+		panic("len(z) > 2n+1")
+	}
+	// We now have
+	// z = z[:n] + 1<<(n*W) * z[n:2n+1]
+	// which normalizes to:
+	// z = z[:n] - z[n:2n] + z[2n]
+	c1 := big.Word(0)
+	if len(z) > 2*n {
+		c1 = addVW(z[:n], z[:n], z[2*n])
+	}
+	c2 := big.Word(0)
+	if len(z) >= 2*n {
+		c2 = subVV(z[:n], z[:n], z[n:2*n])
+	} else {
+		m := len(z) - n
+		c2 = subVV(z[:m], z[:m], z[n:])
+		c2 = subVW(z[m:n], z[m:n], c2)
+	}
+	// Restore carries.
+	// Subtracting z[n] -= c2 is the same
+	// as z[0] += c2
+	z = z[:n+1]
+	z[n] = c1
+	c := addVW(z, z, c2)
+	if c != 0 {
+		panic("impossible")
+	}
+	z.norm()
+	return z
+}
+
+// basicMul is copied from math/big.
+//
+// It zeroes all of z and then accumulates x*y into it, one word of y at a
+// time; the (non-normalized) result is placed in z[0 : len(x) + len(y)].
+func basicMul(z, x, y fermat) {
+	for i := range z {
+		z[i] = 0
+	}
+	xn := len(x)
+	for j, w := range y {
+		if w != 0 { // zero words of y contribute nothing
+			z[xn+j] = addMulVVW(z[j:j+xn], x, w)
+		}
+	}
+}
diff --git a/vendor/github.com/remyoudompheng/bigfft/fft.go b/vendor/github.com/remyoudompheng/bigfft/fft.go
new file mode 100644
index 0000000..2d4c1e7
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/fft.go
@@ -0,0 +1,370 @@
+// Package bigfft implements multiplication of big.Int using FFT.
+//
+// The implementation is based on the Schönhage-Strassen method
+// using integer FFT modulo 2^n+1.
+package bigfft
+
+import (
+	"math/big"
+	"unsafe"
+)
+
+const _W = int(unsafe.Sizeof(big.Word(0)) * 8) // _W is the width of a big.Word in bits
+
+type nat []big.Word // words of a natural number, in the layout big.Int.Bits and SetBits use
+
+func (n nat) String() string {
+	v := new(big.Int)
+	v.SetBits(n) // v shares n's storage; nothing is copied
+	return v.String()
+}
+
+// fftThreshold is the size (in words) above which FFT is used over
+// Karatsuba from math/big.
+//
+// TestCalibrate seems to indicate a threshold of 60kbits on 32-bit
+// architectures and 110kbits on 64-bit architectures.
+var fftThreshold = 1800 // 57600 bits with 32-bit words, 115200 bits with 64-bit words
+
+// Mul returns the product x*y in a newly allocated *big.Int.
+// It can be used in place of the Mul method of *big.Int; it switches to
+// FFT multiplication once both operands are longer than fftThreshold words.
+func Mul(x, y *big.Int) *big.Int {
+	small := len(x.Bits()) <= fftThreshold || len(y.Bits()) <= fftThreshold
+	if small {
+		// At least one operand is short: fall back to math/big.
+		return new(big.Int).Mul(x, y)
+	}
+	return mulFFT(x, y)
+}
+
+func mulFFT(x, y *big.Int) *big.Int {
+	var xb, yb nat = x.Bits(), y.Bits()
+	zb := fftmul(xb, yb)
+	z := new(big.Int)
+	z.SetBits(zb)
+	if x.Sign()*y.Sign() < 0 {
+		z.Neg(z)
+	}
+	return z
+}
+
+// An FFT size of K=1<<k is adequate when K is about 2*sqrt(N) where
+// N = x.BitLen() + y.BitLen().
+
+func fftmul(x, y nat) nat { // returns the product x*y, computed through polynomial multiplication
+	k, m := fftSize(x, y)      // FFT length 1<<k, chunks of m words
+	xp := polyFromNat(x, k, m) // split both operands into polynomials
+	yp := polyFromNat(y, k, m)
+	rp := xp.Mul(&yp)
+	return rp.Int() // evaluate the product polynomial back into a number
+}
+
+// fftSizeThreshold[i] is the maximal size (in bits) where we should use
+// fft size i. Each trailing comment is the index of that line's last entry.
+var fftSizeThreshold = [...]int64{0, 0, 0,
+	4 << 10, 8 << 10, 16 << 10, // 5
+	32 << 10, 64 << 10, 1 << 18, 1 << 20, 3 << 20, // 10
+	8 << 20, 30 << 20, 100 << 20, 300 << 20, 600 << 20, // 15
+}
+
+// fftSize returns k, the base-2 logarithm of the FFT length, and m,
+// the number of words per chunk, such that m << k is larger than the
+// number of words in x*y.
+func fftSize(x, y nat) (k uint, m int) {
+	words := len(x) + len(y)
+	bits := int64(words) * int64(_W)
+	// Default to one past the table when every threshold is too small.
+	k = uint(len(fftSizeThreshold))
+	for i, limit := range fftSizeThreshold {
+		if limit > bits {
+			k = uint(i)
+			break
+		}
+	}
+	// Splitting into 1<<k chunks of m words needs m<<k > words.
+	m = words>>k + 1
+	return k, m
+}
+
+// valueSize returns the length (in words) to use for polynomial
+// coefficients, to compute a correct product of polynomials P*Q
+// where deg(P*Q) < K (== 1<<k) and where coefficients of P and Q are
+// less than b^m (== 1 << (m*_W)).
+// The chosen length (in bits) must be a multiple of 1 << (k-extra).
+func valueSize(k uint, m int, extra uint) int {
+	// The coefficients of P*Q are less than b^(2m)*2^k
+	// so we need W * valueSize >= 2*m*W+k
+	n := 2*m*_W + int(k)  // necessary bits
+	K := 1 << (k - extra) // NOTE: this local K is the rounding granule, not the FFT length
+	if K < _W {
+		K = _W
+	}
+	n = ((n / K) + 1) * K // round up to the next multiple of K (always strictly above n)
+	return n / _W
+}
+
+// poly represents an integer via a polynomial in Z[x]/(x^K+1)
+// where K is the FFT length and b^m is the computation basis 1<<(m*_W).
+// If P = a[0] + a[1] x + ... a[K-1] x^(K-1), the associated natural number
+// is P(b^m).
+type poly struct {
+	k uint  // k is such that K = 1<<k.
+	m int   // the m such that P(b^m) is the original number.
+	a []nat // a slice of at most K m-word coefficients.
+}
+
+// polyFromNat slices the number x into the coefficients of a poly
+// with FFT length 1<<k, each coefficient being made of m words.
+// Only len(x)/m + 1 coefficients are stored.
+func polyFromNat(x nat, k uint, m int) poly {
+	full := len(x) / m // number of complete m-word chunks
+	coeffs := make([]nat, full+1)
+	for i := 0; i < full; i++ {
+		// Complete chunks alias x instead of copying it.
+		coeffs[i] = x[i*m : (i+1)*m]
+	}
+	// The remaining words (fewer than m, possibly none) are copied
+	// into a zero-padded coefficient of their own.
+	last := make(nat, m)
+	copy(last, x[full*m:])
+	coeffs[full] = last
+	return poly{k: k, m: m, a: coeffs}
+}
+
+// Int evaluates p at b^m, converting the poly back to the integer it represents.
+func (p *poly) Int() nat {
+	length := len(p.a)*p.m + 1
+	if na := len(p.a); na > 0 {
+		length += len(p.a[na-1]) // room for the last coefficient, which may be longer than m words
+	}
+	n := make(nat, length)
+	m := p.m
+	np := n // window into n that starts at the current coefficient's offset
+	for i := range p.a {
+		l := len(p.a[i])
+		c := addVV(np[:l], np[:l], p.a[i]) // add rather than copy: coefficients longer than m overlap the next one
+		if np[l] < ^big.Word(0) {
+			np[l] += c // the carry fits in the next word
+		} else {
+			addVW(np[l:], np[l:], c)
+		}
+		np = np[m:] // advance by one coefficient, i.e. a factor of b^m
+	}
+	n = trim(n)
+	return n
+}
+
+func trim(n nat) nat {
+	for end := len(n); end > 0; end-- {
+		if n[end-1] != 0 { // last non-zero word found
+			return n[:end]
+		}
+	}
+	return nil // n is empty or consists of zero words only
+}
+
+// Mul multiplies p and q modulo X^K-1, where K = 1<<p.k.
+// The product is done via a Fourier transform.
+func (p *poly) Mul(q *poly) poly {
+	// extra=2 because:
+	// * some power of 2 is a K-th root of unity when n is a multiple of K/2.
+	// * 2 itself is a square (see fermat.ShiftHalf)
+	n := valueSize(p.k, p.m, 2)
+
+	pv, qv := p.Transform(n), q.Transform(n)
+	rv := pv.Mul(&qv) // pointwise product of the two transforms
+	r := rv.InvTransform()
+	r.m = p.m // InvTransform leaves m at 0
+	return r
+}
+
+// A polValues represents the values of a poly at the powers of a
+// K-th root of unity θ=2^(l/2) in Z/(b^n+1)Z, where b^n = 2^(K/4*l).
+type polValues struct {
+	k      uint     // k is such that K = 1<<k.
+	n      int      // the length (in words) of coefficients, n*_W a multiple of K/4.
+	values []fermat // a slice of K (n+1)-word values
+}
+
+// Transform evaluates p at θ^i for i = 0...K-1, where
+// θ is a K-th primitive root of unity in Z/(b^n+1)Z.
+func (p *poly) Transform(n int) polValues {
+	k := p.k
+	inputbits := make([]big.Word, (n+1)<<k)
+	input := make([]fermat, 1<<k)
+	// values will receive p(θ^i) for i = 0 ... K-1
+	valbits := make([]big.Word, (n+1)<<k)
+	values := make([]fermat, 1<<k)
+	for i := range values {
+		input[i] = inputbits[i*(n+1) : (i+1)*(n+1)]
+		if i < len(p.a) {
+			copy(input[i], p.a[i]) // coefficients beyond len(p.a) stay zero
+		}
+		values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)])
+	}
+	fourier(values, input, false, n, k)
+	return polValues{k, n, values}
+}
+
+// InvTransform reconstructs p (modulo X^K - 1) from its values at θ^i
+// for i = 0..K-1. The m field of the result is left at 0 for the caller to set.
+func (v *polValues) InvTransform() poly {
+	k, n := v.k, v.n
+
+	// Perform an inverse Fourier transform to recover p.
+	pbits := make([]big.Word, (n+1)<<k)
+	p := make([]fermat, 1<<k)
+	for i := range p {
+		p[i] = fermat(pbits[i*(n+1) : (i+1)*(n+1)])
+	}
+	fourier(p, v.values, true, n, k)
+	// Divide by K = 1<<k, i.e. shift every coefficient by -k bits.
+	u := make(fermat, n+1)
+	a := make([]nat, 1<<k)
+	for i := range p {
+		u.Shift(p[i], -int(k)) // u is scratch: Shift overwrites its receiver while reading its argument
+		copy(p[i], u)
+		a[i] = nat(p[i])
+	}
+	return poly{k: k, m: 0, a: a}
+}
+
+// NTransform evaluates p at θω^i for i = 0...K-1, where θ is a (2K)-th
+// primitive root of unity in Z/(b^n+1)Z and ω = θ². It panics when p
+// has 1<<k or more coefficients.
+func (p *poly) NTransform(n int) polValues {
+	k := p.k
+	if len(p.a) >= 1<<k {
+		panic("NTransform: len(p.a) >= 1<<k")
+	}
+	// θ is represented as a shift.
+	θshift := (n * _W) >> k
+	// p(x) = a_0 + a_1 x + ... + a_{K-1} x^(K-1)
+	// p(θx) = q(x) where
+	// q(x) = a_0 + θa_1 x + ... + θ^(K-1) a_{K-1} x^(K-1)
+	//
+	// Twist p by θ to obtain q.
+	tbits := make([]big.Word, (n+1)<<k)
+	twisted := make([]fermat, 1<<k)
+	src := make(fermat, n+1)
+	for i := range twisted {
+		twisted[i] = fermat(tbits[i*(n+1) : (i+1)*(n+1)])
+		if i < len(p.a) {
+			for j := range src {
+				src[j] = 0
+			}
+			copy(src, p.a[i])
+			twisted[i].Shift(src, θshift*i)
+		}
+	}
+
+	// Now compute q(ω^i) for i = 0 ... K-1
+	valbits := make([]big.Word, (n+1)<<k)
+	values := make([]fermat, 1<<k)
+	for i := range values {
+		values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)])
+	}
+	fourier(values, twisted, false, n, k)
+	return polValues{k, n, values}
+}
+
+// InvNTransform reconstructs a polynomial from its values at
+// roots of x^K+1. The m field of the returned polynomial
+// is unspecified.
+func (v *polValues) InvNTransform() poly {
+	k := v.k
+	n := v.n
+	θshift := (n * _W) >> k
+
+	// Perform an inverse Fourier transform to recover q.
+	qbits := make([]big.Word, (n+1)<<k)
+	q := make([]fermat, 1<<k)
+	for i := range q {
+		q[i] = fermat(qbits[i*(n+1) : (i+1)*(n+1)])
+	}
+	fourier(q, v.values, true, n, k)
+
+	// Divide by K, and untwist q to recover p.
+	u := make(fermat, n+1)
+	a := make([]nat, 1<<k)
+	for i := range q {
+		u.Shift(q[i], -int(k)-i*θshift) // -k bits divides by K; -i*θshift undoes the twist by θ^i
+		copy(q[i], u)
+		a[i] = nat(q[i])
+	}
+	return poly{k: k, m: 0, a: a}
+}
+
+// fourier performs an unnormalized Fourier transform
+// of src, a length 1<<k vector of numbers modulo b^n+1
+// where b = 1<<_W.
+func fourier(dst []fermat, src []fermat, backward bool, n int, k uint) {
+	var rec func(dst, src []fermat, size uint)
+	tmp := make(fermat, n+1)  // scratch for ω^i * dst2[i], shared by all recursion levels
+	tmp2 := make(fermat, n+1) // scratch buffer handed to ShiftHalf
+
+	// The recursion function of the FFT.
+	// The root of unity used in the transform is ω=1<<(ω2shift/2).
+	// The source array may use shifted indices (i.e. the i-th
+	// element is src[i << idxShift]).
+	rec = func(dst, src []fermat, size uint) {
+		idxShift := k - size
+		ω2shift := (4 * n * _W) >> size
+		if backward {
+			ω2shift = -ω2shift
+		}
+
+		// Easy cases.
+		if len(src[0]) != n+1 || len(dst[0]) != n+1 {
+			panic("len(src[0]) != n+1 || len(dst[0]) != n+1")
+		}
+		switch size {
+		case 0:
+			copy(dst[0], src[0])
+			return
+		case 1:
+			dst[0].Add(src[0], src[1<<idxShift]) // dst[0] = src[0] + src[1]
+			dst[1].Sub(src[0], src[1<<idxShift]) // dst[1] = src[0] - src[1]
+			return
+		}
+
+		// Let P(x) = src[0] + src[1<<idxShift] * x + ... + src[K-1 << idxShift] * x^(K-1)
+		// Then P(x) = Q1(x²) + x*Q2(x²)
+		// where Q1's coefficients are src with indices shifted by 1
+		// where Q2's coefficients are src[1<<idxShift:] with indices shifted by 1
+
+		// Split destination vectors in halves.
+		dst1 := dst[:1<<(size-1)]
+		dst2 := dst[1<<(size-1):]
+		// Transform Q1 and Q2 in the halves.
+		rec(dst1, src, size-1)
+		rec(dst2, src[1<<idxShift:], size-1)
+
+		// Reconstruct P's transform from transforms of Q1 and Q2.
+		// dst[i]               is dst1[i] + ω^i * dst2[i]
+		// dst[i + 1<<(size-1)] is dst1[i] - ω^i * dst2[i], because ω^(1<<(size-1)) = -1
+		// The Sub must run before the Add, which overwrites dst1[i].
+		for i := range dst1 {
+			tmp.ShiftHalf(dst2[i], i*ω2shift, tmp2) // ω^i * dst2[i]
+			dst2[i].Sub(dst1[i], tmp)
+			dst1[i].Add(dst1[i], tmp)
+		}
+	}
+	rec(dst, src, k)
+}
+
+// Mul returns the pointwise product of p and q.
+func (p *polValues) Mul(q *polValues) (r polValues) {
+	n := p.n
+	r.k, r.n = p.k, p.n
+	r.values = make([]fermat, len(p.values))
+	bits := make([]big.Word, len(p.values)*(n+1)) // one backing array shared by all result values
+	buf := make(fermat, 8*n)                      // scratch space handed to fermat.Mul
+	for i := range r.values {
+		r.values[i] = bits[i*(n+1) : (i+1)*(n+1)]
+		z := buf.Mul(p.values[i], q.values[i]) // z may not be backed by buf, hence the copy
+		copy(r.values[i], z)
+	}
+	return
+}
diff --git a/vendor/github.com/remyoudompheng/bigfft/scan.go b/vendor/github.com/remyoudompheng/bigfft/scan.go
new file mode 100644
index 0000000..dd3f267
--- /dev/null
+++ b/vendor/github.com/remyoudompheng/bigfft/scan.go
@@ -0,0 +1,70 @@
+package bigfft
+
+import (
+	"math/big"
+)
+
+// FromDecimalString converts the base 10 string
+// representation of a natural (non-negative) number
+// into a *big.Int.
+// Its asymptotic complexity is less than quadratic.
+func FromDecimalString(s string) *big.Int {
+	var sc scanner
+	z := new(big.Int)
+	sc.scan(z, s)
+	return z
+}
+
+type scanner struct { // caches powers of ten across the recursive scan calls
+	// powers[i] is 10^(2^i * quadraticScanThreshold).
+	powers []*big.Int // grown on demand by power
+}
+
+func (s *scanner) chunkSize(size int) (int, *big.Int) {
+	if size <= quadraticScanThreshold {
+		panic("size <= quadraticScanThreshold")
+	}
+	pow := uint(0)
+	for n := size; n > quadraticScanThreshold; n /= 2 {
+		pow++
+	}
+	// Invariant: (threshold+1) * 2^(pow-1) <= size < (threshold+1) * 2^pow, so the chunk is shorter than size.
+	return quadraticScanThreshold << (pow - 1), s.power(pow - 1) // chunk length in digits, and 10^(chunk length)
+}
+
+func (s *scanner) power(k uint) *big.Int { // returns powers[k], extending the cache as needed
+	for i := len(s.powers); i <= int(k); i++ {
+		z := new(big.Int)
+		if i == 0 {
+			if quadraticScanThreshold%14 != 0 {
+				panic("quadraticScanThreshold % 14 != 0")
+			}
+			z.Exp(big.NewInt(1e14), big.NewInt(quadraticScanThreshold/14), nil) // (10^14)^(threshold/14) = 10^threshold
+		} else {
+			z.Mul(s.powers[i-1], s.powers[i-1]) // squaring the previous entry doubles the exponent
+		}
+		s.powers = append(s.powers, z)
+	}
+	return s.powers[k]
+}
+
+func (s *scanner) scan(z *big.Int, str string) { // sets z to the value of the decimal string str
+	if len(str) <= quadraticScanThreshold {
+		z.SetString(str, 10) // the ok result is discarded, so malformed digits go unreported
+		return
+	}
+	sz, pow := s.chunkSize(len(str)) // pow is 10^sz
+	// Scan the leading digits (everything but the last sz).
+	s.scan(z, str[:len(str)-sz])
+	// FIXME: reuse temporaries.
+	left := Mul(z, pow)
+	// Scan the trailing sz digits and add the shifted leading part.
+	s.scan(z, str[len(str)-sz:])
+	z.Add(z, left)
+}
+
+// quadraticScanThreshold is the number of digits
+// below which big.Int.SetString is more efficient
+// than subquadratic algorithms. It must be a multiple of 14
+// (checked in scanner.power). 1232 digits fit in 4096 bits.
+const quadraticScanThreshold = 1232