Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
/* Implementation of the binary recursive gcd algorithm. Version 1.0, 04 November 2005. Reference: "A Binary Recursive Gcd Algorithm", by D. Stehlé and P. Zimmermann, Proceedings of the Algorithmic Number Theory Symposium (ANTS VI), pages 411-425, 2004. Copyright 2001, 2002, 2003, 2004, 2005 Damien Stehlé and Paul Zimmermann This code is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Place, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include /* for ULONG_MAX */ #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" #include #include /* #define DEBUG 1 */ /* #define DEBUG2 */ double mul_cost, mul_cost_hgcd; #define THRESHOLD1 200000000 #define MPZ_MUL(a, b, c) do { \ mul_cost += (double) mpz_size (b) * mpz_size (c); \ if (mpz_size(b) >= THRESHOLD1 || mpz_size(c) >= THRESHOLD1) \ printf ("mpz_mul %d %d\n", mpz_size(b), mpz_size(c)); \ mpz_mul ((a), (b), (c)); } while (0) #define MPZ_ADDMUL(a, b, c) do { \ mul_cost += (double) mpz_size (b) * mpz_size (c); \ if (mpz_size(b) >= THRESHOLD1 || mpz_size(c) >= THRESHOLD1) \ printf ("mpz_addmul %d %d\n", mpz_size(b), mpz_size(c)); \ mpz_addmul ((a), (b), (c)); } while (0) /* #define DD 1 */ #define BBOUND 63 int cputime (); void mpz_addmul_si (mpz_t, mpz_t, long); unsigned long mpz_divi_slow (mpz_t, mpz_t, mpz_t, mpz_t, int); unsigned long mpz_divi (mpz_t, mpz_t, mpz_t, mpz_t, int); unsigned long mpz_divi_si (long*, long*, long*, long*, mpz_t, mpz_t, mpz_t, unsigned long, int); int check (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, unsigned long); long mpz_int_rfeea (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, unsigned long, mpz_t*, int); void mpz_fastgcd (mpz_t, const mpz_t, const mpz_t); void Hstar (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); void H (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); void G (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); void mpz_bfib2_ui (mpz_t, mpz_t, unsigned long); long mpz_int_rfeea0 (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, unsigned long, mpz_t*); unsigned long base_case_bin (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, unsigned long, unsigned long); int cputime () { struct rusage rus; getrusage (0, &rus); return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; } #define OUT(x) { mpz_out_str (stdout, 10, x); printf (":\n"); } #define OUT2(x) { mpz_out_str (stdout, 2, x); printf (":\n"); } #define VAL(P) mpz_scan1(P, 0) #define VAL2(P,i) mpz_scan1(P, i) #define INFINITY ULONG_MAX /* a <- a + b * i */ void mpz_addmul_si (mpz_t a, mpz_t b, long i) { if (i >= 0) mpz_addmul_ui (a, b, (unsigned long) i); else mpz_submul_ui (a, b, (unsigned long) (-i)); } /* given P and Q with val(P) < val(Q), replaces P by R and returns q such that q*Q/2^j+P = R and val(R) > val(Q) with j := val(Q)-val(P) with -2^j <= q < 2^j. We have q = - P'/Q' (mod 2^(j+1)) with P' = P/2^val(P), Q' = Q/2^val(Q). Returns j := val(Q)-val(P). tmp is an auxiliary variable. */ unsigned long mpz_divi_slow (mpz_t q, mpz_t P, mpz_t Q, mpz_t tmp, int extended) { unsigned long valP, valQ, v; valP = VAL(P); valQ = VAL(Q); #ifdef DEBUG if (valP >= valQ) { fprintf (stderr, "Error in mpz_divi_slow: val(P) >= val(Q)\n"); exit (1); } #endif if (extended) mpz_set_ui (q, 0); v = valP; while (v < valQ) { if (extended) mpz_setbit (q, v - valP); mpz_div_2exp (tmp, Q, valQ - v); mpz_add (P, P, tmp); v = VAL2 (P, v + 1); #ifdef DEBUG if (v != VAL(P)) { fprintf (stderr, "Error in mpz_divi_slow: v <> VAL(P)\n"); exit (1); } #endif } if (v == valQ) { #define CENTERED #ifdef CENTERED #define MPZ_ADD mpz_sub /* last step is centered, q may be negative */ #else #define MPZ_ADD mpz_add /* last step is not centered, q is positive */ #endif if (extended) /* q = q - 2^j */ { mpz_set_ui (tmp, 1); mpz_mul_2exp (tmp, tmp, valQ - valP); MPZ_ADD (q, q, tmp); } MPZ_ADD (P, P, Q); } return valQ - valP; } /* given P and Q with val(P) < val(Q), replaces P by R and returns q such that q*Q/2^j+P = R and val(R) > val(Q) with j := val(Q)-val(P) with -2^j < q < 2^j (q is always odd). We have q = - P'/Q' (mod 2^(j+1)) with P' = P/2^val(P), Q' = Q/2^val(Q). Returns j := val(Q)-val(P). tmp is an auxiliary variable. */ unsigned long mpz_divi (mpz_t q, mpz_t P, mpz_t Q, mpz_t tmp, int extended) { unsigned long valP, valQ, j, i; mp_limb_t pp, qp, mask; long qq; mp_ptr tp; #ifdef DEBUG if (mpz_cmp_ui (Q, 0) == 0) { fprintf (stderr, "Error: Q=0\n"); exit (1); } #endif valP = VAL(P); valQ = VAL(Q); j = valQ - valP; if (j >= GMP_NUMB_BITS) return mpz_divi_slow (q, P, Q, tmp, extended); #ifdef DEBUG if (mpz_sizeinbase (P, 2) < valP + j + 1) { fprintf (stderr, "Error: bits(P) < valP+j+1\n"); exit (1); } #endif tp = PTR(P) + valP / GMP_NUMB_BITS; valP %= GMP_NUMB_BITS; pp = (valP) ? ((tp[0] >> valP) + (tp[1] << (GMP_NUMB_BITS - valP))) : tp[0]; /* now pp contains the low j+1 significant bits from P */ #ifdef DEBUG if (mpz_sizeinbase (Q, 2) < valQ + j + 1) { fprintf (stderr, "Error: bits(Q) < valQ+j+1\n"); exit (1); } #endif tp = PTR(Q) + valQ / GMP_NUMB_BITS; valQ %= GMP_NUMB_BITS; qp = (valQ) ? ((tp[0] >> valQ) + (tp[1] << (GMP_NUMB_BITS - valQ))) : tp[0]; /* now qp contains the low j+1 significant bits from Q */ /* we now have the low j+1 bits from P and Q in pp and qp */ qq = (long) 0; /* always odd */ mask = (mp_limb_t) 1; for (i = 0; i < j; i++) { /* invariant: mask = 1 << i */ if (pp & mask) { qq += mask; pp += qp; } qp <<= 1; mask <<= 1; } /* last step is centered */ qq -= pp & mask; if (mpz_sgn (P) != mpz_sgn (Q)) qq = - qq; if (extended) mpz_set_si (q, qq); /* replace P by q*Q/2^j+P */ mpz_div_2exp (tmp, Q, j); if (qq >= 0) { if (qq == 1) mpz_add (P, P, tmp); else mpz_addmul_ui (P, tmp, qq); } else { if (qq == -1) mpz_sub (P, P, tmp); else mpz_submul_ui (P, tmp, -qq); } return j; } /* returns j and matrix [a,b;c,d] which reduces P0, Q0 into P, Q. Assumes 0 = val(P) < val(Q) initially. Modifies P, Q so that val(Q) < val(P), and divides by 2^j so that val(Q)=0. Stop if k < val(P) is attained. If swap is non-zero, swap P and Q at the end, s.t. 0 = val(P) <= k < val(Q). */ unsigned long mpz_divi_si (long *a, long *b, long *c, long *d, mpz_t P, mpz_t Q, mpz_t tmp, unsigned long k, int swap) { unsigned long jtot; long j; mp_limb_t pp0[2], qp0[2], *pp, *qp, *tp, invq, twoj; long q; double sign = (mpz_sgn (P) == mpz_sgn (Q)) ? 1.0 : -1.0; double aa, bb, cc, dd; long significant_bits; double newa, newb, newc, newd, twojd, qq; long sizep, sizeq; #ifdef DEBUG mpz_t Pin, Qin; mpz_init_set (Pin, P); mpz_init_set (Qin, Q); if (sizep < 2) { fprintf (stderr, "Error: sizep < 2\n"); exit (1); } #endif sizep = ABS(SIZ(P)); pp = pp0; pp[0] = PTR(P)[0]; pp[1] = (sizep >= 2) ? PTR(P)[1] : 0; /* now {pp,2} contains the low 2 significant limbs from |P| */ j = VAL(Q); /* if j >= GMP_NUMB_BITS, then the coefficients may not fit in a long */ if (j >= GMP_NUMB_BITS) abort (); /* should call mpz_divi_slow here */ /* now j < GMP_NUMB_BITS */ #ifdef DEBUG if (sizeq < 2) { fprintf (stderr, "Error: sizeq < 2\n"); exit (1); } #endif /* since val(P) < val(Q), j cannot be 0 */ sizeq = ABS(SIZ(Q)); qp = qp0; if (sizeq == 1) { mpn_rshift (qp, PTR(Q), 1, j); qp[1] = 0; } else { mpn_rshift (qp, PTR(Q), 2, j); if (sizeq > 2) qp[1] |= PTR(Q)[2] << (GMP_NUMB_BITS - j); } /* now {qp,2} contains the low 2 significant limbs from |Q| */ aa = 1.0; bb = 0.0; cc = 0.0; dd = 1.0; jtot = 0; /* matrix denominator is 2^jtot */ /* number of significant bits from p and q*2^j */ significant_bits = 2 * GMP_NUMB_BITS + j; /* invariant: j = valuation difference = VAL(Q) - VAL(P), and jtot=val(P) */ while (jtot + j <= k && j < significant_bits) { /* we need at least j+1 significant bits in pp and qq */ /* reduce P wrt Q, by computing -P/Q mod 2^(j+1) */ significant_bits -= 2 * j; twoj = 1L << j; modlimb_invert (invq, qp[0]); invq *= (~pp[0]) + 1UL; invq &= (twoj << 1) - 1UL; /* -P/Q mod 2^(j+1) */ q = (invq >= twoj) ? invq - (twoj << 1) : invq; /* printf ("q=%1.0f\n", sign*(double)q); */ /* replace P by P + q*Q/2^j */ if (q >= 0) mpn_addmul_1 (pp, qp, 2, q); else mpn_submul_1 (pp, qp, 2, -q); /* update matrix: [newP, newQ] -> [Q, P+q/2^j*Q] thus jtot -> jtot + j, [a, b; c, d] -> [0, 2^j; 2^j; q] * [a, b; c, d] */ /* a[0] <- 2^j*c[0], c[0] <- 2^j*a[0] + q*c[0] b[0] <- 2^j*d[0], d[0] <- 2^j*b[0] + q*d[0] */ #define BOUND 2147483647.0 #define ok(x) (-BOUND <= x && x <= BOUND) twojd = (double) twoj; qq = sign * (double) q; newa = twojd * cc; newc = twojd * aa + qq * cc; newb = twojd * dd; newd = twojd * bb + qq * dd; if (ok(newa) && ok(newb) && ok(newc) && ok(newd)) { aa = newa; bb = newb; cc = newc; dd = newd; } else break; jtot += j; /* we know that val(P) >= j+1 here, new j is val(P) - j */ if (pp[0] == 0) break; count_trailing_zeros(q, pp[0]); mpn_rshift (pp, pp, 2, q); j = q - j; /* exchange P and Q */ tp = pp; pp = qp; qp = tp; } c[0] = (long) aa; d[0] = (long) bb; a[0] = (long) cc; b[0] = (long) dd; /* update P and Q: Q <- (a[0]*P + b[0]*Q)/2^jtot, P <- (c[0]*P + d[0]*Q)/2^jtot */ { if (swap == 0) /* last computed term will be in P */ { mpz_mul_si (tmp, P, c[0]); mpz_addmul_si (tmp, Q, d[0]); mpz_mul_si (P, P, a[0]); mpz_addmul_si (P, Q, b[0]); mpz_div_2exp (P, P, 2 * jtot); mpz_div_2exp (Q, tmp, 2 * jtot); } else /* swap=1: last computed term will be in Q */ { mpz_mul_si (tmp, P, c[0]); mpz_addmul_si (tmp, Q, d[0]); mpz_mul_si (Q, Q, b[0]); mpz_addmul_si (Q, P, a[0]); mpz_div_2exp (Q, Q, 2 * jtot); mpz_div_2exp (P, tmp, 2 * jtot); } } #ifdef DEBUG if ((swap == 0 && VAL(Q) != 0) || (swap == 1 && VAL(P) != 0)) { printf ("VAL(Q)<>0: VAL(P)=%u VAL(Q)=%u\n", VAL(P), VAL(Q)); printf ("k=%u Pin=", k); OUT(Pin); printf ("Qin="); OUT(Qin); exit (1); } if ((swap == 0 && VAL(P) <= VAL(Q)) || (swap == 1 && VAL(Q) <= VAL(P))) { fprintf (stderr, "Error in divi_si: VAL(P) <= VAL(Q) at exit\n"); exit (1); } mpz_clear (Pin); mpz_clear (Qin); #endif return jtot; } unsigned long base_case_bin (mpz_t R11, mpz_t R12, mpz_t R21, mpz_t R22, mpz_t P, mpz_t Q, unsigned long k, unsigned long l) { mpz_t S11, S12, S21, S22, t1; long a[4]; unsigned long jtot, j; unsigned long kk; mpz_init (S11); mpz_init (S12); mpz_init (S21); mpz_init (S22); mpz_init (t1); /* if (k <=10) printf ( "k= %u \n " , k); OUT(P); OUT(Q); printf( "\n"); */ jtot = 0; if ( l <= BBOUND) { jtot = mpz_divi_si (a, a+1, a+2, a+3, P, Q, t1, k, 1); mpz_set_si (R11, a[2]); mpz_set_si (R12, a[3]); mpz_set_si (R21, a[0]); mpz_set_si (R22, a[1]); while (jtot + VAL(Q) <= l) { j = mpz_divi_si (a, a+1, a+2, a+3, P, Q, t1, k - jtot, 1); jtot += j; /* R = [a2,a3;a0,a1] * R, i.e. R11 <- a2*R11+a3*R21, R21 <- a0*R11+a1*R21, R12 <- a2*R12+a3*R22, R22 <- a0*R12+a1*R22 */ mpz_mul_si (t1, R11, a[0]); mpz_mul_si (R11, R11, a[2]); mpz_addmul_si (R11, R21, a[3]); mpz_mul_si (R21, R21, a[1]); mpz_add (R21, R21, t1); mpz_mul_si (t1, R12, a[0]); mpz_mul_si (R12, R12, a[2]); mpz_addmul_si (R12, R22, a[3]); mpz_mul_si (R22, R22, a[1]); mpz_add (R22, R22, t1); } } else { /* printf("coucou\n"); */ if (VAL(Q) > k) { mpz_set_ui (R11, 1); mpz_set_ui (R12, 0); mpz_set_ui (R21, 0); mpz_set_ui (R22, 1); } else { kk = l/2; jtot = base_case_bin (R11, R12, R21, R22, P, Q, k, kk); if (VAL(Q) <= k) { k -= jtot; jtot += base_case_bin (S11, S12, S21, S22, P, Q, k, l-jtot); /* R = S * R */ mpz_mul (t1, R11, S21); mpz_mul (R11, R11, S11); mpz_addmul (R11, R21, S12); mpz_mul (R21, R21, S22); mpz_add (R21, R21, t1); mpz_mul (t1, R12, S21); mpz_mul (R12, R12, S11); mpz_addmul (R12, R22, S12); mpz_mul (R22, R22, S22); mpz_add (R22, R22, t1); } } } mpz_clear (t1); mpz_clear (S11); mpz_clear (S12); mpz_clear (S21); mpz_clear (S22); /*printf ( "jtot = %u \n", jtot); */ return jtot; } /* check that R*[Pin,Qin] = [P,Q] */ int check (mpz_t R11, mpz_t R12, mpz_t R21, mpz_t R22, mpz_t Pin, mpz_t Qin, mpz_t P, mpz_t Q, unsigned long j) { mpz_t P0, Q0, t1, q; int res; mpz_init (P0); mpz_init (Q0); mpz_init (t1); mpz_init (q); mpz_mul (P0, R11, Pin); mpz_mul (Q0, R12, Qin); mpz_mul (t1, R21, Pin); mpz_mul (q, R22, Qin); mpz_add (P0, P0, Q0); mpz_add (Q0, t1, q); mpz_div_2exp (P0, P0, j); mpz_div_2exp (Q0, Q0, j); res = mpz_cmp (P0, P) != 0 || mpz_cmp (Q0, Q) != 0; mpz_clear (P0); mpz_clear (Q0); mpz_clear (t1); mpz_clear (q); return res; } long mpz_int_rfeea0 (mpz_t R11, mpz_t R12, mpz_t R21, mpz_t R22, mpz_t P, mpz_t Q, unsigned long k, mpz_t *tmp) { mpz_t P0, Q0; unsigned long cut; int j1; mpz_init (P0); mpz_init (Q0); #define q tmp[1] #define t1 tmp[0] cut = 2 * k + 1; if (cut % GMP_NUMB_BITS) cut += GMP_NUMB_BITS - (cut % GMP_NUMB_BITS); mpz_tdiv_r_2exp (P0, P, cut); mpz_tdiv_q_2exp (P, P, cut); mpz_tdiv_r_2exp (Q0, Q, cut); mpz_tdiv_q_2exp (Q, Q, cut); j1 = mpz_int_rfeea (R11, R12, R21, R22, P0, Q0, k, tmp, 1); MPZ_MUL (q, R12, Q); MPZ_MUL (Q, R22, Q); MPZ_ADDMUL (Q, R21, P); mpz_mul_2exp (Q, Q, cut - 2 * j1); mpz_add (Q, Q, Q0); MPZ_MUL (t1, R11, P); mpz_add (P, t1, q); mpz_mul_2exp (P, P, cut - 2 * j1); mpz_add (P, P, P0); mpz_clear (P0); mpz_clear (Q0); return j1; } /* returns a 2x2 integer matrix R = [[R11,R12],[R21,R22]] such that if (P') (P) ( ) = R * ( ) / 2^j (Q') (Q) P', Q' are two consecutive remainders of the modified right binary Euclidean algorithm with val(P') <= val(P) + k < val(Q'). Assumes 0=val(P) < val(Q), i.e. P is odd and Q is even. Returns j. Modifies in-place P and Q to P'/2^j and Q'/2^j, with val(P'/2^j)=0. If extended=0, the matrix R is not computed. We have det(R)=2^(2*j). For example, for P=646173941, Q=3473871690, k=32, it returns R11=3094378856, R12=4734555148, R21=-3473871690, R22=646173941, j=32. */ long mpz_int_rfeea (mpz_t R11, mpz_t R12, mpz_t R21, mpz_t R22, mpz_t P, mpz_t Q, unsigned long k, mpz_t *tmp, int extended) { unsigned long m, d, mm, dd, j, j1, j2, cut; mpz_t P0, Q0, S11, S12, S21, S22; /* t1, q */ long a[4]; int computeS; #ifdef DEBUG mpz_t g, g2; mpz_init (g); mpz_init (g2); mpz_gcd (g, P, Q); mpz_div_2exp (g, g, VAL(g)); #endif #define t1 tmp[0] #define q tmp[1] m = VAL(Q); #ifdef DEBUG if (VAL(P) != 0 || m == 0) { fprintf (stderr, "Error in mpz_int_rfeea: VAL(P)<>0 or val(Q)=0\n"); printf ("P:="); OUT(P); printf ("Q:="); OUT(Q); exit (1); } #endif if (k < m) /* val(P) + k = k < val(Q) */ { /* the output condition is satisfied with P'=P, Q'=Q, j=0, and R=Id */ if (extended) { mpz_set_ui (R11, 1); mpz_set_ui (R12, 0); mpz_set_ui (R21, 0); mpz_set_ui (R22, 1); } j1 = 0; goto end2; } /* we start to gain when k/4 > MUL_KARATSUBA_THRESHOLD * GMP_NUMB_BITS */ #define THRESHOLD ( MUL_KARATSUBA_THRESHOLD * GMP_NUMB_BITS ) if (k < THRESHOLD) /* naive algorithm */ { #define DIVI_SI #ifdef DIVI_SI #ifdef DD /* printf("blabla, %u\n",k); */ j1 = base_case_bin (R11, R12, R21, R22, P, Q, k, k); /* printf ("blabla, %u\n", j1); */ goto end2; #else j1 = mpz_divi_si (a, a+1, a+2, a+3, P, Q, t1, k, 1); if (extended) { mpz_set_si (R11, a[2]); mpz_set_si (R12, a[3]); mpz_set_si (R21, a[0]); mpz_set_si (R22, a[1]); } while (j1 + VAL(Q) <= k) { j = mpz_divi_si (a, a+1, a+2, a+3, P, Q, t1, k - j1, 1); j1 += j; if (extended) /* R = [a2,a3;a0,a1] * R, i.e. R11 <- a2*R11+a3*R21, R21 <- a0*R11+a1*R21, R12 <- a2*R12+a3*R22, R22 <- a0*R12+a1*R22 */ { mpz_mul_si (t1, R11, a[0]); mpz_mul_si (R11, R11, a[2]); mpz_addmul_si (R11, R21, a[3]); mpz_mul_si (R21, R21, a[1]); mpz_add (R21, R21, t1); mpz_mul_si (t1, R12, a[0]); mpz_mul_si (R12, R12, a[2]); mpz_addmul_si (R12, R22, a[3]); mpz_mul_si (R22, R22, a[1]); mpz_add (R22, R22, t1); } } goto end2; #endif #else j1 = mpz_divi (R22, P, Q, t1, extended); mpz_swap (P, Q); /* P = oldQ, Q = (2^j1*oldP + q*oldQ)/2^j1, val(Q) < val(P) */ if (extended) { mpz_set_ui (R11, 0); mpz_set_ui (R12, 1); mpz_mul_2exp (R12, R12, j1); mpz_set (R21, R12); } while (VAL(Q) <= k) { j = mpz_divi (q, P, Q, t1, extended); mpz_swap (P, Q); j1 += j; if (extended) /* R = [0,2^j;2^j,q] * R */ { MPZ_MUL (t1, q, R21); mpz_mul_2exp (R11, R11, j); mpz_add (R11, R11, t1); /* 2^j*R11+q*R21 */ mpz_mul_2exp (R21, R21, j); mpz_swap (R11, R21); MPZ_MUL (t1, q, R22); mpz_mul_2exp (R12, R12, j); mpz_add (R12, R12, t1); /* 2^j*R12+q*R22 */ mpz_mul_2exp (R22, R22, j); mpz_swap (R12, R22); } } mpz_div_2exp (P, P, j1); mpz_div_2exp (Q, Q, j1); goto end2; #endif } d = k / 2; mpz_init (P0); mpz_init (Q0); cut = 2 * d + 1; if (cut % GMP_NUMB_BITS) cut += GMP_NUMB_BITS - (cut % GMP_NUMB_BITS); mpz_tdiv_r_2exp (P0, P, cut); mpz_tdiv_q_2exp (P, P, cut); mpz_tdiv_r_2exp (Q0, Q, cut); mpz_tdiv_q_2exp (Q, Q, cut); #ifdef DEBUG if (VAL(P0) != 0) {printf ("1st call\n"); abort();} #endif /* we don't need to compute the matrix R when extended=0 and P=Q=0 */ j1 = mpz_int_rfeea (R11, R12, R21, R22, P0, Q0, d, tmp, extended || mpz_cmp_ui (P, 0) || mpz_cmp_ui (Q, 0)); /* we should have 2^j1*P0 = R11*oldP0+R12*oldQ0, 2^j1*Q0 = R21*oldP0+R22*oldQ0 */ /* we now have j1 = j1+val(P0') <= val(P0) + d < j1+val(Q0') [val(P0)=val(P)=0] with P0' = R11*P0 + R12*Q0 */ /* now compute P' = (R11*P1 + R12*Q1)*2^(2d+1-2*j1) + P0'. We have val(P') = val(P0') <= val(P) + d < val(Q0') = val(Q') */ #ifdef DEBUG if (!(j1 + VAL(P0) <= d && ((VAL(Q0) == INFINITY) || (d < j1 + VAL(Q0))))) { fprintf (stderr, "j1+VAL(P0) <= d && d < j1+VAL(Q0) not fulfilled: %u %u %u\n", VAL(P0), d, VAL(Q0)); exit (1); } if (cut < j1) { fprintf (stderr, "Error: cut < j1\n"); exit (1); } #endif #if 0 if (extended && k > 40000) printf ("k:%u d:%u R11:%u R12:%u R21:%u R22:%u P:%u Q:%u\n", k, d, mpz_size (R11), mpz_size (R12), mpz_size (R21), mpz_size (R22), mpz_size (P), mpz_size (Q)); #endif #ifdef DEBUG if (2 * j1 >= cut) { fprintf (stderr, "Error: 2 * j1 >= cut\n"); exit (1); } #endif MPZ_MUL (q, R12, Q); MPZ_MUL (Q, R22, Q); MPZ_ADDMUL (Q, R21, P); mpz_mul_2exp (Q, Q, cut - 2 * j1); mpz_add (Q, Q, Q0); MPZ_MUL (t1, R11, P); mpz_add (P, t1, q); mpz_mul_2exp (P, P, cut - 2 * j1); mpz_add (P, P, P0); #ifdef DEBUG if (!(j1 + VAL(P) <= d && ((VAL(Q) == INFINITY) || (d < j1 + VAL(Q))))) { fprintf (stderr, "j1+VAL(P) <= d && d < j1+VAL(Q) not fulfilled: %u %u %u\n", j1+VAL(P), d, j1+VAL(Q)); exit (1); } #endif #ifdef DEBUG mpz_gcd (g2, P, Q); mpz_div_2exp (g2, g2, VAL(g2)); if (mpz_cmp (g, g2)) { fprintf (stderr, "1: g <> g2, extended=%u\n", extended); OUT(g); OUT(g2); exit (1); } if (j1 > d + 1) { fprintf (stderr, "Error: j1 > d + 1\n"); exit (1); } #endif mm = VAL2(Q, d + 1 - j1); #ifdef DEBUG if (mm != VAL(Q)) { fprintf (stderr, "Error: mm<>VAL(Q)\n"); printf ("mm=%u d=%u\n", mm, d); printf ("P:="); OUT(P); printf ("Q:="); OUT(Q); printf ("P0:="); OUT(P0); printf ("Q0:="); OUT(Q0); exit (1); } #endif if (mm == INFINITY || k < j1 + mm) /* val(P_init) + k < val(Q') */ goto end; #ifdef DIVI_SI j = mpz_divi_si (a, a+1, a+2, a+3, P, Q, t1, ULONG_MAX, 0); /* P = a[0]*oldP + a[1]*oldQ, Q = a[2]*oldP + a[3]*oldQ */ mm = j; #else j = mpz_divi (q, P, Q, t1, 1); if (mm) { mpz_div_2exp (P, P, mm); mpz_div_2exp (Q, Q, mm); } #endif /* we should now have VAL(Q) < VAL(P) and VAL(Q) <= k */ #ifdef DEBUG if (VAL(P) <= VAL(Q) || VAL(Q) > k) { fprintf (stderr, "Error: VAL(P)<=VAL(Q) or VAL(Q)>k after divi\n"); exit (1); } #endif if (j1 + mm > k) { fprintf (stderr, "Error: j1 + mm > k\n"); exit (1); } dd = k - (j1 + mm); cut = 2 * dd + 1; if (cut % GMP_NUMB_BITS) cut += GMP_NUMB_BITS - (cut % GMP_NUMB_BITS); mpz_tdiv_r_2exp (Q0, Q, cut); mpz_tdiv_q_2exp (Q, Q, cut); mpz_tdiv_r_2exp (P0, P, cut); mpz_tdiv_q_2exp (P, P, cut); if (extended) { #ifdef DIVI_SI /* R <- [a[2],a[3]; a[0],a[1]] * R: R11 <- a[0]*R11+a[1]*R21 R21 <- a[2]*R11+a[3]*R21 R12 <- a[0]*R12+a[1]*R22 R22 <- a[2]*R12+a[3]*R22 */ mpz_mul_si (t1, R11, a[2]); mpz_addmul_si (t1, R21, a[3]); mpz_mul_si (R21, R21, a[1]); mpz_addmul_si (R21, R11, a[0]); mpz_swap (R11, t1); mpz_mul_si (t1, R12, a[2]); mpz_addmul_si (t1, R22, a[3]); mpz_mul_si (R22, R22, a[1]); mpz_addmul_si (R22, R12, a[0]); mpz_swap (R12, t1); #else /* R <- [0,2^j; 2^j, q] * R: R11 <- 2^j*R21 R21 <- 2^j*R11+q*R21 R12 <- 2^j*R22 R22 <- 2^j*R12+q*R22 */ MPZ_MUL (t1, q, R21); mpz_mul_2exp (R11, R11, j); mpz_add (R11, R11, t1); /* 2^j*R11+q*R21 */ mpz_mul_2exp (R21, R21, j); mpz_swap (R11, R21); MPZ_MUL (t1, q, R22); mpz_mul_2exp (R12, R12, j); mpz_add (R12, R12, t1); /* 2^j*R12+q*R22 */ mpz_mul_2exp (R22, R22, j); mpz_swap (R12, R22); #endif } computeS = extended || mpz_cmp_ui (P, 0) || mpz_cmp_ui (Q, 0); mpz_init (S11); mpz_init (S12); mpz_init (S21); mpz_init (S22); #ifdef DEBUG if (VAL(Q0) != 0) { fprintf (stderr, "Error: VAL(Q0) <> 0\n"); exit (1); } #endif j2 = mpz_int_rfeea (S11, S12, S21, S22, Q0, P0, dd, tmp, computeS); #ifdef DEBUG if (!(j2 + VAL(Q0) <= dd && ((VAL(P0) == INFINITY) || (dd < j2 + VAL(P0))))) { fprintf (stderr, "j2+VAL(Q0) <= dd && dd < j2+VAL(P0) not fulfilled: %u %u %u\n", j2+VAL(Q0), dd, j2+VAL(P0)); exit (1); } #endif if (extended) /* R <- S * R: R11 <- S11 * R11 + S12 * R21 R12 <- S11 * R12 + S12 * R22 R21 <- S21 * R11 + S22 * R21 R22 <- S21 * R12 + S22 * R22 */ { MPZ_MUL (t1, S21, R11); MPZ_MUL (R11, S11, R11); MPZ_MUL (q, S12, R21); MPZ_MUL (R21, S22, R21); mpz_add (R11, R11, q); mpz_add (R21, t1, R21); MPZ_MUL (t1, S21, R12); MPZ_MUL (R12, S11, R12); MPZ_MUL (q, S12, R22); MPZ_MUL (R22, S22, R22); mpz_add (R12, R12, q); mpz_add (R22, t1, R22); } /* update Q, P to S*2^(cut+mm-j2) * [Q, P] + [P0, Q0] */ #ifdef DEBUG if (cut + mm <= j2) { fprintf (stderr, "Error: cut + mm <= j2: %d %d\n", cut + mm, j2); exit (1); } #endif #if 0 if (extended && k > 40000) printf ("k:%u d:%u S11:%u S12:%u S21:%u S22:%u P:%u Q:%u\n", k, dd, mpz_size (S11), mpz_size (S12), mpz_size (S21), mpz_size (S22), mpz_size (P), mpz_size (Q)); #endif #ifdef DEBUG if (2 * j2 >= cut) { fprintf (stderr, "Error: 2 * j2 >= cut: j2=%u cut=%u\n", j2, cut); exit (1); } #endif MPZ_MUL (t1, S11, Q); MPZ_MUL (q, S12, P); MPZ_MUL (Q, S21, Q); MPZ_MUL (P, S22, P); mpz_add (Q, Q, P); mpz_add (P, t1, q); mpz_mul_2exp (P, P, cut - 2 * j2); mpz_mul_2exp (Q, Q, cut - 2 * j2); mpz_add (P, P, Q0); mpz_add (Q, Q, P0); mpz_clear (S11); mpz_clear (S12); mpz_clear (S21); mpz_clear (S22); j1 += j + j2; end: mpz_clear (P0); mpz_clear (Q0); end2: #ifdef DEBUG mpz_gcd (g2, P, Q); mpz_div_2exp (g2, g2, VAL(g2)); if (mpz_cmp (g, g2)) { fprintf (stderr, "g <> g2, extended=%u\n", extended); OUT(g); OUT(g2); exit (1); } mpz_clear (g); mpz_clear (g2); #endif return j1; } #define TMP_SIZE 8 void mpz_fastgcd (mpz_t g, const mpz_t a, const mpz_t b) { mpz_t tmp[TMP_SIZE]; unsigned long i, va, vb, v, v1; for (i = 0; i < TMP_SIZE; i++) mpz_init (tmp[i]); va = VAL(a); vb = VAL(b); if (va < vb) { mpz_set (tmp[0], a); mpz_set (tmp[1], b); } else { mpz_set (tmp[0], b); if (va == vb) mpz_add (tmp[1], a, b); else mpz_set (tmp[1], a); va = vb; } /* now VAL(tmp[0]) < VAL(tmp[1]) */ #ifdef DEBUG2 printf ("t0:="); OUT(tmp[0]); printf ("t1:="); OUT(tmp[1]); #endif mpz_div_2exp (tmp[0], tmp[0], va); mpz_div_2exp (tmp[1], tmp[1], va); while (VAL(tmp[1]) +1 < mpz_sizeinbase (tmp[1], 2)) { #if 0 mpz_div_2exp (tmp[1], tmp[1], VAL(tmp[1])); mpz_add (tmp[1], tmp[0], tmp[1]); #endif #ifdef DEBUG if ((int) SIZ(tmp[0]) >= 498) printf (" size = %i \n", SIZ(tmp[0])); #endif if (mpz_sizeinbase (tmp[1], 2) <= VAL(tmp[1]) + THRESHOLD) break; if (mpz_size (tmp[1]) > MUL_FFT_THRESHOLD) v = mpz_sizeinbase (tmp[1], 2) / 4; else v = mpz_sizeinbase (tmp[1], 2) / 4; v1 = mpz_int_rfeea0 (tmp[2], tmp[3], tmp[4], tmp[5], tmp[0], tmp[1], v, tmp + 6); if (v1 == 0) break; } #ifdef DEBUG2 printf ("t0:="); OUT(tmp[0]); printf ("t1:="); OUT(tmp[1]); #endif mpz_div_2exp (tmp[1], tmp[1], VAL(tmp[1])); mpz_gcd (g, tmp[0], tmp[1]); mpz_mul_2exp (g, g, va); for (i = 0; i < TMP_SIZE; i++) mpz_clear (tmp[i]); } void Hstar (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { int i; if (n / 4 == 0) return; for (i=0; i<4; i++) mpn_mul (r, a, n / 2, b, n / 4); for (i=0; i<12; i++) mpn_mul (r, a, n / 4, b, n / 4); Hstar (r, a, b, n / 2); Hstar (r, a, b, n / 2); } void H (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { int i; for (i=0; i<4; i++) mpn_mul (r, a, n / 2, b, n / 4); for (i=0; i<4; i++) mpn_mul (r, a, n / 4, b, n / 4); Hstar (r, a, b, n / 2); Hstar (r, a, b, n / 2); } void G (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { if (n <= 1) return; H (r, a, b, n); G (r, a, b, (n + 1) / 2); } /* P=a[n], Q=2*a[n-1] such that a[0]=0, a[1]=1, a[n] = -a[n-1] + 4 a[n-2] */ void mpz_bfib2_ui (mpz_t P, mpz_t Q, unsigned long n) { if (n == 0) abort(); mpz_set_ui (P, 1); mpz_set_ui (Q, 0); /* n=0 */ while (n-- > 1) { mpz_mul_2exp (Q, Q, 2); mpz_sub (Q, Q, P); mpz_swap (P, Q); } mpz_mul_2exp (Q, Q, 1); } int main (int argc, char *argv[]) { unsigned long n, j, k; mpz_t P, Q, R11, R12, R21, R22, g, g2; int st, fib = 0; if (argc < 2) { fprintf (stderr, "Usage: hgcd_bin [-fib] [-bfib] n [k]\n"); fprintf (stderr, " n - input size (in limbs)\n"); fprintf (stderr, " k - repeat k times\n"); fprintf (stderr, " -fib - use F(n) and F(n-1) as input\n"); fprintf (stderr, " -bfib - use bF(n) and bF(n-1) as input\n"); exit (1); } if (strcmp (argv[1], "-fib") == 0) { fib = 1; argv ++; argc --; } if (strcmp (argv[1], "-bfib") == 0) { fib = -1; argv ++; argc --; } n = atoi (argv[1]); /* number of limbs of input numbers */ k = (argc > 2) ? atoi (argv[2]) : 1; mpz_init (P); mpz_init (Q); mpz_init (R11); mpz_init (R12); mpz_init (g); mpz_init (g2); if (fib == 1) { mpz_fib2_ui (P, Q, n); n = mpz_size (P); if (mpz_size (Q) > n) n = mpz_size (Q); printf ("Using Fibonacci numbers of %lu limbs\n", n); } else if (fib == -1) { mpz_bfib2_ui (P, Q, n); n = mpz_size (P); if (mpz_size (Q) > n) n = mpz_size (Q); printf ("Using bin-Fibonacci numbers of %lu limbs\n", n); } else { do { mpz_random (P, n); mpz_random (Q, n); } while (VAL(P) >= VAL(Q)); } mpz_realloc (g, 2 * n); mul_cost_hgcd = 0.0; st = cputime (); for (j = 0; j < k; j++) mpz_gcd (g, P, Q); printf ("mpz_gcd took %dms\n", cputime () - st); #ifdef DEBUG printf ("mul_cost_hgcd = %f\n", mul_cost_hgcd); printf ("\n"); #endif mul_cost = 0.0; st = cputime (); for (j = 0; j < k; j++) mpz_fastgcd (g2, P, Q); printf ("mpz_fastgcd took %dms\n", cputime () - st); #ifdef DEBUG printf ("mul_cost = %f\n", mul_cost); #endif if (mpz_cmp (g, g2) != 0) { fprintf (stderr, "mpz_gcd and mpz_fastgcd differ\n"); /* printf ("g="); OUT(g); printf ("g2="); OUT(g2); */ exit (1); } st = cputime (); for (j = 0; j < k; j++) mpz_gcdext (g, R11, R12, P, Q); printf ("mpz_gcdext took %dms\n", cputime () - st); if (mpz_cmp (g, g2) != 0) { fprintf (stderr, "mpz_gcdext and mpz_fastgcd differ\n"); /* printf ("g="); OUT(g); printf ("g2="); OUT(g2); */ exit (1); } mpz_clear (P); mpz_clear (Q); mpz_clear (R11); mpz_clear (R12); mpz_clear (g); mpz_clear (g2); return 0; }