Testing latest pari + WASM + node.js... and it works?! Wow.
License: GPL3
ubuntu2004
/* Copyright (C) 2000 The PARI group.12This file is part of the PARI/GP package.34PARI/GP is free software; you can redistribute it and/or modify it under the5terms of the GNU General Public License as published by the Free Software6Foundation; either version 2 of the License, or (at your option) any later7version. It is distributed in the hope that it will be useful, but WITHOUT8ANY WARRANTY WHATSOEVER.910Check the License for details. You should have received a copy of it, along11with the package; see the file 'COPYING'. If not, write to the Free Software12Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */1314/* Self-Initializing Multi-Polynomial Quadratic Sieve, based on code developed15* as part of the LiDIA project.16*17* Original version: Thomas Papanikolaou and Xavier Roblot18* Extensively modified by The PARI group. */19/* Notation commonly used in this file, and sketch of algorithm:20*21* Given an odd integer N > 1 to be factored, we throw in a small odd squarefree22* multiplier k so as to make kN = 1 mod 4 and to have many small primes over23* which X^2 - kN splits. We compute a factor base FB of such primes then24* look for values x0 such that Q0(x0) = x0^2 - kN can be decomposed over FB,25* up to a possible factor dividing k and a possible "large prime". Relations26* involving the latter can be combined into full relations which don't; full27* relations, by Gaussian elimination over F2 for the exponent vectors lead us28* to an expression X^2 - Y^2 divisible by N and hopefully to a nontrivial29* splitting when we compute gcd(X + Y, N). Note that this can never30* split prime powers.31*32* Candidates x0 are found by sieving along arithmetic progressions modulo the33* small primes in FB and evaluation of candidates picks out those x0 where34* many of these progressions coincide, resulting in a highly divisible Q0(x0).35*36* The Multi-Polynomial version improves this by choosing a modest subset of37* FB primes (let A be their product) and forcing these to divide Q0(x).38* Write Q(x) = Q0(2Ax + B) = (2Ax + B)^2 - kN = 4A(Ax^2 + Bx + C), where B is39* suitably chosen. For each A, there are 2^omega_A possible values for B40* but we'll use only half of these, since the other half is easily covered by41* exploiting the symmetry x -> -x of the original Q0. The "Self-Initializating"42* bit refers to the fact that switching from one B to the next is fast, whereas43* switching to the next A involves some recomputation (C is never needed).44* Thus we quickly run through many polynomials sharing the same A.45*46* The sieve ranges over values x0 such that |x0| < M (we use x = x0 + M47* as array subscript). The coefficients A are chosen so that A*M ~ sqrt(kN).48* Then |B| is bounded by ~ (j+4)*A, and |C| = -C ~ (M/4)*sqrt(kN), so49* Q(x0)/(4A) takes values roughly between -|C| and 3|C|.50*51* Refinements. We do not use the smallest FB primes for sieving, incorporating52* them only after selecting candidates). The substition of 2Ax+B into53* X^2 - kN, with odd B, forces 2 to occur; when kN is 1 mod 8, it occurs at54* least to the 3rd power; when kN = 5 mod 8, it occurs exactly to the 2nd55* power. We never sieve on 2 and always pull out the power of 2 directly. The56* prime factors of k show up whenever 2Ax + B has a factor in common with k;57* we don't sieve on these either but easily recognize them in a candidate. */58#include "pari.h"59#include "paripriv.h"6061#define DEBUGLEVEL DEBUGLEVEL_mpqs6263/** DEBUG **/64/* #define MPQS_DEBUG_VERBOSE 1 */65#include "mpqs.h"6667#define REL_OFFSET 2068#define REL_MASK ((1UL<<REL_OFFSET)-1)69#define MAX_PE_PAIR 607071#ifdef HAS_SSE272#include <emmintrin.h>73#define AND(a,b) ((mpqs_bit_array)__builtin_ia32_andps((__v4sf)(a), (__v4sf)(b)))74#define EXT0(a) ((ulong)__builtin_ia32_vec_ext_v2di((__v2di)(a), 0))75#define EXT1(a) ((ulong)__builtin_ia32_vec_ext_v2di((__v2di)(a), 1))76#define TEST(a) (EXT0(a) || EXT1(a))77typedef __v2di mpqs_bit_array;78const mpqs_bit_array mpqs_mask = { (long) 0x8080808080808080L, (long) 0x8080808080808080UL };79#else80/* Use ulong for the bit arrays */81typedef ulong mpqs_bit_array;82#define AND(a,b) ((a)&(b))83#define TEST(a) (a)8485#ifdef LONG_IS_64BIT86const mpqs_bit_array mpqs_mask = 0x8080808080808080UL;87#else88const mpqs_bit_array mpqs_mask = 0x80808080UL;89#endif90#endif9192static GEN rel_q(GEN c) { return gel(c,3); }93static GEN rel_Y(GEN c) { return gel(c,1); }94static GEN rel_p(GEN c) { return gel(c,2); }9596static void97frel_add(hashtable *frel, GEN R)98{99ulong h = hash_GEN(R);100if (!hash_search2(frel, (void*)R, h))101hash_insert2(frel, (void*)R, (void*)1, h);102}103104/*********************************************************************/105/** INITIAL SIZING **/106/*********************************************************************/107/* # of decimal digits of argument */108static long109decimal_len(GEN N)110{ pari_sp av = avma; return gc_long(av, 1+logint(N, utoipos(10))); }111112/* To be called after choosing k and putting kN into the handle:113* Pick up the parameters for given size of kN in decimal digits and fill in114* the handle. Return 0 when kN is too large, 1 when we're ok. */115static int116mpqs_set_parameters(mpqs_handle_t *h)117{118long s, D;119const mpqs_parameterset_t *P;120121h->digit_size_kN = D = decimal_len(h->kN);122if (D > MPQS_MAX_DIGIT_SIZE_KN) return 0;123P = &(mpqs_parameters[maxss(0, D - 9)]);124h->tolerance = P->tolerance;125h->lp_scale = P->lp_scale;126/* make room for prime factors of k if any: */127h->size_of_FB = s = P->size_of_FB + h->_k->omega_k;128/* for the purpose of Gauss elimination etc., prime factors of k behave129* like real FB primes, so take them into account when setting the goal: */130h->target_rels = (s >= 200 ? s + 10 : (mpqs_int32_t)(s * 1.05));131h->M = P->M;132h->omega_A = P->omega_A;133h->no_B = 1UL << (P->omega_A - 1);134h->pmin_index1 = P->pmin_index1;135/* certain subscripts into h->FB should also be offset by omega_k: */136h->index0_FB = 3 + h->_k->omega_k;137if (DEBUGLEVEL >= 5)138{139err_printf("MPQS: kN = %Ps\n", h->kN);140err_printf("MPQS: kN has %ld decimal digits\n", D);141err_printf("\t(estimated memory needed: %4.1fMBy)\n",142(s + 1)/8388608. * h->target_rels);143}144return 1;145}146147/*********************************************************************/148/** OBJECT HOUSEKEEPING **/149/*********************************************************************/150151/* factor base constructor. Really a home-grown memalign(3c) underneath.152* We don't want FB entries to straddle L1 cache line boundaries, and153* malloc(3c) only guarantees alignment adequate for all primitive data154* types of the platform ABI - typically to 8 or 16 byte boundaries.155* Also allocate the inv_A_H array.156* The FB array pointer is returned for convenience */157static mpqs_FB_entry_t *158mpqs_FB_ctor(mpqs_handle_t *h)159{160/* leave room for slots 0, 1, and sentinel slot at the end of the array */161long size_FB_chunk = (h->size_of_FB + 3) * sizeof(mpqs_FB_entry_t);162/* like FB, except this one does not have a sentinel slot at the end */163long size_IAH_chunk = (h->size_of_FB + 2) * sizeof(mpqs_inv_A_H_t);164char *fbp = (char*)stack_malloc(size_FB_chunk + 64);165char *iahp = (char*)stack_malloc(size_IAH_chunk + 64);166long fbl, iahl;167168h->FB_chunk = (void *)fbp;169h->invAH_chunk = (void *)iahp;170/* round up to next higher 64-bytes-aligned address */171fbl = (((long)fbp) + 64) & ~0x3FL;172/* and put the actual array there */173h->FB = (mpqs_FB_entry_t *)fbl;174175iahl = (((long)iahp) + 64) & ~0x3FL;176h->inv_A_H = (mpqs_inv_A_H_t *)iahl;177return (mpqs_FB_entry_t *)fbl;178}179180/* sieve array constructor; also allocates the candidates array181* and temporary storage for relations under construction */182static void183mpqs_sieve_array_ctor(mpqs_handle_t *h)184{185long size = (h->M << 1) + 1;186mpqs_int32_t size_of_FB = h->size_of_FB;187188h->sieve_array = (unsigned char *) stack_calloc_align(size, sizeof(mpqs_mask));189h->sieve_array_end = h->sieve_array + size - 2;190h->sieve_array_end[1] = 255; /* sentinel */191h->candidates = (long *)stack_malloc(MPQS_CANDIDATE_ARRAY_SIZE * sizeof(long));192/* whereas mpqs_self_init() uses size_of_FB+1, we just use the size as193* it is, not counting FB[1], to start off the following estimate */194if (size_of_FB > MAX_PE_PAIR) size_of_FB = MAX_PE_PAIR;195/* and for tracking which primes occur in the current relation: */196h->relaprimes = (long *) stack_malloc((size_of_FB << 1) * sizeof(long));197}198199/* allocate GENs for current polynomial and self-initialization scratch data */200static void201mpqs_poly_ctor(mpqs_handle_t *h)202{203mpqs_int32_t i, w = h->omega_A;204h->per_A_pr = (mpqs_per_A_prime_t *)205stack_calloc(w * sizeof(mpqs_per_A_prime_t));206/* A is the product of w primes, each below word size.207* |B| <= (w + 4) * A, so can have at most one word more208* H holds residues modulo A: the same size as used for A is sufficient. */209h->A = cgeti(w + 2);210h->B = cgeti(w + 3);211for (i = 0; i < w; i++) h->per_A_pr[i]._H = cgeti(w + 2);212}213214/*********************************************************************/215/** FACTOR BASE SETUP **/216/*********************************************************************/217/* fill in the best-guess multiplier k for N. We force kN = 1 mod 4.218* Caller should proceed to fill in kN219* See Knuth-Schroeppel function in220* Robert D. Silverman221* The multiple polynomial quadratic sieve222* Math. Comp. 48 (1987), 329-339223* https://www.ams.org/journals/mcom/1987-48-177/S0025-5718-1987-0866119-8/224*/225static ulong226mpqs_find_k(mpqs_handle_t *h)227{228const pari_sp av = avma;229const long N_mod_8 = mod8(h->N), N_mod_4 = N_mod_8 & 3;230long dl = decimal_len(h->N);231long D = maxss(0, minss(dl,MPQS_MAX_DIGIT_SIZE_KN)-9);232long MPQS_MULTIPLIER_SEARCH_DEPTH = mpqs_parameters[D].size_of_FB;233forprime_t S;234struct {235const mpqs_multiplier_t *_k;236long np; /* number of primes in factorbase so far for this k */237double value; /* the larger, the better */238} cache[MPQS_POSSIBLE_MULTIPLIERS];239ulong MPQS_NB_MULTIPLIERS = dl < 40 ? 5 : MPQS_POSSIBLE_MULTIPLIERS;240ulong p, i, nbk;241242for (i = nbk = 0; i < numberof(cand_multipliers); i++)243{244const mpqs_multiplier_t *cand_k = &cand_multipliers[i];245long k = cand_k->k;246double v;247if ((k & 3) != N_mod_4) continue; /* want kN = 1 (mod 4) */248v = -log((double)k)/2;249if ((k & 7) == N_mod_8) v += M_LN2; /* kN = 1 (mod 8) */250cache[nbk].np = 0;251cache[nbk]._k = cand_k;252cache[nbk].value = v;253if (++nbk == MPQS_NB_MULTIPLIERS) break; /* enough */254}255/* next test is an impossible situation: kills spurious gcc-5.1 warnings256* "array subscript is above array bounds" */257if (nbk > MPQS_POSSIBLE_MULTIPLIERS) nbk = MPQS_POSSIBLE_MULTIPLIERS;258u_forprime_init(&S, 2, ULONG_MAX);259while ( (p = u_forprime_next(&S)) )260{261long kroNp = kroiu(h->N, p), seen = 0;262if (!kroNp) return p;263for (i = 0; i < nbk; i++)264{265long krokp;266if (cache[i].np > MPQS_MULTIPLIER_SEARCH_DEPTH) continue;267seen++;268krokp = krouu(cache[i]._k->k % p, p);269if (krokp == kroNp) /* kronecker(k*N, p)=1 */270{271cache[i].value += 2*log((double) p)/p;272cache[i].np++;273} else if (krokp == 0)274{275cache[i].value += log((double) p)/p;276cache[i].np++;277}278}279if (!seen) break; /* we're gone through SEARCH_DEPTH primes for all k */280}281if (!p) pari_err_OVERFLOW("mpqs_find_k [ran out of primes]");282{283long best_i = 0;284double v = cache[0].value;285for (i = 1; i < nbk; i++)286if (cache[i].value > v) { best_i = i; v = cache[i].value; }287h->_k = cache[best_i]._k; return gc_ulong(av,0);288}289}290291/* Create a factor base of 'size' primes p_i such that legendre(k*N, p_i) != -1292* We could have shifted subscripts down from their historical arrangement,293* but this seems too risky for the tiny potential gain in memory economy.294* The real constraint is that the subscripts of anything which later shows295* up at the Gauss stage must be nonnegative, because the exponent vectors296* there use the same subscripts to refer to the same FB entries. Thus in297* particular, the entry representing -1 could be put into FB[0], but could298* not be moved to FB[-1] (although mpqs_FB_ctor() could be easily adapted299* to support negative subscripts).-- The historically grown layout is:300* FB[0] is unused.301* FB[1] is not explicitly used but stands for -1.302* FB[2] contains 2 (always).303* Before we are called, the size_of_FB field in the handle will already have304* been adjusted by _k->omega_k, so there's room for the primes dividing k,305* which when present will occupy FB[3] and following.306* The "real" odd FB primes begin at FB[h->index0_FB].307* FB[size_of_FB+1] is the last prime p_i.308* FB[size_of_FB+2] is a sentinel to simplify some of our loops.309* Thus we allocate size_of_FB+3 slots for FB.310*311* If a prime factor of N is found during the construction, it is returned312* in f, otherwise f = 0. */313314/* returns the FB array pointer for convenience */315static mpqs_FB_entry_t *316mpqs_create_FB(mpqs_handle_t *h, ulong *f)317{318mpqs_FB_entry_t *FB = mpqs_FB_ctor(h);319const pari_sp av = avma;320mpqs_int32_t size = h->size_of_FB;321long i;322mpqs_uint32_t k = h->_k->k;323forprime_t S;324325FB[2].fbe_p = 2;326/* the fbe_logval and the fbe_sqrt_kN for 2 are never used */327FB[2].fbe_flags = MPQS_FBE_CLEAR;328for (i = 3; i < h->index0_FB; i++)329{ /* this loop executes h->_k->omega_k = 0, 1, or 2 times */330mpqs_uint32_t kp = (ulong)h->_k->kp[i-3];331if (MPQS_DEBUGLEVEL >= 7) err_printf(",<%lu>", (ulong)kp);332FB[i].fbe_p = kp;333/* we could flag divisors of k here, but no need so far */334FB[i].fbe_flags = MPQS_FBE_CLEAR;335FB[i].fbe_flogp = (float)log2((double) kp);336FB[i].fbe_sqrt_kN = 0;337}338(void)u_forprime_init(&S, 3, ULONG_MAX);339while (i < size + 2)340{341ulong p = u_forprime_next(&S);342if (p > k || k % p)343{344ulong kNp = umodiu(h->kN, p);345long kr = krouu(kNp, p);346if (kr != -1)347{348if (kr == 0) { *f = p; return FB; }349FB[i].fbe_p = (mpqs_uint32_t) p;350FB[i].fbe_flags = MPQS_FBE_CLEAR;351/* dyadic logarithm of p; single precision suffices */352FB[i].fbe_flogp = (float)log2((double)p);353/* cannot yet fill in fbe_logval because the scaling multiplier354* depends on the largest prime in FB, as yet unknown */355356/* x such that x^2 = kN (mod p_i) */357FB[i++].fbe_sqrt_kN = (mpqs_uint32_t)Fl_sqrt(kNp, p);358}359}360}361set_avma(av);362if (MPQS_DEBUGLEVEL >= 7)363{364err_printf("MPQS: FB [-1,2");365for (i = 3; i < h->index0_FB; i++) err_printf(",<%lu>", FB[i].fbe_p);366for (; i < size + 2; i++) err_printf(",%lu", FB[i].fbe_p);367err_printf("]\n");368}369370FB[i].fbe_p = 0; /* sentinel */371h->largest_FB_p = FB[i-1].fbe_p; /* at subscript size_of_FB + 1 */372373/* locate the smallest prime that will be used for sieving */374for (i = h->index0_FB; FB[i].fbe_p != 0; i++)375if (FB[i].fbe_p >= h->pmin_index1) break;376h->index1_FB = i;377/* with our parameters this will never fall off the end of the FB */378*f = 0; return FB;379}380381/*********************************************************************/382/** MISC HELPER FUNCTIONS **/383/*********************************************************************/384385/* Effect of the following: multiplying the base-2 logarithm of some386* quantity by log_multiplier will rescale something of size387* log2 ( sqrt(kN) * M / (largest_FB_prime)^tolerance )388* to 232. Note that sqrt(kN) * M is just A*M^2, the value our polynomials389* take at the outer edges of the sieve interval. The scale here leaves390* a little wiggle room for accumulated rounding errors from the approximate391* byte-sized scaled logarithms for the factor base primes which we add up392* in the sieving phase.-- The threshold is then chosen so that a point in393* the sieve has to reach a result which, under the same scaling, represents394* log2 ( sqrt(kN) * M / (largest_FB_prime)^tolerance )395* in order to be accepted as a candidate. */396/* The old formula was...397* log_multiplier =398* 127.0 / (0.5 * log2 (handle->dkN) + log2((double)M)399* - tolerance * log2((double)handle->largest_FB_p));400* and we used to use this with a constant threshold of 128. */401402/* NOTE: We used to divide log_multiplier by an extra factor 2, and in403* compensation we were multiplying by 2 when the fbe_logp fields were being404* filled in, making all those bytes even. Tradeoff: the extra bit of405* precision is helpful, but interferes with a possible sieving optimization406* (artifically shift right the logp's of primes in A, and just run over both407* arithmetical progressions (which coincide in this case) instead of408* skipping the second one, to avoid the conditional branch in the409* mpqs_sieve() loops). We could still do this, but might lose a little bit410* accuracy for those primes. Probably no big deal. */411static void412mpqs_set_sieve_threshold(mpqs_handle_t *h)413{414mpqs_FB_entry_t *FB = h->FB;415double log_maxval, log_multiplier;416long i;417418h->l2sqrtkN = 0.5 * log2(h->dkN);419h->l2M = log2((double)h->M);420log_maxval = h->l2sqrtkN + h->l2M - MPQS_A_FUDGE;421log_multiplier = 232.0 / log_maxval;422h->sieve_threshold = (unsigned char) (log_multiplier *423(log_maxval - h->tolerance * log2((double)h->largest_FB_p))) + 1;424/* That "+ 1" really helps - we may want to tune towards somewhat smaller425* tolerances (or introduce self-tuning one day)... */426427/* If this turns out to be <128, scream loudly.428* That means that the FB or the tolerance or both are way too429* large for the size of kN. (Normally, the threshold should end430* up in the 150...170 range.) */431if (h->sieve_threshold < 128) {432h->sieve_threshold = 128;433pari_warn(warner,434"MPQS: sizing out of tune, FB size or tolerance\n\ttoo large");435}436if (DEBUGLEVEL >= 5)437err_printf("MPQS: sieve threshold: %ld\n",h->sieve_threshold);438/* Now fill in the byte-sized approximate scaled logarithms of p_i */439if (DEBUGLEVEL >= 5)440err_printf("MPQS: computing logarithm approximations for p_i in FB\n");441for (i = h->index0_FB; i < h->size_of_FB + 2; i++)442FB[i].fbe_logval = (unsigned char) (log_multiplier * FB[i].fbe_flogp);443}444445/* Given the partially populated handle, find the optimum place in the FB446* to pick prime factors for A from. The lowest admissible subscript is447* index0_FB, but unless kN is very small, we stay away a bit from that.448* The highest admissible is size_of_FB + 1, where the largest FB prime449* resides. The ideal corner is about (sqrt(kN)/M) ^ (1/omega_A),450* so that A will end up of size comparable to sqrt(kN)/M; experimentally451* it seems desirable to stay slightly below this. Moreover, the selection452* of the individual primes happens to err on the large side, for which we453* compensate a bit, using the (small positive) quantity MPQS_A_FUDGE.454* We rely on a few auxiliary fields in the handle to be already set by455* mqps_set_sieve_threshold() before we are called.456* Return 1 on success, and 0 otherwise. */457static int458mpqs_locate_A_range(mpqs_handle_t *h)459{460/* i will be counted up to the desirable index2_FB + 1, and omega_A is never461* less than 3, and we want462* index2_FB - (omega_A - 1) + 1 >= index0_FB + omega_A - 3,463* so: */464long i = h->index0_FB + 2*(h->omega_A) - 4;465double l2_target_pA;466mpqs_FB_entry_t *FB = h->FB;467468h->l2_target_A = (h->l2sqrtkN - h->l2M - MPQS_A_FUDGE);469l2_target_pA = h->l2_target_A / h->omega_A;470471/* find the sweet spot, normally shouldn't take long */472while (FB[i].fbe_p && FB[i].fbe_flogp <= l2_target_pA) i++;473474/* check whether this hasn't walked off the top end... */475/* The following should actually NEVER happen. */476if (i > h->size_of_FB - 3)477{ /* this isn't going to work at all. */478pari_warn(warner,479"MPQS: sizing out of tune, FB too small or\n\tway too few primes in A");480return 0;481}482h->index2_FB = i - 1; return 1;483/* assert: index0_FB + (omega_A - 3) [the lowest FB subscript used in primes484* for A] + (omega_A - 2) <= index2_FB [the subscript from which the choice485* of primes for A starts, putting omega_A - 1 of them at or below index2_FB,486* and the last and largest one above, cf. mpqs_si_choose_primes]. Moreover,487* index2_FB indicates the last prime below the ideal size, unless (when kN488* is tiny) the ideal size was too small to use. */489}490491/*********************************************************************/492/** SELF-INITIALIZATION **/493/*********************************************************************/494495#ifdef MPQS_DEBUG496/* Debug-only helper routine: check correctness of the root z mod p_i497* by evaluting A * z^2 + B * z + C mod p_i (which should be 0). */498static void499check_root(mpqs_handle_t *h, GEN mC, long p, long start)500{501pari_sp av = avma;502long z = start - ((long)(h->M) % p);503if (umodiu(subii(mulsi(z, addii(h->B, mulsi(z, h->A))), mC), p))504{505err_printf("MPQS: p = %ld\n", p);506err_printf("MPQS: A = %Ps\n", h->A);507err_printf("MPQS: B = %Ps\n", h->B);508err_printf("MPQS: C = %Ps\n", negi(mC));509err_printf("MPQS: z = %ld\n", z);510pari_err_BUG("MPQS: self_init: found wrong polynomial");511}512set_avma(av);513}514#endif515516/* Increment *x > 0 to a larger value which has the same number of 1s in its517* binary representation. Wraparound can be detected by the caller as long as518* we keep total_no_of_primes_for_A strictly less than BITS_IN_LONG.519*520* Changed switch to increment *x in all cases to the next larger number521* which (a) has the same count of 1 bits and (b) does not arise from the522* old value by moving a single 1 bit one position to the left (which was523* undesirable for the sieve). --GN based on discussion with TP */524INLINE void525mpqs_increment(mpqs_uint32_t *x)526{527mpqs_uint32_t r1_mask, r01_mask, slider=1UL;528529switch (*x & 0x1F)530{ /* 32-way computed jump handles 22 out of 32 cases */531case 29:532(*x)++; break; /* shifts a single bit, but we postprocess this case */533case 26:534(*x) += 2; break; /* again */535case 1: case 3: case 6: case 9: case 11:536case 17: case 19: case 22: case 25: case 27:537(*x) += 3; return;538case 20:539(*x) += 4; break; /* again */540case 5: case 12: case 14: case 21:541(*x) += 5; return;542case 2: case 7: case 13: case 18: case 23:543(*x) += 6; return;544case 10:545(*x) += 7; return;546case 8:547(*x) += 8; break; /* and again */548case 4: case 15:549(*x) += 12; return;550default: /* 0, 16, 24, 28, 30, 31 */551/* isolate rightmost 1 */552r1_mask = ((*x ^ (*x - 1)) + 1) >> 1;553/* isolate rightmost 1 which has a 0 to its left */554r01_mask = ((*x ^ (*x + r1_mask)) + r1_mask) >> 2;555/* simple cases. Both of these shift a single bit one position to the556left, and will need postprocessing */557if (r1_mask == r01_mask) { *x += r1_mask; break; }558if (r1_mask == 1) { *x += r01_mask; break; }559/* General case: add r01_mask, kill off as many 1 bits as possible to its560* right while at the same time filling in 1 bits from the LSB. */561if (r1_mask == 2) { *x += (r01_mask>>1) + 1; return; }562while (r01_mask > r1_mask && slider < r1_mask)563{564r01_mask >>= 1; slider <<= 1;565}566*x += r01_mask + slider - 1;567return;568}569/* post-process cases which couldn't be finalized above */570r1_mask = ((*x ^ (*x - 1)) + 1) >> 1;571r01_mask = ((*x ^ (*x + r1_mask)) + r1_mask) >> 2;572if (r1_mask == r01_mask) { *x += r1_mask; return; }573if (r1_mask == 1) { *x += r01_mask; return; }574if (r1_mask == 2) { *x += (r01_mask>>1) + 1; return; }575while (r01_mask > r1_mask && slider < r1_mask)576{577r01_mask >>= 1; slider <<= 1;578}579*x += r01_mask + slider - 1;580}581582/* self-init (1): advancing the bit pattern, and choice of primes for A.583* On first call, h->bin_index = 0. On later occasions, we need to begin584* by clearing the MPQS_FBE_DIVIDES_A bit in the fbe_flags of the former585* prime factors of A (use per_A_pr to find them). Upon successful return, that586* array will have been filled in, and the flag bits will have been turned on587* again in the right places.588* Return 1 when all is fine and 0 when we found we'd be using more bits to589* the left in bin_index than we have matching primes in the FB. In the latter590* case, bin_index will be zeroed out, index2_FB will be incremented by 2,591* index2_moved will be turned on; the caller, after checking that index2_FB592* has not become too large, should just call us again, which then succeeds:593* we'll start again with a right-justified sequence of 1 bits in bin_index,594* now interpreted as selecting primes relative to the new index2_FB. */595INLINE int596mpqs_si_choose_primes(mpqs_handle_t *h)597{598mpqs_FB_entry_t *FB = h->FB;599mpqs_per_A_prime_t *per_A_pr = h->per_A_pr;600double l2_last_p = h->l2_target_A;601mpqs_int32_t omega_A = h->omega_A;602int i, j, v2, prev_last_p_idx;603int room = h->index2_FB - h->index0_FB - omega_A + 4;604/* The notion of room here (cf mpqs_locate_A_range() above) is the number605* of primes at or below index2_FB which are eligible for A. We need606* >= omega_A - 1 of them, and it is guaranteed by mpqs_locate_A_range() that607* this many are available: the lowest FB slot used for A is never less than608* index0_FB + omega_A - 3. When omega_A = 3 (very small kN), we allow609* ourselves to reach all the way down to index0_FB; otherwise, we keep away610* from it by at least one position. For omega_A >= 4 this avoids situations611* where the selection of the smaller primes here has advanced to a lot of612* very small ones, and the single last larger one has soared away to bump613* into the top end of the FB. */614mpqs_uint32_t room_mask;615mpqs_int32_t p;616ulong bits;617618/* XXX also clear the index_j field here? */619if (h->bin_index == 0)620{ /* first time here, or after increasing index2_FB, initialize to a pattern621* of omega_A - 1 consecutive 1 bits. Caller has ensured that there are622* enough primes for this in the FB below index2_FB. */623h->bin_index = (1UL << (omega_A - 1)) - 1;624prev_last_p_idx = 0;625}626else627{ /* clear out old flags */628for (i = 0; i < omega_A; i++) MPQS_FLG(i) = MPQS_FBE_CLEAR;629prev_last_p_idx = MPQS_I(omega_A-1);630631if (room > 30) room = 30;632room_mask = ~((1UL << room) - 1);633634/* bump bin_index to next acceptable value. If index2_moved is off, call635* mpqs_increment() once; otherwise, repeat until there's something in the636* least significant 2 bits - to ensure that we never re-use an A which637* we'd used before increasing index2_FB - but also stop if something shows638* up in the forbidden bits on the left where we'd run out of bits or walk639* beyond index0_FB + omega_A - 3. */640mpqs_increment(&h->bin_index);641if (h->index2_moved)642{643while ((h->bin_index & (room_mask | 0x3)) == 0)644mpqs_increment(&h->bin_index);645}646/* did we fall off the edge on the left? */647if ((h->bin_index & room_mask) != 0)648{ /* Yes. Turn on the index2_moved flag in the handle */649h->index2_FB += 2; /* caller to check this isn't too large!!! */650h->index2_moved = 1;651h->bin_index = 0;652if (MPQS_DEBUGLEVEL >= 5)653err_printf("MPQS: wrapping, more primes for A now chosen near FB[%ld] = %ld\n",654(long)h->index2_FB,655(long)FB[h->index2_FB].fbe_p);656return 0; /* back off - caller should retry */657}658}659/* assert: we aren't occupying any of the room_mask bits now, and if660* index2_moved had already been on, at least one of the two LSBs is on */661bits = h->bin_index;662if (MPQS_DEBUGLEVEL >= 6)663err_printf("MPQS: new bit pattern for primes for A: 0x%lX\n", bits);664665/* map bits to FB subscripts, counting downward with bit 0 corresponding666* to index2_FB, and accumulate logarithms against l2_last_p */667j = h->index2_FB;668v2 = vals((long)bits);669if (v2) { j -= v2; bits >>= v2; }670for (i = omega_A - 2; i >= 0; i--)671{672MPQS_I(i) = j;673l2_last_p -= MPQS_LP(i);674MPQS_FLG(i) |= MPQS_FBE_DIVIDES_A;675bits &= ~1UL;676if (!bits) break; /* i = 0 */677v2 = vals((long)bits); /* > 0 */678bits >>= v2; j -= v2;679}680/* Choose the larger prime. Note we keep index2_FB <= size_of_FB - 3 */681for (j = h->index2_FB + 1; (p = FB[j].fbe_p); j++)682if (FB[j].fbe_flogp > l2_last_p) break;683/* The following trick avoids generating a large proportion of duplicate684* relations when the last prime falls into an area where there are large685* gaps from one FB prime to the next, and would otherwise often be repeated686* (so that successive A's would wind up too similar to each other). While687* this trick isn't perfect, it gets rid of a major part of the potential688* duplication. */689if (p && j == prev_last_p_idx) { j++; p = FB[j].fbe_p; }690MPQS_I(omega_A - 1) = p? j: h->size_of_FB + 1;691MPQS_FLG(omega_A - 1) |= MPQS_FBE_DIVIDES_A;692693if (MPQS_DEBUGLEVEL >= 6)694{695err_printf("MPQS: chose primes for A");696for (i = 0; i < omega_A; i++)697err_printf(" FB[%ld]=%ld%s", (long)MPQS_I(i), (long)MPQS_AP(i),698i < omega_A - 1 ? "," : "\n");699}700return 1;701}702703/* There are 4 parts to self-initialization, exercised at different times:704* - choosing a new sqfree coef. A (selecting its prime factors, FB bookkeeping)705* - doing the actual computations attached to a new A706* - choosing a new B keeping the same A (much simpler)707* - a small common bit that needs to happen in both cases.708* As to the first item, the scheme works as follows: pick omega_A - 1 prime709* factors for A below the index2_FB point which marks their ideal size, and710* one prime above this point, choosing the latter so log2(A) ~ l2_target_A.711* Lower prime factors are chosen using bit patterns of constant weight,712* gradually moving away from index2_FB towards smaller FB subscripts.713* If this bumps into index0_FB (for very small input), back up by increasing714* index2_FB by two, and from then on choosing only bit patterns with either or715* both of their bottom bits set, so at least one of the omega_A - 1 smaller716* prime factor will be beyond the original index2_FB point. In this way we717* avoid re-using the same A. (The choice of the upper "flyer" prime is718* constrained by the size of the FB, which normally should never a problem.719* For tiny kN, we might have to live with a nonoptimal choice.)720*721* Mathematically, we solve a quadratic (over F_p for each prime p in the FB722* which doesn't divide A), a linear equation for each prime p | A, and723* precompute differences between roots mod p so we can adjust the roots724* quickly when we change B. See Thomas Sosnowski's Diplomarbeit. */725/* compute coefficients of sieving polynomial for self initializing variant.726* Coefficients A and B are set (preallocated GENs) and several tables are727* updated. */728static int729mpqs_self_init(mpqs_handle_t *h)730{731const ulong size_of_FB = h->size_of_FB + 1;732mpqs_FB_entry_t *FB = h->FB;733mpqs_inv_A_H_t *inv_A_H = h->inv_A_H;734const pari_sp av = avma;735GEN p1, A = h->A, B = h->B;736mpqs_per_A_prime_t *per_A_pr = h->per_A_pr;737long i, j;738739#ifdef MPQS_DEBUG740err_printf("MPQS DEBUG: enter self init, avma = 0x%lX\n", (ulong)avma);741#endif742if (++h->index_j == (mpqs_uint32_t)h->no_B)743{ /* all the B's have been used, choose new A; this is indicated by setting744* index_j to 0 */745h->index_j = 0;746h->index_i++; /* count finished A's */747}748749if (h->index_j == 0)750{ /* compute first polynomial with new A */751GEN a, b, A2;752if (!mpqs_si_choose_primes(h))753{ /* Ran out of room towards small primes, and index2_FB was raised. */754if (size_of_FB - h->index2_FB < 4) return 0; /* Fail */755(void)mpqs_si_choose_primes(h); /* now guaranteed to succeed */756}757/* bin_index and per_A_pr now populated with consistent values */758759/* compute A = product of omega_A primes given by bin_index */760a = b = NULL;761for (i = 0; i < h->omega_A; i++)762{763ulong p = MPQS_AP(i);764a = a? muliu(a, p): utoipos(p);765}766affii(a, A);767/* Compute H[i], 0 <= i < omega_A. Also compute the initial768* B = sum(v_i*H[i]), by taking all v_i = +1769* TODO: following needs to be changed later for segmented FB and sieve770* interval, where we'll want to precompute several B's. */771for (i = 0; i < h->omega_A; i++)772{773ulong p = MPQS_AP(i);774GEN t = divis(A, (long)p);775t = remii(mulii(t, muluu(Fl_inv(umodiu(t, p), p), MPQS_SQRT(i))), A);776affii(t, MPQS_H(i));777b = b? addii(b, t): t;778}779affii(b, B); set_avma(av);780781/* ensure B = 1 mod 4 */782if (mod2(B) == 0)783affii(addii(B, mului(mod4(A), A)), B); /* B += (A % 4) * A; */784785A2 = shifti(A, 1);786/* compute the roots z1, z2, of the polynomial Q(x) mod p_j and787* initialize start1[i] with the first value p_i | Q(z1 + i p_j)788* initialize start2[i] with the first value p_i | Q(z2 + i p_j)789* The following loop does The Right Thing for primes dividing k (where790* sqrt_kN is 0 mod p). Primes dividing A are skipped here, and are handled791* further down in the common part of SI. */792for (j = 3; (ulong)j <= size_of_FB; j++)793{794ulong s, mb, t, m, p, iA2, iA;795if (FB[j].fbe_flags & MPQS_FBE_DIVIDES_A) continue;796p = (ulong)FB[j].fbe_p;797m = h->M % p;798iA2 = Fl_inv(umodiu(A2, p), p); /* = 1/(2*A) mod p_j */799iA = iA2 << 1; if (iA > p) iA -= p;800mb = umodiu(B, p); if (mb) mb = p - mb; /* mb = -B mod p */801s = FB[j].fbe_sqrt_kN;802t = Fl_add(m, Fl_mul(Fl_sub(mb, s, p), iA2, p), p);803FB[j].fbe_start1 = (mpqs_int32_t)t;804FB[j].fbe_start2 = (mpqs_int32_t)Fl_add(t, Fl_mul(s, iA, p), p);805for (i = 0; i < h->omega_A - 1; i++)806{807ulong h = umodiu(MPQS_H(i), p);808MPQS_INV_A_H(i,j) = Fl_mul(h, iA, p); /* 1/A * H[i] mod p_j */809}810}811}812else813{ /* no "real" computation -- use recursive formula */814/* The following exploits that B is the sum of omega_A terms +-H[i]. Each815* time we switch to a new B, we choose a new pattern of signs; the816* precomputation of the inv_A_H array allows us to change the two817* arithmetic progressions equally fast. The choice of sign patterns does818* not follow the bit pattern of the ordinal number of B in the current819* cohort; rather, we use a Gray code, changing only one sign each time.820* When the i-th rightmost bit of the new ordinal number index_j of B is 1,821* the sign of H[i] is changed; the next bit to the left tells us whether822* we should be adding or subtracting the difference term. We never need to823* change the sign of H[omega_A-1] (the topmost one), because that would824* just give us the same sieve items Q(x) again with the opposite sign825* of x. This is why we only precomputed inv_A_H up to i = omega_A - 2. */826ulong p, v2 = vals(h->index_j); /* new starting positions for sieving */827j = h->index_j >> v2;828p1 = shifti(MPQS_H(v2), 1);829if (j & 2)830{ /* j = 3 mod 4 */831for (j = 3; (ulong)j <= size_of_FB; j++)832{833if (FB[j].fbe_flags & MPQS_FBE_DIVIDES_A) continue;834p = (ulong)FB[j].fbe_p;835FB[j].fbe_start1 = Fl_sub(FB[j].fbe_start1, MPQS_INV_A_H(v2,j), p);836FB[j].fbe_start2 = Fl_sub(FB[j].fbe_start2, MPQS_INV_A_H(v2,j), p);837}838p1 = addii(B, p1);839}840else841{ /* j = 1 mod 4 */842for (j = 3; (ulong)j <= size_of_FB; j++)843{844if (FB[j].fbe_flags & MPQS_FBE_DIVIDES_A) continue;845p = (ulong)FB[j].fbe_p;846FB[j].fbe_start1 = Fl_add(FB[j].fbe_start1, MPQS_INV_A_H(v2,j), p);847FB[j].fbe_start2 = Fl_add(FB[j].fbe_start2, MPQS_INV_A_H(v2,j), p);848}849p1 = subii(B, p1);850}851affii(p1, B);852}853854/* p=2 is a special case. start1[2], start2[2] are never looked at,855* so don't bother setting them. */856857/* compute zeros of polynomials that have only one zero mod p since p | A */858p1 = diviiexact(subii(h->kN, sqri(B)), shifti(A, 2)); /* coefficient -C */859for (i = 0; i < h->omega_A; i++)860{861ulong p = MPQS_AP(i), s = h->M + Fl_div(umodiu(p1, p), umodiu(B, p), p);862FB[MPQS_I(i)].fbe_start1 = FB[MPQS_I(i)].fbe_start2 = (mpqs_int32_t)(s % p);863}864#ifdef MPQS_DEBUG865for (j = 3; j <= size_of_FB; j++)866{867check_root(h, p1, FB[j].fbe_p, FB[j].fbe_start1);868check_root(h, p1, FB[j].fbe_p, FB[j].fbe_start2);869}870#endif871if (MPQS_DEBUGLEVEL >= 6)872err_printf("MPQS: chose Q_%ld(x) = %Ps x^2 %c %Ps x + C\n",873(long) h->index_j, h->A,874signe(h->B) < 0? '-': '+', absi_shallow(h->B));875set_avma(av);876#ifdef MPQS_DEBUG877err_printf("MPQS DEBUG: leave self init, avma = 0x%lX\n", (ulong)avma);878#endif879return 1;880}881882/*********************************************************************/883/** THE SIEVE **/884/*********************************************************************/885/* p4 = 4*p, logp ~ log(p), B/E point to the beginning/end of a sieve array */886INLINE void887mpqs_sieve_p(unsigned char *B, unsigned char *E, long p4, long p,888unsigned char logp)889{890register unsigned char *e = E - p4;891/* Unrolled loop. It might be better to let the compiler worry about this892* kind of optimization, based on its knowledge of whatever useful tricks the893* machine instruction set architecture is offering */894while (e - B >= 0) /* signed comparison */895{896(*B) += logp, B += p;897(*B) += logp, B += p;898(*B) += logp, B += p;899(*B) += logp, B += p;900}901while (E - B >= 0) (*B) += logp, B += p;902}903904INLINE void905mpqs_sieve_p1(unsigned char *B, unsigned char *E, long s1, long s2,906unsigned char logp)907{908while (E - B >= 0)909{910(*B) += logp, B += s1;911if (E - B < 0) break;912(*B) += logp, B += s2;913}914}915916INLINE void917mpqs_sieve_p2(unsigned char *B, unsigned char *E, long p4, long s1, long s2,918unsigned char logp)919{920register unsigned char *e = E - p4;921/* Unrolled loop. It might be better to let the compiler worry about this922* kind of optimization, based on its knowledge of whatever useful tricks the923* machine instruction set architecture is offering */924while (e - B >= 0) /* signed comparison */925{926(*B) += logp, B += s1;927(*B) += logp, B += s2;928(*B) += logp, B += s1;929(*B) += logp, B += s2;930(*B) += logp, B += s1;931(*B) += logp, B += s2;932(*B) += logp, B += s1;933(*B) += logp, B += s2;934}935while (E - B >= 0) {(*B) += logp, B += s1; if (E - B < 0) break; (*B) += logp, B += s2;}936}937static void938mpqs_sieve(mpqs_handle_t *h)939{940long p, l = h->index1_FB;941mpqs_FB_entry_t *FB = &(h->FB[l]);942unsigned char *S = h->sieve_array, *Send = h->sieve_array_end;943long size = h->M << 1, size4 = size >> 3;944memset((void*)S, 0, size * sizeof(unsigned char));945for ( ; (p = FB->fbe_p) && p <= size4; FB++) /* l++ */946{947unsigned char logp = FB->fbe_logval;948long s1 = FB->fbe_start1, s2 = FB->fbe_start2;949/* sieve with FB[l] from start1[l], and from start2[l] if s1 != s2 */950if (s1 == s2) mpqs_sieve_p(S + s1, Send, p << 2, p, logp);951else952{953if (s1>s2) lswap(s1,s2)954mpqs_sieve_p2(S + s1, Send, p << 2, s2-s1,p+s1-s2, logp);955}956}957for ( ; (p = FB->fbe_p) && p <= size; FB++) /* l++ */958{959unsigned char logp = FB->fbe_logval;960long s1 = FB->fbe_start1, s2 = FB->fbe_start2;961/* sieve with FB[l] from start1[l], and from start2[l] if s1 != s2 */962if (s1 == s2) mpqs_sieve_p(S + s1, Send, p << 2, p, logp);963else964{965if (s1>s2) lswap(s1,s2)966mpqs_sieve_p1(S + s1, Send, s2-s1, p+s1-s2, logp);967}968}969for ( ; (p = FB->fbe_p); FB++)970{971unsigned char logp = FB->fbe_logval;972long s1 = FB->fbe_start1, s2 = FB->fbe_start2;973if (s1 < size) S[s1] += logp;974if (s2!=s1 && s2 < size) S[s2] += logp;975}976}977978/* Could use the fact that 4 | M, but let the compiler worry about unrolling. */979static long980mpqs_eval_sieve(mpqs_handle_t *h)981{982long x = 0, count = 0, M2 = h->M << 1;983unsigned char t = h->sieve_threshold;984unsigned char *S = h->sieve_array;985mpqs_bit_array * U = (mpqs_bit_array *) S;986long *cand = h->candidates;987const long sizemask = sizeof(mpqs_mask);988989/* Exploiting the sentinel, we don't need to check for x < M2 in the inner990* while loop; more than makes up for the lack of explicit unrolling. */991while (count < MPQS_CANDIDATE_ARRAY_SIZE - 1)992{993long j, y;994while (!TEST(AND(U[x],mpqs_mask))) x++;995y = x*sizemask;996for (j=0; j<sizemask; j++, y++)997{998if (y >= M2)999{ cand[count] = 0; return count; }1000if (S[y]>=t)1001cand[count++] = y;1002}1003x++;1004}1005cand[count] = 0; return count;1006}10071008/*********************************************************************/1009/** CONSTRUCTING RELATIONS **/1010/*********************************************************************/10111012/* only used for debugging */1013static void1014split_relp(GEN rel, GEN *prelp, GEN *prelc)1015{1016long j, l = lg(rel);1017GEN relp, relc;1018*prelp = relp = cgetg(l, t_VECSMALL);1019*prelc = relc = cgetg(l, t_VECSMALL);1020for (j=1; j<l; j++)1021{1022relc[j] = rel[j] >> REL_OFFSET;1023relp[j] = rel[j] & REL_MASK;1024}1025}10261027#ifdef MPQS_DEBUG1028static GEN1029mpqs_factorback(mpqs_handle_t *h, GEN relp)1030{1031GEN N = h->N, Q = gen_1;1032long j, l = lg(relp);1033for (j = 1; j < l; j++)1034{1035long e = relp[j] >> REL_OFFSET, i = relp[j] & REL_MASK;1036if (i == 1) Q = Fp_neg(Q,N); /* special case -1 */1037else Q = Fp_mul(Q, Fp_powu(utoipos(h->FB[i].fbe_p), e, N), N);1038}1039return Q;1040}1041static void1042mpqs_check_rel(mpqs_handle_t *h, GEN c)1043{1044pari_sp av = avma;1045int LP = (lg(c) == 4);1046GEN rhs = mpqs_factorback(h, rel_p(c));1047GEN Y = rel_Y(c), Qx_2 = remii(sqri(Y), h->N);1048if (LP) rhs = modii(mulii(rhs, rel_q(c)), h->N);1049if (!equalii(Qx_2, rhs))1050{1051GEN relpp, relpc;1052split_relp(rel_p(c), &relpp, &relpc);1053err_printf("MPQS: %Ps : %Ps %Ps\n", Y, relpp,relpc);1054err_printf("\tQx_2 = %Ps\n", Qx_2);1055err_printf("\t rhs = %Ps\n", rhs);1056pari_err_BUG(LP? "MPQS: wrong large prime relation found"1057: "MPQS: wrong full relation found");1058}1059PRINT_IF_VERBOSE(LP? "\b(;)": "\b(:)");1060set_avma(av);1061}1062#endif10631064static void1065rel_to_ei(GEN ei, GEN relp)1066{1067long j, l = lg(relp);1068for (j=1; j<l; j++)1069{1070long e = relp[j] >> REL_OFFSET, i = relp[j] & REL_MASK;1071ei[i] += e;1072}1073}1074static void1075mpqs_add_factor(GEN relp, long *i, ulong ei, ulong pi)1076{ relp[++*i] = pi | (ei << REL_OFFSET); }10771078static int1079zv_is_even(GEN V)1080{1081long i, l = lg(V);1082for (i=1; i<l; i++)1083if (odd(uel(V,i))) return 0;1084return 1;1085}10861087static GEN1088combine_large_primes(mpqs_handle_t *h, GEN rel1, GEN rel2)1089{1090GEN new_Y, new_Y1, Y1 = rel_Y(rel1), Y2 = rel_Y(rel2);1091long l, lei = h->size_of_FB + 1, nb = 0;1092GEN ei, relp, iq, q = rel_q(rel1);10931094if (!invmod(q, h->N, &iq)) return equalii(iq, h->N)? NULL: iq; /* rare */1095ei = zero_zv(lei);1096rel_to_ei(ei, rel_p(rel1));1097rel_to_ei(ei, rel_p(rel2));1098if (zv_is_even(ei)) return NULL;1099new_Y = modii(mulii(mulii(Y1, Y2), iq), h->N);1100new_Y1 = subii(h->N, new_Y);1101if (abscmpii(new_Y1, new_Y) < 0) new_Y = new_Y1;1102relp = cgetg(MAX_PE_PAIR+1,t_VECSMALL);1103if (odd(ei[1])) mpqs_add_factor(relp, &nb, 1, 1);1104for (l = 2; l <= lei; l++)1105if (ei[l]) mpqs_add_factor(relp, &nb, ei[l],l);1106setlg(relp, nb+1);1107if (DEBUGLEVEL >= 6)1108{1109GEN relpp, relpc, rel1p, rel1c, rel2p, rel2c;1110split_relp(relp,&relpp,&relpc);1111split_relp(rel1,&rel1p,&rel1c);1112split_relp(rel2,&rel2p,&rel2c);1113err_printf("MPQS: combining\n");1114err_printf(" {%Ps @ %Ps : %Ps}\n", q, Y1, rel1p, rel1c);1115err_printf(" * {%Ps @ %Ps : %Ps}\n", q, Y2, rel2p, rel2c);1116err_printf(" == {%Ps, %Ps}\n", relpp, relpc);1117}1118#ifdef MPQS_DEBUG1119{1120pari_sp av1 = avma;1121if (!equalii(modii(sqri(new_Y), h->N), mpqs_factorback(h, relp)))1122pari_err_BUG("MPQS: combined large prime relation is false");1123set_avma(av1);1124}1125#endif1126return mkvec2(new_Y, relp);1127}11281129/* nc candidates */1130static GEN1131mpqs_eval_cand(mpqs_handle_t *h, long nc, hashtable *frel, hashtable *lprel)1132{1133mpqs_FB_entry_t *FB = h->FB;1134GEN A = h->A, B = h->B;1135long *relaprimes = h->relaprimes, *candidates = h->candidates;1136long pi, i;1137int pii;1138mpqs_per_A_prime_t *per_A_pr = h->per_A_pr;11391140for (i = 0; i < nc; i++)1141{1142pari_sp btop = avma;1143GEN Qx, Qx_part, Y, relp = cgetg(MAX_PE_PAIR+1,t_VECSMALL);1144long powers_of_2, p, x = candidates[i], nb = 0;1145int relaprpos = 0;1146long k;1147unsigned char thr = h->sieve_array[x];1148/* Y = 2*A*x + B, Qx = Y^2/(4*A) = Q(x) */1149Y = addii(mulis(A, 2 * (x - h->M)), B);1150Qx = subii(sqri(Y), h->kN); /* != 0 since N not a square and (N,k) = 1 */1151if (signe(Qx) < 0)1152{1153setabssign(Qx);1154mpqs_add_factor(relp, &nb, 1, 1); /* i = 1, ei = 1, pi */1155}1156/* Qx > 0, divide by powers of 2; we're really dealing with 4*A*Q(x), so we1157* always have at least 2^2 here, and at least 2^3 when kN = 1 mod 4 */1158powers_of_2 = vali(Qx);1159Qx = shifti(Qx, -powers_of_2);1160mpqs_add_factor(relp, &nb, powers_of_2, 2); /* i = 1, ei = 1, pi */1161/* When N is small, it may happen that N | Qx outright. In any case, when1162* no extensive prior trial division / Rho / ECM was attempted, gcd(Qx,N)1163* may turn out to be a nontrivial factor of N (not in FB or we'd have1164* found it already, but possibly smaller than the large prime bound). This1165* is too rare to check for here in the inner loop, but it will be caught1166* if such an LP relation is ever combined with another. */11671168/* Pass 1 over odd primes in FB: pick up all possible divisors of Qx1169* including those sitting in k or in A, and remember them in relaprimes.1170* Do not yet worry about possible repeated factors, these will be found in1171* the Pass 2. Pass 1 recognizes divisors of A by their corresponding flags1172* bit in the FB entry. (Divisors of k are ignored at this stage.)1173* We construct a preliminary table of FB subscripts and "exponents" of FB1174* primes which divide Qx. (We store subscripts, not the primes themselves.)1175* We distinguish three cases:1176* 0) prime in A which does not divide Qx/A,1177* 1) prime not in A which divides Qx/A,1178* 2) prime in A which divides Qx/A.1179* Cases 1 and 2 need checking for repeated factors, kind 0 doesn't.1180* Cases 0 and 1 contribute 1 to the exponent in the relation, case 21181* contributes 2.1182* Factors in common with k are simpler: if they occur, they occur1183* exactly to the first power, and this makes no difference in Pass 1,1184* so they behave just like every normal odd FB prime. */1185for (Qx_part = A, pi = 3; pi< h->index1_FB; pi++)1186{1187ulong p = FB[pi].fbe_p;1188long xp = x % p;1189/* Here we used that MPQS_FBE_DIVIDES_A = 1. */11901191if (xp == FB[pi].fbe_start1 || xp == FB[pi].fbe_start2)1192{ /* p divides Q(x)/A and possibly A, case 2 or 3 */1193ulong ei = FB[pi].fbe_flags & MPQS_FBE_DIVIDES_A;1194relaprimes[relaprpos++] = pi;1195relaprimes[relaprpos++] = 1 + ei;1196Qx_part = muliu(Qx_part, p);1197}1198}1199for ( ; thr && (p = FB[pi].fbe_p); pi++)1200{1201long xp = x % p;1202/* Here we used that MPQS_FBE_DIVIDES_A = 1. */12031204if (xp == FB[pi].fbe_start1 || xp == FB[pi].fbe_start2)1205{ /* p divides Q(x)/A and possibly A, case 2 or 3 */1206ulong ei = FB[pi].fbe_flags & MPQS_FBE_DIVIDES_A;1207relaprimes[relaprpos++] = pi;1208relaprimes[relaprpos++] = 1 + ei;1209Qx_part = muliu(Qx_part, p);1210thr -= FB[pi].fbe_logval;1211}1212}1213for (k = 0; k< h->omega_A; k++)1214{1215long pi = MPQS_I(k);1216ulong p = FB[pi].fbe_p;1217long xp = x % p;1218if (!(xp == FB[pi].fbe_start1 || xp == FB[pi].fbe_start2))1219{ /* p divides A but does not divide Q(x)/A, case 1 */1220relaprimes[relaprpos++] = pi;1221relaprimes[relaprpos++] = 0;1222}1223}1224/* We have accumulated the known factors of Qx except for possible repeated1225* factors and for possible large primes. Divide off what we have.1226* This is faster than dividing off A and each prime separately. */1227Qx = diviiexact(Qx, Qx_part);12281229#ifdef MPQS_DEBUG1230err_printf("MPQS DEBUG: eval loop 3, avma = 0x%lX\n", (ulong)avma);1231#endif1232/* Pass 2: deal with repeated factors and store tentative relation. At this1233* point, the only primes which can occur again in the adjusted Qx are1234* those in relaprimes which are followed by 1 or 2. We must pick up those1235* followed by a 0, too. */1236PRINT_IF_VERBOSE("a");1237for (pii = 0; pii < relaprpos; pii += 2)1238{1239ulong r, ei = relaprimes[pii+1];1240GEN q;12411242pi = relaprimes[pii];1243/* p | k (identified by its index before index0_FB)* or p | A (ei = 0) */1244if ((mpqs_int32_t)pi < h->index0_FB || ei == 0)1245{1246mpqs_add_factor(relp, &nb, 1, pi);1247continue;1248}1249p = FB[pi].fbe_p;1250/* p might still divide the current adjusted Qx. Try it. */1251switch(cmpiu(Qx, p))1252{1253case 0: ei++; Qx = gen_1; break;1254case 1:1255q = absdiviu_rem(Qx, p, &r);1256while (r == 0) { ei++; Qx = q; q = absdiviu_rem(Qx, p, &r); }1257break;1258}1259mpqs_add_factor(relp, &nb, ei, pi);1260}12611262#ifdef MPQS_DEBUG1263err_printf("MPQS DEBUG: eval loop 4, avma = 0x%lX\n", (ulong)avma);1264#endif1265PRINT_IF_VERBOSE("\bb");1266setlg(relp, nb+1);1267if (is_pm1(Qx))1268{1269GEN rel = gerepilecopy(btop, mkvec2(absi_shallow(Y), relp));1270#ifdef MPQS_DEBUG1271mpqs_check_rel(h, rel);1272#endif1273frel_add(frel, rel);1274}1275else if (cmpiu(Qx, h->lp_bound) <= 0)1276{1277ulong q = itou(Qx);1278GEN rel = mkvec3(absi_shallow(Y),relp,Qx);1279GEN col = hash_haskey_GEN(lprel, (void*)q);1280#ifdef MPQS_DEBUG1281mpqs_check_rel(h, rel);1282#endif1283if (!col) /* relation up to large prime */1284hash_insert(lprel, (void*)q, (void*)gerepilecopy(btop,rel));1285else if ((rel = combine_large_primes(h, rel, col)))1286{1287if (typ(rel) == t_INT) return rel; /* very unlikely */1288#ifdef MPQS_DEBUG1289mpqs_check_rel(h, rel);1290#endif1291frel_add(frel, gerepilecopy(btop,rel));1292}1293else1294set_avma(btop);1295}1296else1297{ /* TODO: check for double large prime */1298PRINT_IF_VERBOSE("\b.");1299set_avma(btop);1300}1301}1302PRINT_IF_VERBOSE("\n");1303return NULL;1304}13051306/*********************************************************************/1307/** FROM RELATIONS TO DIVISORS **/1308/*********************************************************************/13091310/* create an F2m from a relations list */1311static GEN1312rels_to_F2Ms(GEN rel)1313{1314long i, cols = lg(rel)-1;1315GEN m = cgetg(cols+1, t_VEC);1316for (i = 1; i <= cols; i++)1317{1318GEN relp = gmael(rel,i,2), rel2;1319long j, l = lg(relp), o = 0, k;1320for (j = 1; j < l; j++)1321if (odd(relp[j] >> REL_OFFSET)) o++;1322rel2 = cgetg(o+1, t_VECSMALL);1323for (j = 1, k = 1; j < l; j++)1324if (odd(relp[j] >> REL_OFFSET))1325rel2[k++] = relp[j] & REL_MASK;1326gel(m, i) = rel2;1327}1328return m;1329}13301331static int1332split(GEN *D, long *e)1333{1334ulong mask;1335long flag;1336if (MR_Jaeschke(*D)) { *e = 1; return 1; } /* probable prime */1337if (Z_issquareall(*D, D))1338{ /* squares could cost us a lot of time */1339if (DEBUGLEVEL >= 5) err_printf("MPQS: decomposed a square\n");1340*e = 2; return 1;1341}1342mask = 7;1343/* 5th/7th powers aren't worth the trouble. OTOH once we have the hooks for1344* dealing with cubes, higher powers can be handled essentially for free) */1345if ((flag = is_357_power(*D, D, &mask)))1346{1347if (DEBUGLEVEL >= 5)1348err_printf("MPQS: decomposed a %s power\n", uordinal(flag));1349*e = flag; return 1;1350}1351*e = 0; return 0; /* known composite */1352}13531354/* return a GEN structure containing NULL but safe for gerepileupto */1355static GEN1356mpqs_solve_linear_system(mpqs_handle_t *h, hashtable *frel)1357{1358mpqs_FB_entry_t *FB = h->FB;1359pari_sp av = avma;1360GEN rels = hash_keys(frel), N = h->N, r, c, res, ei, M, Ker;1361long i, j, nrows, rlast, rnext, rmax, rank;13621363M = rels_to_F2Ms(rels);1364Ker = F2Ms_ker(M, h->size_of_FB+1); rank = lg(Ker)-1;1365if (DEBUGLEVEL >= 4)1366{1367if (DEBUGLEVEL >= 7)1368err_printf("\\\\ MPQS RELATION MATRIX\nFREL=%Ps\nKERNEL=%Ps\n",M, Ker);1369err_printf("MPQS: Gauss done: kernel has rank %ld, taking gcds...\n", rank);1370}1371if (!rank)1372{ /* trivial kernel; main loop may look for more relations */1373if (DEBUGLEVEL >= 3)1374pari_warn(warner, "MPQS: no solutions found from linear system solver");1375return gc_NULL(av); /* no factors found */1376}13771378/* Expect up to 2^rank pairwise coprime factors, but a kernel basis vector1379* may not contribute to the decomposition; r stores the factors and c what1380* we know about them (0: composite, 1: probably prime, >= 2: proper power) */1381ei = cgetg(h->size_of_FB + 2, t_VECSMALL);1382rmax = logint(N, utoi(3));1383if (rank <= BITS_IN_LONG-2)1384rmax = minss(rmax, 1L<<rank); /* max # of factors we can hope for */1385r = cgetg(rmax+1, t_VEC);1386c = cgetg(rmax+1, t_VECSMALL);1387rnext = rlast = 1;1388nrows = lg(M)-1;1389for (i = 1; i <= rank; i++)1390{ /* loop over kernel basis */1391GEN X = gen_1, Y_prod = gen_1, X_plus_Y, D;1392pari_sp av2 = avma, av3;1393long done = 0; /* # probably-prime factors or powers whose bases we won't1394* handle any further */1395memset((void *)(ei+1), 0, (h->size_of_FB + 1) * sizeof(long));1396for (j = 1; j <= nrows; j++)1397if (F2m_coeff(Ker, j, i))1398{1399GEN R = gel(rels,j);1400Y_prod = gerepileuptoint(av2, remii(mulii(Y_prod, gel(R,1)), N));1401rel_to_ei(ei, gel(R,2));1402}1403av3 = avma;1404for (j = 2; j <= h->size_of_FB + 1; j++)1405if (ei[j])1406{1407GEN q = utoipos(FB[j].fbe_p);1408if (ei[j] & 1) pari_err_BUG("MPQS (relation is a nonsquare)");1409X = remii(mulii(X, Fp_powu(q, (ulong)ei[j]>>1, N)), N);1410X = gerepileuptoint(av3, X);1411}1412if (MPQS_DEBUGLEVEL >= 1 && !dvdii(subii(sqri(X), sqri(Y_prod)), N))1413{1414err_printf("MPQS: X^2 - Y^2 != 0 mod N\n");1415err_printf("\tindex i = %ld\n", i);1416pari_warn(warner, "MPQS: wrong relation found after Gauss");1417}1418/* At this point, gcd(X-Y, N) * gcd(X+Y, N) = N:1419* 1) N | X^2 - Y^2, so it divides the LHS;1420* 2) let P be any prime factor of N. If P | X-Y and P | X+Y, then P | 2X1421* But X is a product of powers of FB primes => coprime to N.1422* Hence we work with gcd(X+Y, N) alone. */1423X_plus_Y = addii(X, Y_prod);1424if (rnext == 1)1425{ /* we still haven't decomposed, and want both a gcd and its cofactor. */1426D = gcdii(X_plus_Y, N);1427if (is_pm1(D) || equalii(D,N)) { set_avma(av2); continue; }1428/* got something that works */1429if (DEBUGLEVEL >= 5)1430err_printf("MPQS: splitting N after %ld kernel vector%s\n",1431i+1, (i? "s" : ""));1432gel(r,1) = diviiexact(N, D);1433gel(r,2) = D;1434rlast = rnext = 3;1435if (split(&gel(r,1), &c[1])) done++;1436if (split(&gel(r,2), &c[2])) done++;1437if (done == 2 || rmax == 2) break;1438if (DEBUGLEVEL >= 5)1439err_printf("MPQS: got two factors, looking for more...\n");1440}1441else1442{ /* we already have factors */1443for (j = 1; j < rnext; j++)1444{ /* loop over known-composite factors */1445/* skip probable primes and also roots of pure powers: they are a lot1446* smaller than N and should be easy to deal with later */1447if (c[j]) { done++; continue; }1448av3 = avma; D = gcdii(X_plus_Y, gel(r,j));1449if (is_pm1(D) || equalii(D, gel(r,j))) { set_avma(av3); continue; }1450/* got one which splits this factor */1451if (DEBUGLEVEL >= 5)1452err_printf("MPQS: resplitting a factor after %ld kernel vectors\n",1453i+1);1454gel(r,j) = diviiexact(gel(r,j), D);1455gel(r,rnext) = D;1456if (split(&gel(r,j), &c[j])) done++;1457/* Don't increment done: happens later when we revisit c[rnext] during1458* the present inner loop. */1459(void)split(&gel(r,rnext), &c[rnext]);1460if (++rnext > rmax) break; /* all possible factors seen */1461} /* loop over known composite factors */14621463if (rnext > rlast)1464{1465if (DEBUGLEVEL >= 5)1466err_printf("MPQS: got %ld factors%s\n", rlast - 1,1467(done < rlast ? ", looking for more..." : ""));1468rlast = rnext;1469}1470/* break out if we have rmax factors or all current factors are probable1471* primes or tiny roots from pure powers */1472if (rnext > rmax || done == rnext - 1) break;1473}1474}1475if (rnext == 1) return gc_NULL(av); /* no factors found */14761477/* normal case: convert to ifac format as described in ifactor1.c (value,1478* exponent, class [unknown, known composite, known prime]) */1479rlast = rnext - 1; /* # of distinct factors found */1480res = cgetg(3*rlast + 1, t_VEC);1481if (DEBUGLEVEL >= 6) err_printf("MPQS: wrapping up %ld factors\n", rlast);1482for (i = j = 1; i <= rlast; i++, j += 3)1483{1484long C = c[i];1485icopyifstack(gel(r,i), gel(res,j)); /* factor */1486gel(res,j+1) = C <= 1? gen_1: utoipos(C); /* exponent */1487gel(res,j+2) = C ? NULL: gen_0; /* unknown or known composite */1488if (DEBUGLEVEL >= 6)1489err_printf("\tpackaging %ld: %Ps ^%ld (%s)\n", i, gel(r,i),1490itos(gel(res,j+1)), (C? "unknown": "composite"));1491}1492return res;1493}14941495/*********************************************************************/1496/** MAIN ENTRY POINT AND DRIVER ROUTINE **/1497/*********************************************************************/1498static void1499toolarge()1500{ pari_warn(warner, "MPQS: number too big to be factored with MPQS,\n\tgiving up"); }15011502/* Factors N using the self-initializing multipolynomial quadratic sieve1503* (SIMPQS). Returns one of the two factors, or (usually) a vector of factors1504* and exponents and information about which ones are still composite, or NULL1505* when we can't seem to make any headway. */1506GEN1507mpqs(GEN N)1508{1509const long size_N = decimal_len(N);1510mpqs_handle_t H;1511GEN fact; /* will in the end hold our factor(s) */1512mpqs_FB_entry_t *FB; /* factor base */1513double dbg_target, DEFEAT;1514ulong p;1515pari_timer T;1516hashtable lprel, frel;1517pari_sp av = avma;15181519if (DEBUGLEVEL >= 4) err_printf("MPQS: number to factor N = %Ps\n", N);1520if (size_N > MPQS_MAX_DIGIT_SIZE_KN) { toolarge(); return NULL; }1521if (DEBUGLEVEL >= 4)1522{1523timer_start(&T);1524err_printf("MPQS: factoring number of %ld decimal digits\n", size_N);1525}1526H.N = N;1527H.bin_index = 0;1528H.index_i = 0;1529H.index_j = 0;1530H.index2_moved = 0;1531p = mpqs_find_k(&H);1532if (p) { set_avma(av); return utoipos(p); }1533if (DEBUGLEVEL >= 5)1534err_printf("MPQS: found multiplier %ld for N\n", H._k->k);1535H.kN = muliu(N, H._k->k);1536if (!mpqs_set_parameters(&H)) { toolarge(); return NULL; }15371538if (DEBUGLEVEL >= 5)1539err_printf("MPQS: creating factor base and allocating arrays...\n");1540FB = mpqs_create_FB(&H, &p);1541if (p) { set_avma(av); return utoipos(p); }1542mpqs_sieve_array_ctor(&H);1543mpqs_poly_ctor(&H);15441545H.lp_bound = minss(H.largest_FB_p, MPQS_LP_BOUND);1546/* don't allow large primes to have room for two factors both bigger than1547* what the FB contains (...yet!) */1548H.lp_bound *= minss(H.lp_scale, H.largest_FB_p - 1);1549H.dkN = gtodouble(H.kN);1550/* compute the threshold and fill in the byte-sized scaled logarithms */1551mpqs_set_sieve_threshold(&H);1552if (!mpqs_locate_A_range(&H)) return NULL;1553if (DEBUGLEVEL >= 4)1554{1555err_printf("MPQS: sieving interval = [%ld, %ld]\n", -(long)H.M, (long)H.M);1556/* that was a little white lie, we stop one position short at the top */1557err_printf("MPQS: size of factor base = %ld\n", (long)H.size_of_FB);1558err_printf("MPQS: striving for %ld relations\n", (long)H.target_rels);1559err_printf("MPQS: coefficients A will be built from %ld primes each\n",1560(long)H.omega_A);1561err_printf("MPQS: primes for A to be chosen near FB[%ld] = %ld\n",1562(long)H.index2_FB, (long)FB[H.index2_FB].fbe_p);1563err_printf("MPQS: smallest prime used for sieving FB[%ld] = %ld\n",1564(long)H.index1_FB, (long)FB[H.index1_FB].fbe_p);1565err_printf("MPQS: largest prime in FB = %ld\n", (long)H.largest_FB_p);1566err_printf("MPQS: bound for `large primes' = %ld\n", (long)H.lp_bound);1567if (DEBUGLEVEL >= 5)1568err_printf("MPQS: sieve threshold = %u\n", (unsigned int)H.sieve_threshold);1569err_printf("MPQS: computing relations:");1570}15711572/* main loop which1573* - computes polynomials and their zeros (SI)1574* - does the sieving1575* - tests candidates of the sieve array */15761577/* Let (A, B_i) the current pair of coeffs. If i == 0 a new A is generated */1578H.index_j = (mpqs_uint32_t)-1; /* increment below will have it start at 0 */15791580dbg_target = H.target_rels / 100.;1581DEFEAT = H.target_rels * 1.5;1582hash_init_GEN(&frel, H.target_rels, gequal, 1);1583hash_init_ulong(&lprel,H.target_rels, 1);1584for(;;)1585{1586long tc;1587/* self initialization: compute polynomial and its zeros */1588if (!mpqs_self_init(&H))1589{ /* have run out of primes for A; give up */1590if (DEBUGLEVEL >= 2)1591err_printf("MPQS: Ran out of primes for A, giving up.\n");1592return gc_NULL(av);1593}1594mpqs_sieve(&H);1595tc = mpqs_eval_sieve(&H);1596if (DEBUGLEVEL >= 6)1597err_printf("MPQS: found %lu candidate%s\n", tc, (tc==1? "" : "s"));1598if (tc)1599{1600fact = mpqs_eval_cand(&H, tc, &frel, &lprel);1601if (fact)1602{ /* factor found during combining */1603if (DEBUGLEVEL >= 4)1604{1605err_printf("\nMPQS: split N whilst combining, time = %ld ms\n",1606timer_delay(&T));1607err_printf("MPQS: found factor = %Ps\n", fact);1608}1609return gerepileupto(av, fact);1610}1611}1612if (DEBUGLEVEL >= 4 && frel.nb > dbg_target)1613{1614err_printf(" %ld%%", 100*frel.nb/ H.target_rels);1615if (DEBUGLEVEL >= 5) err_printf(" (%ld ms)", timer_delay(&T));1616dbg_target += H.target_rels / 100.;1617}1618if (frel.nb < (ulong)H.target_rels) continue; /* main loop */16191620if (DEBUGLEVEL >= 4)1621{1622timer_start(&T);1623err_printf("\nMPQS: starting Gauss over F_2 on %ld distinct relations\n",1624frel.nb);1625}1626fact = mpqs_solve_linear_system(&H, &frel);1627if (fact)1628{ /* solution found */1629if (DEBUGLEVEL >= 4)1630{1631err_printf("\nMPQS: time in Gauss and gcds = %ld ms\n",timer_delay(&T));1632if (typ(fact) == t_INT) err_printf("MPQS: found factor = %Ps\n", fact);1633else1634{1635long j, nf = (lg(fact)-1)/3;1636err_printf("MPQS: found %ld factors =\n", nf);1637for (j = 1; j <= nf; j++)1638err_printf("\t%Ps%s\n", gel(fact,3*j-2), (j < nf)? ",": "");1639}1640}1641return gerepileupto(av, fact);1642}1643if (DEBUGLEVEL >= 4)1644{1645err_printf("\nMPQS: time in Gauss and gcds = %ld ms\n",timer_delay(&T));1646err_printf("MPQS: no factors found.\n");1647if (frel.nb < DEFEAT)1648err_printf("\nMPQS: restarting sieving ...\n");1649else1650err_printf("\nMPQS: giving up.\n");1651}1652if (frel.nb >= DEFEAT) return gc_NULL(av);1653H.target_rels += 10;1654}1655}165616571658