Testing latest pari + WASM + node.js... and it works?! Wow.
License: GPL3
ubuntu2004
/* - debug support */12#ifdef MPQS_DEBUG_VERBOSE3# ifndef MPQS_DEBUG4# define MPQS_DEBUG5# endif6# define PRINT_IF_VERBOSE(x) err_printf(x)7#else8# define PRINT_IF_VERBOSE(x)9#endif1011#ifdef MPQS_DEBUG12# define MPQS_DEBUGLEVEL 1000 /* infinity */13#else14# define MPQS_DEBUGLEVEL DEBUGLEVEL15#endif1617/* - non-configurable sizing parameters */1819/* 'large primes' must be smaller than min(MPQS_LP_BOUND, largest_FB_p) */20#define MPQS_LP_BOUND 12500000 /* works for 32 and 64bit */2122/* see mpqs_locate_A_range() for an explanation of the following. I had23* some good results with about -log2(0.85) but in the range I was testing,24* this shifts the primes for A only by one position in the FB. Don't go25* over the top with this one... */26#define MPQS_A_FUDGE 0.15 /* ~ -log2(0.9) */2728#define MPQS_CANDIDATE_ARRAY_SIZE 2000 /* max. this many cand's per poly */2930/* - structures, types, and constants */3132/* -- reasonably-sized integers */33#ifdef LONG_IS_64BIT34typedef int mpqs_int32_t;35typedef unsigned int mpqs_uint32_t;36#else37typedef long mpqs_int32_t;38typedef ulong mpqs_uint32_t;39#endif4041/* -- factor base entries should occupy 32 bytes (and we'll keep them42* aligned, for good L1 cache hit rates). Some of the entries will be43* abused for e.g. -1 and (factors of) k instead for real factor base44* primes, and for a sentinel at the end. This is why __p is a signed45* field.-- The two start fields depend on the current polynomial and46* keep changing during sieving, the flags will also change depending on47* the current A. */48/* Let (z1, z2) be the roots of Q(x) = A x^2 + Bx + C mod p_i; then49* Q(z1 + p_i Z) == 0 mod p_i and Q(z2 + p_i Z) == 0 mod p_i;50* start_1, start_2 are the positions where p_i divides Q(x) for the51* first time, already adjusted for the fact that the sieving array,52* nominally [-M, M], is represented by a 0-based C array of length53* 2M + 1. For the prime factors of A and those of k, the two roots54* are equal mod p_i. */5556#define MPQS_FB_ENTRY_PAD 325758typedef union mpqs_FB_entry {59char __pad[MPQS_FB_ENTRY_PAD];60struct {61/* the prime p, the two arith. prog. mod p, sqrt(kN) mod p */62mpqs_int32_t __p, __start1, __start2, __sqrt_kN;63float __flogp; /* log(p) as a 4-byte float */64unsigned char __val; /* 8-bit approx. scaled log for sieving */65unsigned char __flags;66} __entry;67} mpqs_FB_entry_t;6869/* --- convenience accessor macros for the preceding: */70#define fbe_p __entry.__p71#define fbe_flogp __entry.__flogp72#define fbe_start1 __entry.__start173#define fbe_start2 __entry.__start274#define fbe_sqrt_kN __entry.__sqrt_kN75#define fbe_logval __entry.__val76#define fbe_flags __entry.__flags7778/* --- flag bits for fbe_flags: */79#define MPQS_FBE_CLEAR 0x0 /* no flags */8081/* following used for odd FB primes, and applies to the divisors of A but not82* those of k. Must occupy the rightmost bit because we also use it as a83* shift count after extracting it from the byte. */84#define MPQS_FBE_DIVIDES_A 0x1ul /* and Q(x) mod p only has degree 1 */8586/* TODO (tentative): one bit to mark normal FB primes,87* one to mark the factors of k,88* one to mark primes used in sieving,89* later maybe one to mark primes of which we'll be tracking the square,90* one to mark primes currently in use for A;91* once we segment the FB, one bit marking the members of the first segment */9293/* -- multiplier k and attached quantities */94typedef struct mpqs_multiplier {95mpqs_uint32_t k; /* the multiplier (odd, squarefree) */96mpqs_uint32_t omega_k; /* number (0, 1 or 2) of primes dividing k */97mpqs_uint32_t kp[2]; /* prime factors of k, if any */98} mpqs_multiplier_t;99100#define MPQS_POSSIBLE_MULTIPLIERS 15 /* how many values for k we'll try */101/* following must be in range of the cand_multipliers table below */102103static const mpqs_multiplier_t cand_multipliers[] = {104{ 1, 0, { 0, 0}},105{ 3, 1, { 3, 0}},106{ 5, 1, { 5, 0}},107{ 7, 1, { 7, 0}},108{ 11, 1, {11, 0}},109{ 13, 1, {13, 0}},110{ 15, 2, { 3, 5}},111{ 17, 1, {17, 0}},112{ 19, 1, {19, 0}},113{ 21, 2, { 3, 7}},114{ 23, 1, {23, 0}},115{ 29, 1, {29, 0}},116{ 31, 1, {31, 0}},117{ 33, 2, { 3, 11}},118{ 35, 2, { 5, 7}},119{ 37, 1, {37, 0}},120{ 39, 2, { 3, 13}},121{ 41, 1, {41, 0}},122{ 43, 1, {43, 0}},123{ 47, 1, {47, 0}},124{ 51, 2, { 3, 17}},125{ 53, 1, {53, 0}},126{ 55, 2, { 5, 11}},127{ 57, 2, { 3, 19}},128{ 59, 1, {59, 0}},129{ 61, 1, {61, 0}},130{ 65, 2, { 5, 13}},131{ 67, 1, {67, 0}},132{ 69, 2, { 3, 23}},133{ 71, 1, {71, 0}},134{ 73, 1, {73, 0}},135{ 77, 2, { 7, 11}},136{ 79, 1, {79, 0}},137{ 83, 1, {83, 0}},138{ 85, 2, { 5, 17}},139{ 87, 2, { 3, 29}},140{ 89, 1, {89, 0}},141{ 91, 2, { 7, 13}},142{ 93, 2, { 3, 31}},143{ 95, 2, { 5, 19}},144{ 97, 1, {97, 0}}145};146147/* -- the array of (Chinese remainder) idempotents which add/subtract up to148* the middle coefficient B, and for convenience, the FB subscripts of the149* primes in current use for A. We keep these together since both arrays150* are of the same size and are used at the same times. */151typedef struct mqps_per_A_prime {152GEN _H; /* summand for B */153mpqs_int32_t _i; /* subscript into FB */154} mpqs_per_A_prime_t;155156/* following cooperate with names of local variables in the self_init fcns.157* per_A_pr must exist and be an alias for the eponymous handle pointer for158* all of these, and FB must exist and correspond to the handle FB pointer159* for all but the first two of them. */160#define MPQS_H(i) (per_A_pr[i]._H)161#define MPQS_I(i) (per_A_pr[i]._i)162#define MPQS_AP(i) (FB[MPQS_I(i)].fbe_p)163#define MPQS_LP(i) (FB[MPQS_I(i)].fbe_flogp)164#define MPQS_SQRT(i) (FB[MPQS_I(i)].fbe_sqrt_kN)165#define MPQS_FLG(i) (FB[MPQS_I(i)].fbe_flags)166167/* -- the array of addends / subtrahends for changing polynomials during168* self-initialization: (1/A) H[i] mod p_j, with i subscripting the inner169* array in each entry, and j choosing the entry in an outer array.170* Entries will occupy 64 bytes each no matter what (which imposes at most 17171* prime factors for A; thus i will range from 0 to at most 15.) This wastes a172* little memory for smaller N but makes it easier for compilers to generate173* efficient code. */174175/* NOTE: At present, memory locality vis-a-vis accesses to this array is good176* in the slow (new A) branch of mpqs_self_init(), but poor in the fast177* (same A, new B) branch, which now loops over the outer array index,178* reading just one field of each inner array each time through the FB179* loop. This doesn't really harm, but will improve one day when we do180* segmented sieve arrays with the attached segmented FB-range accesses. */181#define MPQS_MAX_OMEGA_A 17182typedef struct mpqs_inv_A_H {183mpqs_uint32_t _i[MPQS_MAX_OMEGA_A - 1];184} mpqs_inv_A_H_t;185186#define MPQS_INV_A_H(i,j) (inv_A_H[j]._i[i])187188/* -- global handle to keep track of everything used through one factorization189* attempt. The order of the fields is determined by keeping most frequently190* used stuff near the beginning. */191typedef struct mpqs_handle {192/* pointers */193unsigned char *sieve_array;/* 0-based, representing [-M,M-1] */194unsigned char *sieve_array_end; /* points at sieve_array[M-1] */195mpqs_FB_entry_t *FB; /* (aligned) FB array itself */196long *candidates; /* collects promising sieve subscripts */197long *relaprimes; /* prime/exponent pairs in a relation */198mpqs_inv_A_H_t *inv_A_H; /* self-init: (aligned) stepping array, and */199mpqs_per_A_prime_t *per_A_pr; /* FB subscripts of primes in A etc. */200201/* other stuff that's being used all the time */202mpqs_int32_t M; /* sieving over |x| <= M */203mpqs_int32_t size_of_FB; /* # primes in FB (or dividing k) */204/* the following three are in non-descending order, and the first two must205* be adjusted for omega_k at the beginning */206mpqs_int32_t index0_FB; /* lowest subscript into FB of a "real" prime207* (i.e. other than -1, 2, factors of k) */208mpqs_int32_t index1_FB; /* lowest subscript into FB for sieving */209mpqs_int32_t index2_FB; /* primes for A are chosen relative to this */210unsigned char index2_moved;/* true when we're starved for small A's */211unsigned char sieve_threshold; /* distinguishes candidates in sieve */212GEN N, kN; /* number to be factored, with multiplier */213GEN A, B; /* leading, middle coefficient */214mpqs_int32_t omega_A; /* number of primes going into each A */215mpqs_int32_t no_B; /* number of B's for each A: 2^(omega_A-1) */216double l2_target_A; /* ~log2 of desired typical A */217/* counters and bit pattern determining and numbering current polynomial: */218mpqs_uint32_t bin_index; /* bit pattern for selecting primes for A */219mpqs_uint32_t index_i; /* running count of A's */220mpqs_uint32_t index_j; /* B's ordinal number in A's cohort */221222/* further sizing parameters: */223mpqs_int32_t target_rels; /* target number of full relations */224mpqs_int32_t largest_FB_p; /* largest prime in the FB */225mpqs_int32_t pmin_index1; /* lower bound for primes used for sieving */226mpqs_int32_t lp_scale; /* factor by which LPs may exceed FB primes */227228/* subscripts determining where to pick primes for A */229/* FIXME: lp_bound might have to be mpqs_int64_t ? */230long lp_bound; /* cutoff for Large Primes */231long digit_size_kN;232const mpqs_multiplier_t *_k; /* multiplier k and attached quantities */233double tolerance; /* controls the tightness of the sieve */234double dkN; /* - double prec. approximation of kN */235double l2sqrtkN; /* ~log2(sqrt(kN)) */236double l2M; /* ~log2(M) (cf. below) */237/* TODO: need an index2_FB here to remember where to start picking primes */238/* bookkeeping pointers to containers of aligned memory chunks: */239void *FB_chunk; /* (unaligned) chunk containing the FB */240void *invAH_chunk; /* (unaligned) chunk for self-init array */241} mpqs_handle_t;242243/* -- sizing table entries */244245/* For "tolerance", see mpqs_set_sieve_threshold(). The LP scale, for very246* large kN, prevents us from accumulating vast amounts of LP relations with247* little chance of hitting any particular large prime a second time and being248* able to combine a full relation from two LP ones; however, the sieve249* threshold (determined by the tolerance) already works against very large LPs250* being produced. The present relations "database" can detect duplicate full251* relations only during the sort/combine phases, so we must do some sort252* points even for tiny kN where we do not admit large primes at all.253* Some constraints imposed by the present implementation:254* + omega_A should be at least 3, and no more than MPQS_MAX_OMEGA_A255* + The size of the FB must be large enough compared to omega_A256* (about 2*omega_A + 3, but this is always true below) */257/* XXX Changes needed for segmented mode:258* XXX When using it (kN large enough),259* XXX - M must become a multiple of the (cache block) segment size260* XXX (or to keep things simple: a multiple of 32K)261* XXX - we need index3_FB to seperate (smaller) primes used for normal262* XXX sieving from larger ones used with transaction buffers263* XXX (and the locate_A_range and attached logic must be changed to264* XXX cap index2_FB below index3_FB instead of below size_of_FB)265*/266typedef struct mpqs_parameterset {267float tolerance; /* "mesh width" of the sieve */268mpqs_int32_t lp_scale; /* factor by which LPs may exceed FB primes */269mpqs_int32_t M; /* size of half the sieving interval */270mpqs_int32_t size_of_FB; /* #primes to use for FB (including 2) */271mpqs_int32_t omega_A; /* #primes to go into each A */272/* Following is auto-adjusted to account for prime factors of k inserted273* near the start of the FB. NB never ever sieve on the prime 2,which would274* just contribute a constant at each sieve point. */275mpqs_int32_t pmin_index1; /* lower bound for primes used for sieving */276} mpqs_parameterset_t;277278/* - the table of sizing parameters itself */279280/* indexed by size of kN in decimal digits, subscript 0 corresponding to281* 9 (or fewer) digits */282static const mpqs_parameterset_t mpqs_parameters[] =283{ /* tol lp_scl M szFB oA pmx1 */284{ /*9*/ 0.8, 1, 350, 19, 3, 5},285{ /*10*/ 0.8, 1, 300, 23, 3, 5},286{ /*11*/ 0.8, 1, 1000, 27, 3, 5},287{ /*12*/ 0.8, 1, 1100, 27, 3, 5},288{ /*13*/ 0.8, 1, 1400, 31, 3, 5},289{ /*14*/ 0.8, 1, 2200, 33, 3, 5},290{ /*15*/ 0.8, 1, 2300, 39, 3, 5},291{ /*16*/ 0.8, 1, 2900, 43, 3, 5},292{ /*17*/ 0.8, 1, 3200, 51, 3, 5},293{ /*18*/ 0.8, 1, 2800, 55, 3, 5},294{ /*19*/ 0.8, 1, 3400, 65, 3, 5},295{ /*20*/ 0.8, 1, 3400, 71, 3, 5},296{ /*21*/ 0.8, 1, 5400, 90, 3, 5},297{ /*22*/ 0.8, 1, 5700, 95, 3, 5},298{ /*23*/ 0.8, 1, 5700, 110, 3, 5},299{ /*24*/ 0.8, 1, 6000, 130, 4, 7},300{ /*25*/ 0.8, 1, 6500, 140, 4, 7},301{ /*26*/ 0.9, 1, 9000, 160, 4, 7},302{ /*27*/ 1.12, 1, 10000, 160, 4, 7},303{ /*28*/ 1.17, 1, 13000, 180, 4, 11},304{ /*29*/ 1.22, 1, 14000, 220, 4, 11},305{ /*30*/ 1.30, 1, 13000, 240, 4, 11},306{ /*31*/ 1.33, 1, 11000, 240, 4, 13},307{ /*32*/ 1.36, 1, 14000, 300, 5, 13},308{ /*33*/ 1.40, 1, 15000, 340, 5, 13},309{ /*34*/ 1.43, 1, 15000, 380, 5, 17},310{ /*35*/ 1.48, 30, 15000, 380, 5, 17},311{ /*36*/ 1.53, 45, 16000, 440, 5, 17},312{ /*37*/ 1.60, 60, 15000, 420, 6, 19},313{ /*38*/ 1.66, 70, 15000, 520, 6, 19},314{ /*39*/ 1.69, 80, 16000, 540, 6, 23},315/* around here, the largest prime in FB becomes comparable to M in size */316{ /*40*/ 1.69, 80, 16000, 600, 6, 23},317{ /*41*/ 1.69, 80, 16000, 700, 6, 23},318{ /*42*/ 1.69, 80, 24000, 900, 6, 29},319{ /*43*/ 1.69, 80, 26000, 1000, 6, 29},320{ /*44*/ 1.69, 80, 18000, 1100, 7, 31},321{ /*45*/ 1.69, 80, 20000, 1200, 7, 31},322{ /*46*/ 1.69, 80, 22000, 1300, 7, 37},323{ /*47*/ 1.69, 80, 24000, 1400, 7, 37},324{ /*48*/ 1.69, 80, 24000, 1600, 7, 37},325{ /*49*/ 1.72, 80, 28000, 1900, 7, 41},326{ /*50*/ 1.75, 80, 36000, 2100, 7, 41},327{ /*51*/ 1.80, 80, 32000, 2100, 7, 43},328{ /*52*/ 1.85, 80, 44000, 2300, 7, 43},329{ /*53*/ 1.90, 80, 44000, 2600, 7, 47},330{ /*54*/ 1.95, 80, 40000, 2700, 7, 47},331{ /*55*/ 1.95, 80, 48000, 3200, 7, 53},332{ /*56*/ 1.95, 80, 56000, 3400, 7, 53},333{ /*57*/ 2.00, 80, 40000, 3000, 8, 53},334{ /*58*/ 2.05, 80, 64000, 3400, 8, 59},335{ /*59*/ 2.10, 80, 64000, 3800, 8, 59},336{ /*60*/ 2.15, 80, 80000, 4300, 8, 61},337{ /*61*/ 2.20, 80, 80000, 4800, 8, 61},338{ /*62*/ 2.25, 80, 80000, 4600, 8, 67},339{ /*63*/ 2.39, 80, 80000, 4800, 8, 67},340{ /*64*/ 2.30, 80, 88000, 5400, 8, 67},341{ /*65*/ 2.31, 80, 120000, 6600, 8, 71},342{ /*66*/ 2.32, 80, 120000, 6800, 8, 71},343{ /*67*/ 2.33, 80, 144000, 7600, 8, 73},344{ /*68*/ 2.34, 80, 144000, 9000, 8, 73},345{ /*69*/ 2.35, 80, 160000, 9500, 8, 79},346{ /*70*/ 2.36, 80, 176000, 10500, 8, 79},347{ /*71*/ 2.37, 80, 240000, 11000, 9, 79},348{ /*72*/ 2.38, 80, 240000, 12500, 9, 83},349{ /*73*/ 2.41, 80, 240000, 13000, 9, 83},350{ /*74*/ 2.46, 80, 256000, 13250, 9, 83},351{ /*75*/ 2.51, 80, 256000, 14500, 9, 89},352{ /*76*/ 2.56, 80, 256000, 15250, 9, 89},353{ /*77*/ 2.58, 80, 320000, 17000, 9, 89},354{ /*78*/ 2.60, 80, 320000, 18000, 9, 89},355{ /*79*/ 2.63, 80, 320000, 19500, 9, 97},356{ /*80*/ 2.65, 80, 448000, 21000, 9, 97},357{ /*81*/ 2.72, 80, 448000, 22000, 9, 97},358{ /*82*/ 2.77, 80, 448000, 24000, 9, 101},359{ /*83*/ 2.82, 80, 480000, 23000, 10, 101},360{ /*84*/ 2.84, 80, 480000, 24000, 10, 103},361{ /*85*/ 2.86, 80, 512000, 28000, 10, 103},362{ /*86*/ 2.88, 80, 448000, 29000, 10, 107},363/* architectures with 1MBy L2 cache will become noticeably slower here364* as 2*M exceeds that mark - to be addressed in a future version by365* segmenting the sieve interval */366{ /*87*/ 2.90, 80, 512000, 32000, 10, 107},367{ /*88*/ 2.91, 80, 512000, 35000, 10, 109},368{ /*89*/ 2.92, 80, 512000, 38000, 10, 109},369{ /*90*/ 2.93, 80, 512000, 40000, 10, 113},370{ /*91*/ 2.94, 80, 770000, 32200, 10, 113},371/* entries below due to Thomas Denny, never tested */372{ /*92*/ 3.6, 90, 2000000, 35000, 9, 113},373{ /*93*/ 3.7, 90, 2000000, 37000, 9, 113},374{ /*94*/ 3.7, 90, 2000000, 39500, 9, 127},375{ /*95*/ 3.7, 90, 2500000, 41500, 9, 127},376{ /*96*/ 3.8, 90, 2500000, 45000, 10, 127},377{ /*97*/ 3.8, 90, 2500000, 47500, 10, 131},378{ /*98*/ 3.7, 90, 3000000, 51000, 10, 131},379{ /*99*/ 3.8, 90, 3000000, 53000, 10, 133},380{/*100*/ 3.8, 90, 875000, 50000, 10, 133},381{/*101*/ 3.8, 90, 3500000, 54000, 10, 139},382{/*102*/ 3.8, 90, 3500000, 57000, 10, 139},383{/*103*/ 3.9, 90, 4000000, 61000, 10, 139},384{/*104*/ 3.9, 90, 4000000, 66000, 10, 149},385{/*105*/ 3.9, 90, 4000000, 70000, 10, 149},386{/*106*/ 3.9, 90, 4000000, 75000, 10, 151},387{/*107*/ 3.9, 90, 4000000, 80000, 10, 151},388};389390#define MPQS_MAX_DIGIT_SIZE_KN 107391392393