Testing latest pari + WASM + node.js... and it works?! Wow.
License: GPL3
ubuntu2004
#line 2 "../src/kernel/none/mp.c"1/* Copyright (C) 2000-2003 The PARI group.23This file is part of the PARI/GP package.45PARI/GP is free software; you can redistribute it and/or modify it under the6terms of the GNU General Public License as published by the Free Software7Foundation; either version 2 of the License, or (at your option) any later8version. It is distributed in the hope that it will be useful, but WITHOUT9ANY WARRANTY WHATSOEVER.1011Check the License for details. You should have received a copy of it, along12with the package; see the file 'COPYING'. If not, write to the Free Software13Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */1415/***********************************************************************/16/** **/17/** MULTIPRECISION KERNEL **/18/** **/19/***********************************************************************/20#include "pari.h"21#include "paripriv.h"22#include "../src/kernel/none/tune-gen.h"2324void25pari_kernel_init(void) { }26void27pari_kernel_close(void) { }28const char *29pari_kernel_version(void) { return ""; }3031/* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't32* GENs but pairs (long *a, long na) representing a list of digits (in basis33* BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no34* need to reintroduce codewords ] */3536#define LIMBS(x) ((x)+2)37#define NLIMBS(x) (lgefint(x)-2)3839/* Normalize a nonnegative integer */40GEN41int_normalize(GEN x, long known_zero_words)42{43long i, lx = lgefint(x);44GEN x0;45if (lx == 2) { x[1] = evalsigne(0) | evallgefint(2); return x; }46if (!known_zero_words && x[2]) return x;47for (i = 2+known_zero_words; i < lx; i++)48if (x[i]) break;49x0 = x; i -= 2; x += i;50if (x0 == (GEN)avma) set_avma((pari_sp)x);51else stackdummy((pari_sp)(x0+i), (pari_sp)x0);52lx -= i;53x[0] = evaltyp(t_INT) | evallg(lx);54if (lx == 2) x[1] = evalsigne(0) | evallgefint(lx);55else x[1] = evalsigne(1) | evallgefint(lx);56return x;57}5859/***********************************************************************/60/** **/61/** ADDITION / SUBTRACTION **/62/** **/63/***********************************************************************/6465GEN66setloop(GEN a)67{68pari_sp av = avma;69(void)cgetg(lgefint(a) + 3, t_VECSMALL);70return icopy_avma(a, av); /* two cells of extra space before a */71}7273/* we had a = setloop(?), then some incloops. Reset a to b */74GEN75resetloop(GEN a, GEN b) {76long lb = lgefint(b);77a += lgefint(a) - lb;78a[0] = evaltyp(t_INT) | evallg(lb);79affii(b, a); return a;80}8182/* assume a > 0, initialized by setloop. Do a++ */83static GEN84incpos(GEN a)85{86long i, l = lgefint(a);87for (i=l-1; i>1; i--)88if (++a[i]) return a;89l++; a--; /* use extra cell */90a[0]=evaltyp(t_INT) | _evallg(l);91a[1]=evalsigne(1) | evallgefint(l);92a[2]=1; return a;93}9495/* assume a < 0, initialized by setloop. Do a++ */96static GEN97incneg(GEN a)98{99long i, l = lgefint(a)-1;100if (uel(a,l)--)101{102if (l == 2 && !a[2])103{104a++; /* save one cell */105a[0] = evaltyp(t_INT) | _evallg(2);106a[1] = evalsigne(0) | evallgefint(2);107}108return a;109}110for (i = l-1;; i--) /* finishes since a[2] != 0 */111if (uel(a,i)--) break;112if (!a[2])113{114a++; /* save one cell */115a[0] = evaltyp(t_INT) | _evallg(l);116a[1] = evalsigne(-1) | evallgefint(l);117}118return a;119}120121/* assume a initialized by setloop. Do a++ */122GEN123incloop(GEN a)124{125switch(signe(a))126{127case 0: a--; /* use extra cell */128a[0]=evaltyp(t_INT) | _evallg(3);129a[1]=evalsigne(1) | evallgefint(3);130a[2]=1; return a;131case -1: return incneg(a);132default: return incpos(a);133}134}135136INLINE GEN137adduispec(ulong s, GEN x, long nx)138{139GEN xd, zd = (GEN)avma;140long lz;141142if (nx == 1) return adduu(s, uel(x,0));143lz = nx+3; (void)new_chunk(lz);144xd = x + nx;145*--zd = (ulong)*--xd + s;146if ((ulong)*zd < s)147for(;;)148{149if (xd == x) { *--zd = 1; break; } /* enlarge z */150*--zd = ((ulong)*--xd) + 1;151if (*zd) { lz--; break; }152}153else lz--;154while (xd > x) *--zd = *--xd;155*--zd = evalsigne(1) | evallgefint(lz);156*--zd = evaltyp(t_INT) | evallg(lz);157return gc_const((pari_sp)zd, zd);158}159160GEN161adduispec_offset(ulong s, GEN x, long offset, long nx)162{163GEN xd = x+lgefint(x)-nx-offset;164while (nx && *xd==0) {xd++; nx--;}165if (!nx) return utoi(s);166return adduispec(s,xd,nx);167}168169static GEN170addiispec(GEN x, GEN y, long nx, long ny)171{172GEN xd, yd, zd;173long lz, i = -2;174LOCAL_OVERFLOW;175176if (nx < ny) swapspec(x,y, nx,ny);177if (ny == 1) return adduispec(*y,x,nx);178zd = (GEN)avma;179lz = nx+3; (void)new_chunk(lz);180xd = x + nx;181yd = y + ny;182zd[-1] = addll(xd[-1], yd[-1]);183#ifdef addllx8184for ( ; i-8 > -ny; i-=8)185addllx8(xd+i, yd+i, zd+i, overflow);186#endif187for ( ; i >= -ny; i--) zd[i] = addllx(xd[i], yd[i]);188if (overflow)189for(;;)190{191if (i < -nx) { zd[i] = 1; i--; break; } /* enlarge z */192zd[i] = uel(xd,i) + 1;193if (zd[i]) { i--; lz--; break; }194i--;195}196else lz--;197for (; i >= -nx; i--) zd[i] = xd[i];198zd += i+1;199*--zd = evalsigne(1) | evallgefint(lz);200*--zd = evaltyp(t_INT) | evallg(lz);201return gc_const((pari_sp)zd, zd);202}203204/* assume x >= s */205INLINE GEN206subiuspec(GEN x, ulong s, long nx)207{208GEN xd, zd = (GEN)avma;209long lz;210LOCAL_OVERFLOW;211212if (nx == 1) return utoi(x[0] - s);213214lz = nx+2; (void)new_chunk(lz);215xd = x + nx;216*--zd = subll(*--xd, s);217if (overflow)218for(;;)219{220*--zd = ((ulong)*--xd) - 1;221if (*xd) break;222}223if (xd == x)224while (*zd == 0) { zd++; lz--; } /* shorten z */225else226do *--zd = *--xd; while (xd > x);227*--zd = evalsigne(1) | evallgefint(lz);228*--zd = evaltyp(t_INT) | evallg(lz);229return gc_const((pari_sp)zd, zd);230}231232/* assume x > y */233static GEN234subiispec(GEN x, GEN y, long nx, long ny)235{236GEN xd,yd,zd;237long lz, i = -2;238LOCAL_OVERFLOW;239240if (ny==1) return subiuspec(x,*y,nx);241zd = (GEN)avma;242lz = nx+2; (void)new_chunk(lz);243xd = x + nx;244yd = y + ny;245zd[-1] = subll(xd[-1], yd[-1]);246#ifdef subllx8247for ( ; i-8 > -ny; i-=8)248subllx8(xd+i, yd+i, zd+i, overflow);249#endif250for ( ; i >= -ny; i--) zd[i] = subllx(xd[i], yd[i]);251if (overflow)252for(;;)253{254zd[i] = uel(xd,i) - 1;255if (xd[i--]) break;256}257if (i>=-nx)258for (; i >= -nx; i--) zd[i] = xd[i];259else260while (zd[i+1] == 0) { i++; lz--; } /* shorten z */261zd += i+1;262*--zd = evalsigne(1) | evallgefint(lz);263*--zd = evaltyp(t_INT) | evallg(lz);264return gc_const((pari_sp)zd, zd);265}266267static void268roundr_up_ip(GEN x, long l)269{270long i = l;271for(;;)272{273if (++uel(x,--i)) break;274if (i == 2) { x[2] = (long)HIGHBIT; shiftr_inplace(x, 1); break; }275}276}277278void279affir(GEN x, GEN y)280{281const long s = signe(x), ly = lg(y);282long lx, sh, i;283284if (!s)285{286y[1] = evalexpo(-prec2nbits(ly));287return;288}289290lx = lgefint(x); sh = bfffo(x[2]);291y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);292if (sh) {293if (lx <= ly)294{295for (i=lx; i<ly; i++) y[i]=0;296shift_left(y,x,2,lx-1, 0,sh);297return;298}299shift_left(y,x,2,ly-1, x[ly],sh);300/* lx > ly: round properly */301if ((uel(x,ly)<<sh) & HIGHBIT) roundr_up_ip(y, ly);302}303else {304if (lx <= ly)305{306for (i=2; i<lx; i++) y[i]=x[i];307for ( ; i<ly; i++) y[i]=0;308return;309}310for (i=2; i<ly; i++) y[i]=x[i];311/* lx > ly: round properly */312if (uel(x,ly) & HIGHBIT) roundr_up_ip(y, ly);313}314}315316INLINE GEN317shiftispec(GEN x, long nx, long n)318{319long ny, i, m;320GEN y, yd;321if (!n) return icopyspec(x, nx);322323if (n > 0)324{325GEN z = (GEN)avma;326long d = dvmdsBIL(n, &m);327328ny = nx+d; y = new_chunk(ny + 2); yd = y + 2;329for ( ; d; d--) *--z = 0;330if (!m) for (i=0; i<nx; i++) yd[i]=x[i];331else332{333register const ulong sh = BITS_IN_LONG - m;334shift_left(yd,x, 0,nx-1, 0,m);335i = uel(x,0) >> sh;336/* Extend y on the left? */337if (i) { ny++; y = new_chunk(1); y[2] = i; }338}339}340else341{342ny = nx - dvmdsBIL(-n, &m);343if (ny<1) return gen_0;344y = new_chunk(ny + 2); yd = y + 2;345if (m) {346shift_right(yd,x, 0,ny, 0,m);347if (yd[0] == 0)348{349if (ny==1) return gc_const((pari_sp)(y+3), gen_0);350ny--; set_avma((pari_sp)(++y));351}352} else {353for (i=0; i<ny; i++) yd[i]=x[i];354}355}356y[1] = evalsigne(1)|evallgefint(ny + 2);357y[0] = evaltyp(t_INT)|evallg(ny + 2); return y;358}359360GEN361mantissa2nr(GEN x, long n)362{ /*This is a kludge since x is not an integer*/363long s = signe(x);364GEN y;365366if(s == 0) return gen_0;367y = shiftispec(x + 2, lg(x) - 2, n);368if (signe(y)) setsigne(y, s);369return y;370}371372GEN373truncr(GEN x)374{375long d,e,i,s,m;376GEN y;377378if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;379d = nbits2lg(e+1); m = remsBIL(e);380if (d > lg(x)) pari_err_PREC( "truncr (precision loss in truncation)");381382y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);383if (++m == BITS_IN_LONG)384for (i=2; i<d; i++) y[i]=x[i];385else386shift_right(y,x, 2,d,0, BITS_IN_LONG - m);387return y;388}389390/* integral part */391GEN392floorr(GEN x)393{394long d,e,i,lx,m;395GEN y;396397if (signe(x) >= 0) return truncr(x);398if ((e=expo(x)) < 0) return gen_m1;399d = nbits2lg(e+1); m = remsBIL(e);400lx=lg(x); if (d>lx) pari_err_PREC( "floorr (precision loss in truncation)");401y = new_chunk(d);402if (++m == BITS_IN_LONG)403{404for (i=2; i<d; i++) y[i]=x[i];405i=d; while (i<lx && !x[i]) i++;406if (i==lx) goto END;407}408else409{410shift_right(y,x, 2,d,0, BITS_IN_LONG - m);411if (uel(x,d-1)<<m == 0)412{413i=d; while (i<lx && !x[i]) i++;414if (i==lx) goto END;415}416}417/* set y:=y+1 */418for (i=d-1; i>=2; i--) { uel(y,i)++; if (y[i]) goto END; }419y=new_chunk(1); y[2]=1; d++;420END:421y[1] = evalsigne(-1) | evallgefint(d);422y[0] = evaltyp(t_INT) | evallg(d); return y;423}424425INLINE int426cmpiispec(GEN x, GEN y, long lx, long ly)427{428long i;429if (lx < ly) return -1;430if (lx > ly) return 1;431i = 0; while (i<lx && x[i]==y[i]) i++;432if (i==lx) return 0;433return (uel(x,i) > uel(y,i))? 1: -1;434}435436INLINE int437equaliispec(GEN x, GEN y, long lx, long ly)438{439long i;440if (lx != ly) return 0;441i = ly-1; while (i>=0 && x[i]==y[i]) i--;442return i < 0;443}444445/***********************************************************************/446/** **/447/** MULTIPLICATION **/448/** **/449/***********************************************************************/450/* assume ny > 0 */451INLINE GEN452muluispec(ulong x, GEN y, long ny)453{454GEN yd, z = (GEN)avma;455long lz = ny+3;456LOCAL_HIREMAINDER;457458(void)new_chunk(lz);459yd = y + ny; *--z = mulll(x, *--yd);460while (yd > y) *--z = addmul(x,*--yd);461if (hiremainder) *--z = hiremainder; else lz--;462*--z = evalsigne(1) | evallgefint(lz);463*--z = evaltyp(t_INT) | evallg(lz);464return gc_const((pari_sp)z, z);465}466467/* a + b*|Y| */468GEN469addumului(ulong a, ulong b, GEN Y)470{471GEN yd,y,z;472long ny,lz;473LOCAL_HIREMAINDER;474LOCAL_OVERFLOW;475476if (!b || !signe(Y)) return utoi(a);477478y = LIMBS(Y); z = (GEN)avma;479ny = NLIMBS(Y);480lz = ny+3;481482(void)new_chunk(lz);483yd = y + ny; *--z = addll(a, mulll(b, *--yd));484if (overflow) hiremainder++; /* can't overflow */485while (yd > y) *--z = addmul(b,*--yd);486if (hiremainder) *--z = hiremainder; else lz--;487*--z = evalsigne(1) | evallgefint(lz);488*--z = evaltyp(t_INT) | evallg(lz);489return gc_const((pari_sp)z, z);490}491492/***********************************************************************/493/** **/494/** DIVISION **/495/** **/496/***********************************************************************/497498ulong499umodiu(GEN y, ulong x)500{501long sy=signe(y),ly,i;502ulong xi;503LOCAL_HIREMAINDER;504505if (!x) pari_err_INV("umodiu",gen_0);506if (!sy) return 0;507ly = lgefint(y);508if (x <= uel(y,2))509{510hiremainder=0;511if (ly==3)512{513hiremainder=uel(y,2)%x;514if (!hiremainder) return 0;515return (sy > 0)? hiremainder: x - hiremainder;516}517}518else519{520if (ly==3) return (sy > 0)? uel(y,2): x - uel(y,2);521hiremainder=uel(y,2); ly--; y++;522}523xi = get_Fl_red(x);524for (i=2; i<ly; i++) (void)divll_pre(y[i],x,xi);525if (!hiremainder) return 0;526return (sy > 0)? hiremainder: x - hiremainder;527}528529/* return |y| \/ x */530GEN531absdiviu_rem(GEN y, ulong x, ulong *rem)532{533long ly,i;534GEN z;535ulong xi;536LOCAL_HIREMAINDER;537538if (!x) pari_err_INV("absdiviu_rem",gen_0);539if (!signe(y)) { *rem = 0; return gen_0; }540541ly = lgefint(y);542if (x <= uel(y,2))543{544hiremainder=0;545if (ly==3)546{547z = cgetipos(3);548z[2] = divll(uel(y,2),x);549*rem = hiremainder; return z;550}551}552else553{554if (ly==3) { *rem = uel(y,2); return gen_0; }555hiremainder = uel(y,2); ly--; y++;556}557xi = get_Fl_red(x);558z = cgetipos(ly);559for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);560*rem = hiremainder; return z;561}562563GEN564divis_rem(GEN y, long x, long *rem)565{566long sy=signe(y),ly,s,i;567GEN z;568ulong xi;569LOCAL_HIREMAINDER;570571if (!x) pari_err_INV("divis_rem",gen_0);572if (!sy) { *rem=0; return gen_0; }573if (x<0) { s = -sy; x = -x; } else s = sy;574575ly = lgefint(y);576if ((ulong)x <= uel(y,2))577{578hiremainder=0;579if (ly==3)580{581z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);582z[2] = divll(uel(y,2),x);583if (sy<0) hiremainder = - ((long)hiremainder);584*rem = (long)hiremainder; return z;585}586}587else588{589if (ly==3) { *rem = itos(y); return gen_0; }590hiremainder = uel(y,2); ly--; y++;591}592xi = get_Fl_red(x);593z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);594for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);595if (sy<0) hiremainder = - ((long)hiremainder);596*rem = (long)hiremainder; return z;597}598599GEN600divis(GEN y, long x)601{602long sy=signe(y),ly,s,i;603ulong xi;604GEN z;605LOCAL_HIREMAINDER;606607if (!x) pari_err_INV("divis",gen_0);608if (!sy) return gen_0;609if (x<0) { s = -sy; x = -x; } else s = sy;610611ly = lgefint(y);612if ((ulong)x <= uel(y,2))613{614hiremainder=0;615if (ly==3)616{617z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);618z[2] = divll(y[2],x);619return z;620}621}622else623{624if (ly==3) return gen_0;625hiremainder=y[2]; ly--; y++;626}627xi = get_Fl_red(x);628z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);629for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x, xi);630return z;631}632633GEN634divrr(GEN x, GEN y)635{636long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;637ulong y0,y1;638GEN r, r1;639640if (!x) pari_err_INV("divrr",y);641e = expo(x) - expo(y);642if (!sx) return real_0_bit(e);643if (sy<0) sx = -sx;644645lx=lg(x); ly=lg(y);646if (ly==3)647{648ulong k = x[2], l = (lx>3)? x[3]: 0;649LOCAL_HIREMAINDER;650if (k < uel(y,2)) e--;651else652{653l >>= 1; if (k&1) l |= HIGHBIT;654k >>= 1;655}656hiremainder = k; k = divll(l,y[2]);657if (hiremainder > (uel(y,2) >> 1) && !++k) { k = HIGHBIT; e++; }658r = cgetr(3);659r[1] = evalsigne(sx) | evalexpo(e);660r[2] = k; return r;661}662663lr = minss(lx,ly); r = new_chunk(lr);664r1 = r-1;665r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];666r1[lr] = (lx>ly)? x[lr]: 0;667y0 = y[2]; y1 = y[3];668for (i=0; i<lr-1; i++)669{ /* r1 = r + (i-1), OK up to r1[2] (accesses at most r[lr]) */670ulong k, qp;671LOCAL_HIREMAINDER;672LOCAL_OVERFLOW;673674if (uel(r1,1) == y0) { qp = ULONG_MAX; k = addll(y0,r1[2]); }675else676{677if (uel(r1,1) > y0) /* can't happen if i=0 */678{679GEN y1 = y+1;680j = lr-i; r1[j] = subll(r1[j],y1[j]);681for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);682j=i; do uel(r,--j)++; while (j && !uel(r,j));683}684hiremainder = r1[1]; overflow = 0;685qp = divll(r1[2],y0); k = hiremainder;686}687j = lr-i+1;688if (!overflow)689{690long k3, k4;691k3 = mulll(qp,y1);692if (j == 3) /* i = lr - 2 maximal, r1[3] undefined -> 0 */693k4 = subll(hiremainder,k);694else695{696k3 = subll(k3, r1[3]);697k4 = subllx(hiremainder,k);698}699while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }700}701if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }702for (j--; j>1; j--)703{704r1[j] = subll(r1[j], addmul(qp,y[j]));705hiremainder += overflow;706}707if (uel(r1,1) != hiremainder)708{709if (uel(r1,1) < hiremainder)710{711qp--;712j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);713for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);714}715else716{717r1[1] -= hiremainder;718while (r1[1])719{720qp++; if (!qp) { j=i; do uel(r,--j)++; while (j && !r[j]); }721j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);722for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);723r1[1] -= overflow;724}725}726}727*++r1 = qp;728}729/* i = lr-1 */730/* round correctly */731if (uel(r1,1) > (y0>>1))732{733j=i; do uel(r,--j)++; while (j && !r[j]);734}735r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];736if (r[0] == 0) e--;737else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }738else { /* possible only when rounding up to 0x2 0x0 ... */739r[2] = (long)HIGHBIT; e++;740}741r[0] = evaltyp(t_REAL)|evallg(lr);742r[1] = evalsigne(sx) | evalexpo(e);743return r;744}745746GEN747divri(GEN x, GEN y)748{749long lx, s = signe(y);750pari_sp av;751GEN z;752753if (!s) pari_err_INV("divri",y);754if (!signe(x)) return real_0_bit(expo(x) - expi(y));755if (!is_bigint(y)) {756GEN z = divru(x, y[2]);757if (s < 0) togglesign(z);758return z;759}760lx = lg(x); z = cgetr(lx); av = avma;761affrr(divrr(x, itor(y, lx+1)), z);762return gc_const(av, z);763}764765/* Integer division x / y: such that sign(r) = sign(x)766* if z = ONLY_REM return remainder, otherwise return quotient767* if z != NULL set *z to remainder768* *z is the last object on stack (and thus can be disposed of with cgiv769* instead of gerepile)770* If *z is zero, we put gen_0 here and no copy.771* space needed: lx + ly */772GEN773dvmdii(GEN x, GEN y, GEN *z)774{775long sx = signe(x), sy = signe(y);776long lx, ly = lgefint(y), lz, i, j, sh, lq, lr;777pari_sp av;778ulong y0,y0i,y1, *xd,*rd,*qd;779GEN q, r, r1;780781if (!sx)782{783if (ly < 3) pari_err_INV("dvmdii",gen_0);784if (!z || z == ONLY_REM) return gen_0;785*z=gen_0; return gen_0;786}787if (ly <= 3)788{789ulong rem;790if (ly < 3) pari_err_INV("dvmdii",gen_0);791if (z == ONLY_REM)792{793rem = umodiu(x,uel(y,2));794if (!rem) return gen_0;795return (sx < 0)? utoineg(uel(y,2) - rem): utoipos(rem);796}797q = absdiviu_rem(x, uel(y,2), &rem);798if (sx != sy) togglesign(q);799if (!z) return q;800if (!rem) *z = gen_0;801else *z = sx < 0? utoineg(rem): utoipos(rem);802return q;803}804lx=lgefint(x);805lz=lx-ly;806if (lz <= 0)807{808if (lz == 0)809{810for (i=2; i<lx; i++)811if (x[i] != y[i])812{813if (uel(x,i) > uel(y,i)) goto DIVIDE;814goto TRIVIAL;815}816if (z == ONLY_REM) return gen_0;817if (z) *z = gen_0;818if (sx < 0) sy = -sy;819return stoi(sy);820}821TRIVIAL:822if (z == ONLY_REM) return icopy(x);823if (z) *z = icopy(x);824return gen_0;825}826DIVIDE: /* quotient is nonzero */827av=avma; if (sx<0) sy = -sy;828r1 = new_chunk(lx); sh = bfffo(y[2]);829if (sh)830{ /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/831register const ulong m = BITS_IN_LONG - sh;832r = new_chunk(ly);833shift_left(r, y,2,ly-1, 0,sh); y = r;834shift_left(r1,x,2,lx-1, 0,sh);835r1[1] = uel(x,2) >> m;836}837else838{839r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];840}841x = r1;842y0 = y[2]; y0i = get_Fl_red(y0);843y1 = y[3];844for (i=0; i<=lz; i++)845{ /* r1 = x + i */846ulong k, qp;847LOCAL_HIREMAINDER;848LOCAL_OVERFLOW;849850if (uel(r1,1) == y0)851{852qp = ULONG_MAX; k = addll(y0,r1[2]);853}854else855{856hiremainder = r1[1]; overflow = 0;857qp = divll_pre(r1[2],y0,y0i); k = hiremainder;858}859if (!overflow)860{861long k3 = subll(mulll(qp,y1), r1[3]);862long k4 = subllx(hiremainder,k);863while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }864}865hiremainder = 0; j = ly;866for (j--; j>1; j--)867{868r1[j] = subll(r1[j], addmul(qp,y[j]));869hiremainder += overflow;870}871if (uel(r1,1) < hiremainder)872{873qp--;874j = ly-1; r1[j] = addll(r1[j],y[j]);875for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);876}877*++r1 = qp;878}879880lq = lz+2;881if (!z)882{883qd = (ulong*)av;884xd = (ulong*)(x + lq);885if (x[1]) { lz++; lq++; }886while (lz--) *--qd = *--xd;887*--qd = evalsigne(sy) | evallgefint(lq);888*--qd = evaltyp(t_INT) | evallg(lq);889return gc_const((pari_sp)qd, (GEN)qd);890}891892j=lq; while (j<lx && !x[j]) j++;893lz = lx-j;894if (z == ONLY_REM)895{896if (lz==0) return gc_const(av, gen_0);897rd = (ulong*)av; lr = lz+2;898xd = (ulong*)(x + lx);899if (!sh) while (lz--) *--rd = *--xd;900else901{ /* shift remainder right by sh bits */902const ulong shl = BITS_IN_LONG - sh;903ulong l;904xd--;905while (--lz) /* fill r[3..] */906{907l = *xd >> sh;908*--rd = l | (*--xd << shl);909}910l = *xd >> sh;911if (l) *--rd = l; else lr--;912}913*--rd = evalsigne(sx) | evallgefint(lr);914*--rd = evaltyp(t_INT) | evallg(lr);915return gc_const((pari_sp)rd, (GEN)rd);916}917918lr = lz+2;919rd = NULL; /* gcc -Wall */920if (lz)921{ /* non zero remainder: initialize rd */922xd = (ulong*)(x + lx);923if (!sh)924{925rd = (ulong*)avma; (void)new_chunk(lr);926while (lz--) *--rd = *--xd;927}928else929{ /* shift remainder right by sh bits */930const ulong shl = BITS_IN_LONG - sh;931ulong l;932rd = (ulong*)x; /* overwrite shifted y */933xd--;934while (--lz)935{936l = *xd >> sh;937*--rd = l | (*--xd << shl);938}939l = *xd >> sh;940if (l) *--rd = l; else lr--;941}942*--rd = evalsigne(sx) | evallgefint(lr);943*--rd = evaltyp(t_INT) | evallg(lr);944rd += lr;945}946qd = (ulong*)av;947xd = (ulong*)(x + lq);948if (x[1]) lq++;949j = lq-2; while (j--) *--qd = *--xd;950*--qd = evalsigne(sy) | evallgefint(lq);951*--qd = evaltyp(t_INT) | evallg(lq);952q = (GEN)qd;953if (lr==2) *z = gen_0;954else955{ /* rd has been properly initialized: we had lz > 0 */956while (lr--) *--qd = *--rd;957*z = (GEN)qd;958}959return gc_const((pari_sp)qd, q);960}961962/* Montgomery reduction.963* N has k words, assume T >= 0 has less than 2k.964* Return res := T / B^k mod N, where B = 2^BIL965* such that 0 <= res < T/B^k + N and res has less than k words */966GEN967red_montgomery(GEN T, GEN N, ulong inv)968{969pari_sp av;970GEN Te, Td, Ne, Nd, scratch;971ulong i, j, m, t, d, k = NLIMBS(N);972int carry;973LOCAL_HIREMAINDER;974LOCAL_OVERFLOW;975976if (k == 0) return gen_0;977d = NLIMBS(T); /* <= 2*k */978if (d == 0) return gen_0;979#ifdef DEBUG980if (d > 2*k) pari_err_BUG("red_montgomery");981#endif982if (k == 1)983{ /* as below, special cased for efficiency */984ulong n = uel(N,2);985if (d == 1) {986hiremainder = uel(T,2);987m = hiremainder * inv;988(void)addmul(m, n); /* t + m*n = 0 */989return utoi(hiremainder);990} else { /* d = 2 */991hiremainder = uel(T,3);992m = hiremainder * inv;993(void)addmul(m, n); /* t + m*n = 0 */994t = addll(hiremainder, uel(T,2));995if (overflow) t -= n; /* t > n doesn't fit in 1 word */996return utoi(t);997}998}999/* assume k >= 2 */1000av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */10011002/* copy T to scratch space (pad with zeroes to 2k words) */1003Td = (GEN)av;1004Te = T + (d+2);1005for (i=0; i < d ; i++) *--Td = *--Te;1006for ( ; i < (k<<1); i++) *--Td = 0;10071008Te = (GEN)av; /* 1 beyond end of current T mantissa (in scratch) */1009Ne = N + k+2; /* 1 beyond end of N mantissa */10101011carry = 0;1012for (i=0; i<k; i++) /* set T := T/B nod N, k times */1013{1014Td = Te; /* one beyond end of (new) T mantissa */1015Nd = Ne;1016hiremainder = *--Td;1017m = hiremainder * inv; /* solve T + m N = O(B) */10181019/* set T := (T + mN) / B */1020Te = Td;1021(void)addmul(m, *--Nd); /* = 0 */1022for (j=1; j<k; j++)1023{1024t = addll(addmul(m, *--Nd), *--Td);1025*Td = t;1026hiremainder += overflow;1027}1028t = addll(hiremainder, *--Td); *Td = t + carry;1029carry = (overflow || (carry && *Td == 0));1030}1031if (carry)1032{ /* Td > N overflows (k+1 words), set Td := Td - N */1033Td = Te;1034Nd = Ne;1035t = subll(*--Td, *--Nd); *Td = t;1036while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }1037}10381039/* copy result */1040Td = (GEN)av;1041while (*scratch == 0 && Te > scratch) scratch++; /* strip leading 0s */1042while (Te > scratch) *--Td = *--Te;1043k = (GEN)av - Td; if (!k) return gc_const(av, gen_0);1044k += 2;1045*--Td = evalsigne(1) | evallgefint(k);1046*--Td = evaltyp(t_INT) | evallg(k);1047#ifdef DEBUG1048{1049long l = NLIMBS(N), s = BITS_IN_LONG*l;1050GEN R = int2n(s);1051GEN res = remii(mulii(T, Fp_inv(R, N)), N);1052if (k > lgefint(N)1053|| !equalii(remii(Td,N),res)1054|| cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err_BUG("red_montgomery");1055}1056#endif1057return gc_const((pari_sp)Td, Td);1058}10591060/* EXACT INTEGER DIVISION */10611062/* assume xy>0, the division is exact and y is odd. Destroy x */1063static GEN1064diviuexact_i(GEN x, ulong y)1065{1066long i, lz, lx;1067ulong q, yinv;1068GEN z, z0, x0, x0min;10691070if (y == 1) return icopy(x);1071lx = lgefint(x);1072if (lx == 3)1073{1074q = uel(x,2) / y;1075if (!q) pari_err_OP("exact division", x, utoi(y));1076return utoipos(q);1077}1078yinv = invmod2BIL(y);1079lz = (y <= uel(x,2)) ? lx : lx-1;1080z = new_chunk(lz);1081z0 = z + lz;1082x0 = x + lx; x0min = x + lx-lz+2;10831084while (x0 > x0min)1085{1086*--z0 = q = yinv*uel(--x0,0); /* i-th quotient */1087if (!q) continue;1088/* x := x - q * y */1089{ /* update neither lowest word (could set it to 0) nor highest ones */1090register GEN x1 = x0 - 1;1091LOCAL_HIREMAINDER;1092(void)mulll(q,y);1093if (hiremainder)1094{1095if (uel(x1,0) < hiremainder)1096{1097uel(x1,0) -= hiremainder;1098do uel(--x1,0)--; while (uel(x1,0) == ULONG_MAX);1099}1100else1101uel(x1,0) -= hiremainder;1102}1103}1104}1105i=2; while(!z[i]) i++;1106z += i-2; lz -= i-2;1107z[0] = evaltyp(t_INT)|evallg(lz);1108z[1] = evalsigne(1)|evallg(lz);1109if (lz == 2) pari_err_OP("exact division", x, utoi(y));1110return gc_const((pari_sp)z, z);1111}11121113/* assume y != 0 and the division is exact */1114GEN1115diviuexact(GEN x, ulong y)1116{1117pari_sp av;1118long lx, vy, s = signe(x);1119GEN z;11201121if (!s) return gen_0;1122if (y == 1) return icopy(x);1123lx = lgefint(x);1124if (lx == 3) {1125ulong q = uel(x,2) / y;1126if (!q) pari_err_OP("exact division", x, utoi(y));1127return (s > 0)? utoipos(q): utoineg(q);1128}1129av = avma; (void)new_chunk(lx); vy = vals(y);1130if (vy) {1131y >>= vy;1132if (y == 1) { set_avma(av); return shifti(x, -vy); }1133x = shifti(x, -vy);1134if (lx == 3) {1135ulong q = uel(x,2) / y;1136set_avma(av);1137if (!q) pari_err_OP("exact division", x, utoi(y));1138return (s > 0)? utoipos(q): utoineg(q);1139}1140} else x = icopy(x);1141set_avma(av);1142z = diviuexact_i(x, y);1143setsigne(z, s); return z;1144}11451146/* Find z such that x=y*z, knowing that y | x (unchecked)1147* Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.1148* Set x := (x - z0 y) / B, updating only relevant words, and repeat */1149GEN1150diviiexact(GEN x, GEN y)1151{1152long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);1153pari_sp av;1154ulong y0inv,q;1155GEN z;11561157if (!sy) pari_err_INV("diviiexact",gen_0);1158if (!sx) return gen_0;1159lx = lgefint(x);1160if (lx == 3) {1161q = uel(x,2) / uel(y,2);1162if (!q) pari_err_OP("exact division", x, y);1163return (sx+sy) ? utoipos(q): utoineg(q);1164}1165vy = vali(y); av = avma;1166(void)new_chunk(lx); /* enough room for z */1167if (vy)1168{ /* make y odd */1169y = shifti(y,-vy);1170x = shifti(x,-vy); lx = lgefint(x);1171}1172else x = icopy(x); /* necessary because we destroy x */1173set_avma(av); /* will erase our x,y when exiting */1174/* now y is odd */1175ly = lgefint(y);1176if (ly == 3)1177{1178z = diviuexact_i(x,uel(y,2)); /* x != 0 */1179setsigne(z, (sx+sy)? 1: -1); return z;1180}1181y0inv = invmod2BIL(y[ly-1]);1182i=2; while (i<ly && y[i]==x[i]) i++;1183lz = (i==ly || uel(y,i) < uel(x,i)) ? lx-ly+3 : lx-ly+2;1184z = new_chunk(lz);11851186y += ly - 1; /* now y[-i] = i-th word of y */1187for (ii=lx-1,i=lz-1; i>=2; i--,ii--)1188{1189long limj;1190LOCAL_HIREMAINDER;1191LOCAL_OVERFLOW;11921193z[i] = q = y0inv*uel(x,ii); /* i-th quotient */1194if (!q) continue;11951196/* x := x - q * y */1197(void)mulll(q,y[0]); limj = maxss(lx - lz, ii+3-ly);1198{ /* update neither lowest word (could set it to 0) nor highest ones */1199register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;1200for (; x0 >= xlim; x0--, y0--)1201{1202*x0 = subll(*x0, addmul(q,*y0));1203hiremainder += overflow;1204}1205if (hiremainder && limj != lx - lz)1206{1207if ((ulong)*x0 < hiremainder)1208{1209*x0 -= hiremainder;1210do (*--x0)--; while ((ulong)*x0 == ULONG_MAX);1211}1212else1213*x0 -= hiremainder;1214}1215}1216}1217i=2; while(!z[i]) i++;1218z += i-2; lz -= (i-2);1219z[0] = evaltyp(t_INT)|evallg(lz);1220z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);1221if (lz == 2) pari_err_OP("exact division", x, y);1222return gc_const((pari_sp)z, z);1223}12241225/* assume yz != and yz | x */1226GEN1227diviuuexact(GEN x, ulong y, ulong z)1228{1229long tmp[4];1230ulong t;1231LOCAL_HIREMAINDER;1232t = mulll(y, z);1233if (!hiremainder) return diviuexact(x, t);1234tmp[0] = evaltyp(t_INT)|_evallg(4);1235tmp[1] = evalsigne(1)|evallgefint(4);1236tmp[2] = hiremainder;1237tmp[3] = t;1238return diviiexact(x, tmp);1239}12401241/********************************************************************/1242/** **/1243/** INTEGER MULTIPLICATION (BASECASE) **/1244/** **/1245/********************************************************************/1246/* nx >= ny = num. of digits of x, y (not GEN, see mulii) */1247INLINE GEN1248muliispec_basecase(GEN x, GEN y, long nx, long ny)1249{1250GEN z2e,z2d,yd,xd,ye,zd;1251long p1,lz;1252LOCAL_HIREMAINDER;12531254if (ny == 1) return muluispec((ulong)*y, x, nx);1255if (ny == 0) return gen_0;1256zd = (GEN)avma; lz = nx+ny+2;1257(void)new_chunk(lz);1258xd = x + nx;1259yd = y + ny;1260ye = yd; p1 = *--xd;12611262*--zd = mulll(p1, *--yd); z2e = zd;1263while (yd > y) *--zd = addmul(p1, *--yd);1264*--zd = hiremainder;12651266while (xd > x)1267{1268LOCAL_OVERFLOW;1269yd = ye; p1 = *--xd;12701271z2d = --z2e;1272*z2d = addll(mulll(p1, *--yd), *z2d); z2d--;1273while (yd > y)1274{1275hiremainder += overflow;1276*z2d = addll(addmul(p1, *--yd), *z2d); z2d--;1277}1278*--zd = hiremainder + overflow;1279}1280if (*zd == 0) { zd++; lz--; } /* normalize */1281*--zd = evalsigne(1) | evallgefint(lz);1282*--zd = evaltyp(t_INT) | evallg(lz);1283return gc_const((pari_sp)zd, zd);1284}12851286INLINE GEN1287sqrispec_basecase(GEN x, long nx)1288{1289GEN z2e,z2d,yd,xd,zd,x0,z0;1290long p1,lz;1291LOCAL_HIREMAINDER;1292LOCAL_OVERFLOW;12931294if (nx == 1) return sqru((ulong)*x);1295if (nx == 0) return gen_0;1296zd = (GEN)avma; lz = (nx+1) << 1;1297z0 = new_chunk(lz);1298if (nx == 1)1299{1300*--zd = mulll(*x, *x);1301*--zd = hiremainder; goto END;1302}1303xd = x + nx;13041305/* compute double products --> zd */1306p1 = *--xd; yd = xd; --zd;1307*--zd = mulll(p1, *--yd); z2e = zd;1308while (yd > x) *--zd = addmul(p1, *--yd);1309*--zd = hiremainder;13101311x0 = x+1;1312while (xd > x0)1313{1314LOCAL_OVERFLOW;1315p1 = *--xd; yd = xd;13161317z2e -= 2; z2d = z2e;1318*z2d = addll(mulll(p1, *--yd), *z2d); z2d--;1319while (yd > x)1320{1321hiremainder += overflow;1322*z2d = addll(addmul(p1, *--yd), *z2d); z2d--;1323}1324*--zd = hiremainder + overflow;1325}1326/* multiply zd by 2 (put result in zd - 1) */1327zd[-1] = ((*zd & HIGHBIT) != 0);1328shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);13291330/* add the squares */1331xd = x + nx; zd = z0 + lz;1332p1 = *--xd;1333zd--; *zd = mulll(p1,p1);1334zd--; *zd = addll(hiremainder, *zd);1335while (xd > x)1336{1337p1 = *--xd;1338zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);1339zd--; *zd = addll(hiremainder + overflow, *zd);1340}13411342END:1343if (*zd == 0) { zd++; lz--; } /* normalize */1344*--zd = evalsigne(1) | evallgefint(lz);1345*--zd = evaltyp(t_INT) | evallg(lz);1346return gc_const((pari_sp)zd, zd);1347}13481349/********************************************************************/1350/** **/1351/** INTEGER MULTIPLICATION (FFT) **/1352/** **/1353/********************************************************************/13541355/*1356Compute parameters for FFT:1357len: result length1358k: FFT depth.1359n: number of blocks (2^k)1360bs: block size1361mod: Modulus is M=2^(BIL*mod)+11362ord: order of 2 in Z/MZ.1363We must have:1364bs*n >= l13652^(BIL*mod) > nb*2^(2*BIL*bs)13662^k | 2*BIL*mod1367*/1368static void1369mulliifft_params(long len, long *k, long *mod, long *bs, long *n, ulong *ord)1370{1371long r;1372*k = expu((3*len)>>2)-3;1373do {1374(*k)--;1375r = *k-(TWOPOTBITS_IN_LONG+2);1376*n = 1L<<*k;1377*bs = (len+*n-1)>>*k;1378*mod= 2**bs+1;1379if (r>0)1380*mod=((*mod+(1L<<r)-1)>>r)<<r;1381} while(*mod>=3**bs);1382*ord= 4**mod*BITS_IN_LONG;1383}13841385/* Zf_: arithmetic in ring Z/MZ where M= 2^(BITS_IN_LONG*mod)+11386* for some mod.1387* Do not garbage collect.1388*/13891390static GEN1391Zf_add(GEN a, GEN b, GEN M)1392{1393GEN y, z = addii(a,b);1394long mod = lgefint(M)-3;1395long l = NLIMBS(z);1396if (l<=mod) return z;1397y = subiu(z, 1);1398if (NLIMBS(y)<=mod) return z;1399return int_normalize(y,1);1400}14011402static GEN1403Zf_sub(GEN a, GEN b, GEN M)1404{1405GEN z = subii(a,b);1406return signe(z)>=0? z: addii(M,z);1407}14081409/* destroy z */1410static GEN1411Zf_red_destroy(GEN z, GEN M)1412{1413long mod = lgefint(M)-3;1414long l = NLIMBS(z);1415GEN y;1416if (l<=mod) return z;1417y = shifti(z, -mod*BITS_IN_LONG);1418z = int_normalize(z, NLIMBS(y));1419y = Zf_red_destroy(y, M);1420z = subii(z, y);1421if (signe(z)<0) z = addii(z, M);1422return z;1423}14241425INLINE GEN1426Zf_shift(GEN a, ulong s, GEN M) { return Zf_red_destroy(shifti(a, s), M); }14271428/*1429Multiply by sqrt(2)^s1430We use the formula sqrt(2)=z_8*(1-z_4)) && z_8=2^(ord/16) [2^(ord/4)+1]1431*/14321433static GEN1434Zf_mulsqrt2(GEN a, ulong s, ulong ord, GEN M)1435{1436ulong hord = ord>>1;1437if (!signe(a)) return gen_0;1438if (odd(s)) /* Multiply by 2^(s/2) */1439{1440GEN az8 = Zf_shift(a, ord>>4, M);1441GEN az83 = Zf_shift(az8, ord>>3, M);1442a = Zf_sub(az8, az83, M);1443s--;1444}1445if (s < hord)1446return Zf_shift(a, s>>1, M);1447else1448return subii(M,Zf_shift(a, (s-hord)>>1, M));1449}14501451INLINE GEN1452Zf_sqr(GEN a, GEN M) { return Zf_red_destroy(sqri(a), M); }14531454INLINE GEN1455Zf_mul(GEN a, GEN b, GEN M) { return Zf_red_destroy(mulii(a,b), M); }14561457/* In place, bit reversing FFT */1458static void1459muliifft_dit(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)1460{1461pari_sp av = avma;1462long i;1463ulong j, no = (o<<1)%ord;1464long hstep=step>>1;1465for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)1466{1467GEN a = Zf_add(gel(FFT,i), gel(FFT,i+hstep), M);1468GEN b = Zf_mulsqrt2(Zf_sub(gel(FFT,i), gel(FFT,i+hstep), M), j, ord, M);1469affii(a,gel(FFT,i));1470affii(b,gel(FFT,i+hstep));1471set_avma(av);1472}1473if (hstep>1)1474{1475muliifft_dit(no, ord, M, FFT, d, hstep);1476muliifft_dit(no, ord, M, FFT, d+hstep, hstep);1477}1478}14791480/* In place, bit reversed FFT, inverse of muliifft_dit */1481static void1482muliifft_dis(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)1483{1484pari_sp av = avma;1485long i;1486ulong j, no = (o<<1)%ord;1487long hstep=step>>1;1488if (hstep>1)1489{1490muliifft_dis(no, ord, M, FFT, d, hstep);1491muliifft_dis(no, ord, M, FFT, d+hstep, hstep);1492}1493for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)1494{1495GEN z = Zf_mulsqrt2(gel(FFT,i+hstep), j, ord, M);1496GEN a = Zf_add(gel(FFT,i), z, M);1497GEN b = Zf_sub(gel(FFT,i), z, M);1498affii(a,gel(FFT,i));1499affii(b,gel(FFT,i+hstep));1500set_avma(av);1501}1502}15031504static GEN1505muliifft_spliti(GEN a, long na, long bs, long n, long mod)1506{1507GEN ap = a+na-1;1508GEN c = cgetg(n+1, t_VEC);1509long i,j;1510for(i=1;i<=n;i++)1511{1512GEN z = cgeti(mod+3);1513if (na)1514{1515long m = minss(bs, na), v=0;1516GEN zp, aa=ap-m+1;1517while (!*aa && v<m) {aa++; v++;}1518zp = z+m-v+1;1519for (j=v; j < m; j++)1520*zp-- = *ap--;1521ap -= v; na -= m;1522z[1] = evalsigne(m!=v) | evallgefint(m-v+2);1523} else1524z[1] = evalsigne(0) | evallgefint(2);1525gel(c, i) = z;1526}1527return c;1528}15291530static GEN1531muliifft_unspliti(GEN V, long bs, long len)1532{1533long s, i, j, l = lg(V);1534GEN a = cgeti(len);1535a[1] = evalsigne(1)|evallgefint(len);1536for(i=2;i<len;i++)1537a[i] = 0;1538for(i=1, s=0; i<l; i++, s+=bs)1539{1540GEN u = gel(V,i);1541if (signe(u))1542{1543GEN ap = int_W(a,s);1544GEN up = int_LSW(u);1545long lu = NLIMBS(u);1546LOCAL_OVERFLOW;1547*ap = addll(*ap, *up--); ap--;1548for (j=1; j<lu; j++)1549{ *ap = addllx(*ap, *up--); ap--; }1550while (overflow)1551{ *ap = addll(*ap, 1); ap--; }1552}1553}1554return int_normalize(a,0);1555}15561557static GEN1558sqrispec_fft(GEN a, long na)1559{1560pari_sp av, ltop = avma;1561long len = 2*na;1562long k, mod, bs, n;1563GEN FFT, M;1564long i;1565ulong o, ord;15661567mulliifft_params(len,&k,&mod,&bs,&n,&ord);1568o = ord>>k;1569M = int2n(mod*BITS_IN_LONG);1570M[2+mod] = 1;1571FFT = muliifft_spliti(a, na, bs, n, mod);1572muliifft_dit(o, ord, M, FFT, 0, n);1573av = avma;1574for(i=1; i<=n; i++)1575{1576affii(Zf_sqr(gel(FFT,i), M), gel(FFT,i));1577set_avma(av);1578}1579muliifft_dis(ord-o, ord, M, FFT, 0, n);1580for(i=1; i<=n; i++)1581{1582affii(Zf_shift(gel(FFT,i), (ord>>1)-k, M), gel(FFT,i));1583set_avma(av);1584}1585return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));1586}15871588static GEN1589muliispec_fft(GEN a, GEN b, long na, long nb)1590{1591pari_sp av, av2, ltop = avma;1592long len = na+nb;1593long k, mod, bs, n;1594GEN FFT, FFTb, M;1595long i;1596ulong o, ord;15971598mulliifft_params(len,&k,&mod,&bs,&n,&ord);1599o = ord>>k;1600M = int2n(mod*BITS_IN_LONG);1601M[2+mod] = 1;1602FFT = muliifft_spliti(a, na, bs, n, mod);1603av=avma;1604muliifft_dit(o, ord, M, FFT, 0, n);1605FFTb = muliifft_spliti(b, nb, bs, n, mod);1606av2 = avma;1607muliifft_dit(o, ord, M, FFTb, 0, n);1608for(i=1; i<=n; i++)1609{1610affii(Zf_mul(gel(FFT,i), gel(FFTb,i), M), gel(FFT,i));1611set_avma(av2);1612}1613set_avma(av);1614muliifft_dis(ord-o, ord, M, FFT, 0, n);1615for(i=1; i<=n; i++)1616{1617affii(Zf_shift(gel(FFT,i),(ord>>1)-k,M), gel(FFT,i));1618set_avma(av);1619}1620return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));1621}16221623/********************************************************************/1624/** **/1625/** INTEGER MULTIPLICATION (KARATSUBA) **/1626/** **/1627/********************************************************************/16281629/* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */1630static GEN1631addshiftw(GEN x, GEN y, long d)1632{1633GEN z,z0,y0,yd, zd = (GEN)avma;1634long a,lz,ly = lgefint(y);16351636z0 = new_chunk(d);1637a = ly-2; yd = y+ly;1638if (a >= d)1639{1640y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */1641a -= d;1642if (a)1643z = addiispec(LIMBS(x), LIMBS(y), NLIMBS(x), a);1644else1645z = icopy(x);1646}1647else1648{1649y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */1650while (zd > z0) *--zd = 0; /* complete with 0s */1651z = icopy(x);1652}1653lz = lgefint(z)+d;1654z[1] = evalsigne(1) | evallgefint(lz);1655z[0] = evaltyp(t_INT) | evallg(lz); return z;1656}16571658/* Fast product (Karatsuba) of integers. a and b are "special" GENs1659* c,c0,c1,c2 are genuine GENs.1660*/1661GEN1662muliispec(GEN a, GEN b, long na, long nb)1663{1664GEN a0,c,c0;1665long n0, n0a, i;1666pari_sp av;16671668if (na < nb) swapspec(a,b, na,nb);1669if (nb < MULII_KARATSUBA_LIMIT) return muliispec_basecase(a,b,na,nb);1670if (nb >= MULII_FFT_LIMIT) return muliispec_fft(a,b,na,nb);1671i=(na>>1); n0=na-i; na=i;1672av=avma; a0=a+na; n0a=n0;1673while (n0a && !*a0) { a0++; n0a--; }16741675if (n0a && nb > n0)1676{ /* nb <= na <= n0 */1677GEN b0,c1,c2;1678long n0b;16791680nb -= n0;1681c = muliispec(a,b,na,nb);1682b0 = b+nb; n0b = n0;1683while (n0b && !*b0) { b0++; n0b--; }1684if (n0b)1685{1686c0 = muliispec(a0,b0, n0a,n0b);16871688c2 = addiispec(a0,a, n0a,na);1689c1 = addiispec(b0,b, n0b,nb);1690c1 = muliispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));1691c2 = addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0),NLIMBS(c));16921693c1 = subiispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));1694}1695else1696{1697c0 = gen_0;1698c1 = muliispec(a0,b, n0a,nb);1699}1700c = addshiftw(c,c1, n0);1701}1702else1703{1704c = muliispec(a,b,na,nb);1705c0 = muliispec(a0,b,n0a,nb);1706}1707return gerepileuptoint(av, addshiftw(c,c0, n0));1708}1709GEN1710muluui(ulong x, ulong y, GEN z)1711{1712long t, s = signe(z);1713GEN r;1714LOCAL_HIREMAINDER;17151716if (!x || !y || !signe(z)) return gen_0;1717t = mulll(x,y);1718if (!hiremainder)1719r = muluispec(t, z+2, lgefint(z)-2);1720else1721{1722long tmp[2];1723tmp[0] = hiremainder;1724tmp[1] = t;1725r = muliispec(z+2,tmp,lgefint(z)-2,2);1726}1727setsigne(r,s); return r;1728}17291730#define sqrispec_mirror sqrispec1731#define muliispec_mirror muliispec17321733/* x % (2^n), assuming n >= 0 */1734GEN1735remi2n(GEN x, long n)1736{1737long hi,l,k,lx,ly, sx = signe(x);1738GEN z, xd, zd;17391740if (!sx || !n) return gen_0;17411742k = dvmdsBIL(n, &l);1743lx = lgefint(x);1744if (lx < k+3) return icopy(x);17451746xd = x + (lx-k-1);1747/* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words1748* ^--- initial xd */1749hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */1750if (!hi)1751{ /* strip leading zeroes from result */1752xd++; while (k && !*xd) { k--; xd++; }1753if (!k) return gen_0;1754ly = k+2; xd--;1755}1756else1757ly = k+3;17581759zd = z = cgeti(ly);1760*++zd = evalsigne(sx) | evallgefint(ly);1761if (hi) *++zd = hi;1762for ( ;k; k--) *++zd = *++xd;1763return z;1764}17651766GEN1767sqrispec(GEN a, long na)1768{1769GEN a0,c;1770long n0, n0a, i;1771pari_sp av;17721773if (na < SQRI_KARATSUBA_LIMIT) return sqrispec_basecase(a,na);1774if (na >= SQRI_FFT_LIMIT) return sqrispec_fft(a,na);1775i=(na>>1); n0=na-i; na=i;1776av=avma; a0=a+na; n0a=n0;1777while (n0a && !*a0) { a0++; n0a--; }1778c = sqrispec(a,na);1779if (n0a)1780{1781GEN t, c1, c0 = sqrispec(a0,n0a);1782#if 01783c1 = shifti(muliispec(a0,a, n0a,na),1);1784#else /* faster */1785t = addiispec(a0,a,n0a,na);1786t = sqrispec(LIMBS(t),NLIMBS(t));1787c1= addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0), NLIMBS(c));1788c1= subiispec(LIMBS(t),LIMBS(c1), NLIMBS(t), NLIMBS(c1));1789#endif1790c = addshiftw(c,c1, n0);1791c = addshiftw(c,c0, n0);1792}1793else1794c = addshiftw(c,gen_0,n0<<1);1795return gerepileuptoint(av, c);1796}17971798/********************************************************************/1799/** **/1800/** KARATSUBA SQUARE ROOT **/1801/** adapted from Paul Zimmermann's implementation of **/1802/** his algorithm in GMP (mpn_sqrtrem) **/1803/** **/1804/********************************************************************/18051806/* Square roots table */1807static const unsigned char approx_tab[192] = {1808128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,1809143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,1810156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,1811169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,1812181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,1813192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,1814202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,1815212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,1816221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,1817230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,1818239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,1819247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,2551820};18211822/* N[0], assume N[0] >= 2^(BIL-2).1823* Return r,s such that s^2 + r = N, 0 <= r <= 2s */1824static void1825p_sqrtu1(ulong *N, ulong *ps, ulong *pr)1826{1827ulong prec, r, s, q, u, n0 = N[0];18281829q = n0 >> (BITS_IN_LONG - 8);1830/* 2^6 = 64 <= q < 256 = 2^8 */1831s = approx_tab[q - 64]; /* 128 <= s < 255 */1832r = (n0 >> (BITS_IN_LONG - 16)) - s * s; /* r <= 2*s */1833if (r > (s << 1)) { r -= (s << 1) | 1; s++; }18341835/* 8-bit approximation from the high 8-bits of N[0] */1836prec = 8;1837n0 <<= 2 * prec;1838while (2 * prec < BITS_IN_LONG)1839{ /* invariant: s has prec bits, and r <= 2*s */1840r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));1841n0 <<= prec;1842u = 2 * s;1843q = r / u; u = r - q * u;1844s = (s << prec) + q;1845u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));1846q = q * q;1847r = u - q;1848if (u < q) { s--; r += (s << 1) | 1; }1849n0 <<= prec;1850prec = 2 * prec;1851}1852*ps = s;1853*pr = r;1854}18551856/* N[0..1], assume N[0] >= 2^(BIL-2).1857* Return 1 if remainder overflows, 0 otherwise */1858static int1859p_sqrtu2(ulong *N, ulong *ps, ulong *pr)1860{1861ulong cc, qhl, r, s, q, u, n1 = N[1];1862LOCAL_OVERFLOW;18631864p_sqrtu1(N, &s, &r); /* r <= 2s */1865qhl = 0; while (r >= s) { qhl++; r -= s; }1866/* now r < s < 2^(BIL/2) */1867r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);1868u = s << 1;1869q = r / u; u = r - q * u;1870q += (qhl & 1) << (BITS_IN_HALFULONG - 1);1871qhl >>= 1;1872/* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */1873s = ((s + qhl) << BITS_IN_HALFULONG) + q;1874cc = u >> BITS_IN_HALFULONG;1875r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);1876r = subll(r, q * q);1877cc -= overflow + qhl;1878/* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */1879if ((long)cc < 0)1880{1881if (s) {1882r = addll(r, s);1883cc += overflow;1884s--;1885} else {1886cc++;1887s = ~0UL;1888}1889r = addll(r, s);1890cc += overflow;1891}1892*ps = s;1893*pr = r; return cc;1894}18951896static void1897xmpn_zero(GEN x, long n)1898{1899while (--n >= 0) x[n]=0;1900}1901static void1902xmpn_copy(GEN z, GEN x, long n)1903{1904long k = n;1905while (--k >= 0) z[k] = x[k];1906}1907/* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */1908static GEN1909catii(GEN a, long la, GEN b, long lb)1910{1911long l = la + lb + 2;1912GEN z = cgetipos(l);1913xmpn_copy(LIMBS(z), a, la);1914xmpn_copy(LIMBS(z) + la, b, lb);1915return int_normalize(z, 0);1916}19171918/* sqrt n[0..1], assume n normalized */1919static GEN1920sqrtispec2(GEN n, GEN *pr)1921{1922ulong s, r;1923int hi = p_sqrtu2((ulong*)n, &s, &r);1924GEN S = utoi(s);1925*pr = hi? uutoi(1,r): utoi(r);1926return S;1927}19281929/* sqrt n[0], _dont_ assume n normalized */1930static GEN1931sqrtispec1_sh(GEN n, GEN *pr)1932{1933GEN S;1934ulong r, s, u0 = uel(n,0);1935int sh = bfffo(u0) & ~1UL;1936if (sh) u0 <<= sh;1937p_sqrtu1(&u0, &s, &r);1938/* s^2 + r = u0, s < 2^(BIL/2). Rescale back:1939* 2^(2k) n = S^2 + R1940* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */1941if (sh) {1942int k = sh >> 1;1943ulong s0 = s & ((1L<<k) - 1);1944r += s * (s0<<1);1945s >>= k;1946r >>= sh;1947}1948S = utoi(s);1949if (pr) *pr = utoi(r);1950return S;1951}19521953/* sqrt n[0..1], _dont_ assume n normalized */1954static GEN1955sqrtispec2_sh(GEN n, GEN *pr)1956{1957GEN S;1958ulong U[2], r, s, u0 = uel(n,0), u1 = uel(n,1);1959int hi, sh = bfffo(u0) & ~1UL;1960if (sh) {1961u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));1962u1 <<= sh;1963}1964U[0] = u0;1965U[1] = u1; hi = p_sqrtu2(U, &s, &r);1966/* s^2 + R = u0|u1. Rescale back:1967* 2^(2k) n = S^2 + R1968* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */1969if (sh) {1970int k = sh >> 1;1971ulong s0 = s & ((1L<<k) - 1);1972LOCAL_HIREMAINDER;1973LOCAL_OVERFLOW;1974r = addll(r, mulll(s, (s0<<1)));1975if (overflow) hiremainder++;1976hiremainder += hi; /* + 0 or 1 */1977s >>= k;1978r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));1979hi = (hiremainder & (1L<<sh));1980}1981S = utoi(s);1982if (pr) *pr = hi? uutoi(1,r): utoi(r);1983return S;1984}19851986/* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S1987* Assume N normalized */1988static GEN1989sqrtispec(GEN N, long n, GEN *r)1990{1991GEN S, R, q, z, u;1992long l, h;19931994if (n == 1) return sqrtispec2(N, r);1995l = n >> 1;1996h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */1997S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */19981999z = catii(LIMBS(R), NLIMBS(R), N + 2*h, l); /* = R | a1(l) */2000q = dvmdii(z, shifti(S,1), &u);2001z = catii(LIMBS(u), NLIMBS(u), N + n + h, l); /* = u | a0(l) */20022003S = addshiftw(S, q, l);2004R = subii(z, sqri(q));2005if (signe(R) < 0)2006{2007GEN S2 = shifti(S,1);2008R = addis(subiispec(LIMBS(S2),LIMBS(R), NLIMBS(S2),NLIMBS(R)), -1);2009S = addis(S, -1);2010}2011*r = R; return S;2012}20132014/* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.2015* As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the2016* remainder is 0. R = NULL is allowed. */2017GEN2018sqrtremi(GEN N, GEN *r)2019{2020pari_sp av;2021GEN S, R, n = N+2;2022long k, l2, ln = NLIMBS(N);2023int sh;20242025if (ln <= 2)2026{2027if (ln == 2) return sqrtispec2_sh(n, r);2028if (ln == 1) return sqrtispec1_sh(n, r);2029if (r) *r = gen_0;2030return gen_0;2031}2032av = avma;2033sh = bfffo(n[0]) >> 1;2034l2 = (ln + 1) >> 1;2035if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */2036GEN s0, t = new_chunk(ln + 1);2037t[ln] = 0;2038if (sh)2039shift_left(t, n, 0,ln-1, 0, sh << 1);2040else2041xmpn_copy(t, n, ln);2042S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */2043/* Rescale back:2044* 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/22045* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */2046k = sh + (ln & 1) * (BITS_IN_LONG/2);2047s0 = remi2n(S, k);2048R = addii(shifti(R,-1), mulii(s0, S));2049R = shifti(R, 1 - (k<<1));2050S = shifti(S, -k);2051}2052else2053S = sqrtispec(n, l2, &R);20542055if (!r) { set_avma((pari_sp)S); return gerepileuptoint(av, S); }2056gerepileall(av, 2, &S, &R); *r = R; return S;2057}20582059/* compute sqrt(|a|), assuming a != 0 */20602061#if 12062GEN2063sqrtr_abs(GEN x)2064{2065long l = realprec(x) - 2, e = expo(x), er = e>>1;2066GEN b, c, res = cgetr(2 + l);2067res[1] = evalsigne(1) | evalexpo(er);2068if (e&1) {2069b = new_chunk(l << 1);2070xmpn_copy(b, x+2, l);2071xmpn_zero(b + l,l);2072b = sqrtispec(b, l, &c);2073xmpn_copy(res+2, b+2, l);2074if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);2075} else {2076ulong u;2077b = new_chunk(2 + (l << 1));2078shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);2079b[0] = uel(x,2)>>1;2080xmpn_zero(b + l+1,l+1);2081b = sqrtispec(b, l+1, &c);2082xmpn_copy(res+2, b+2, l);2083u = uel(b,l+2);2084if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))2085roundr_up_ip(res, l+2);2086}2087return gc_const((pari_sp)res, res);2088}20892090#else /* use t_REAL: currently much slower (quadratic division) */20912092#ifdef LONG_IS_64BIT2093/* 64 bits of b = sqrt(a[0] * 2^64 + a[1]) [ up to 1ulp ] */2094static ulong2095sqrtu2(ulong *a)2096{2097ulong c, b = dblmantissa( sqrt((double)a[0]) );2098LOCAL_HIREMAINDER;2099LOCAL_OVERFLOW;21002101/* > 32 correct bits, 1 Newton iteration to reach 64 */2102if (b <= a[0]) return HIGHBIT | (a[0] >> 1);2103hiremainder = a[0]; c = divll(a[1], b);2104return (addll(c, b) >> 1) | HIGHBIT;2105}2106/* 64 bits of sqrt(a[0] * 2^63) */2107static ulong2108sqrtu2_1(ulong *a)2109{2110ulong t[2];2111t[0] = (a[0] >> 1);2112t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);2113return sqrtu2(t);2114}2115#else2116/* 32 bits of sqrt(a[0] * 2^32) */2117static ulong2118sqrtu2(ulong *a) { return dblmantissa( sqrt((double)a[0]) ); }2119/* 32 bits of sqrt(a[0] * 2^31) */2120static ulong2121sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }2122#endif21232124GEN2125sqrtr_abs(GEN x)2126{2127long l1, i, l = lg(x), ex = expo(x);2128GEN a, t, y = cgetr(l);2129pari_sp av, av0 = avma;21302131a = rtor(x, l+1);2132t = cgetr(l+1);2133if (ex & 1) { /* odd exponent */2134a[1] = evalsigne(1) | _evalexpo(1);2135t[2] = (long)sqrtu2((ulong*)a + 2);2136} else { /* even exponent */2137a[1] = evalsigne(1) | _evalexpo(0);2138t[2] = (long)sqrtu2_1((ulong*)a + 2);2139}2140t[1] = evalsigne(1) | _evalexpo(0);2141for (i = 3; i <= l; i++) t[i] = 0;21422143/* |x| = 2^(ex/2) a, t ~ sqrt(a) */2144l--; l1 = 1; av = avma;2145while (l1 < l) { /* let t := (t + a/t)/2 */2146l1 <<= 1; if (l1 > l) l1 = l;2147setlg(a, l1 + 2);2148setlg(t, l1 + 2);2149affrr(addrr(t, divrr(a,t)), t); shiftr_inplace(t, -1);2150set_avma(av);2151}2152affrr(t,y); shiftr_inplace(y, (ex>>1));2153return gc_const(av0, y);2154}21552156#endif21572158/*******************************************************************2159* *2160* Base Conversion *2161* *2162*******************************************************************/21632164static void2165convi_dac(GEN x, ulong l, ulong *res)2166{2167pari_sp ltop=avma;2168ulong m;2169GEN x1,x2;2170if (l==1) { *res=itou(x); return; }2171m=l>>1;2172x1=dvmdii(x,powuu(1000000000UL,m),&x2);2173convi_dac(x1,l-m,res+m);2174convi_dac(x2,m,res);2175set_avma(ltop);2176}21772178/* convert integer --> base 10^9 [not memory clean] */2179ulong *2180convi(GEN x, long *l)2181{2182long lz, lx = lgefint(x);2183ulong *z;2184if (lx == 3 && uel(x,2) < 1000000000UL) {2185z = (ulong*)new_chunk(1);2186*z = x[2];2187*l = 1; return z+1;2188}2189lz = 1 + (long)bit_accuracy_mul(lx, LOG10_2/9);2190z = (ulong*)new_chunk(lz);2191convi_dac(x,(ulong)lz,z);2192while (z[lz-1]==0) lz--;2193*l=lz; return z+lz;2194}2195219621972198