Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Testing latest pari + WASM + node.js... and it works?! Wow.

28495 views
License: GPL3
ubuntu2004
1
#line 2 "../src/kernel/none/mp.c"
2
/* Copyright (C) 2000-2003 The PARI group.
3
4
This file is part of the PARI/GP package.
5
6
PARI/GP is free software; you can redistribute it and/or modify it under the
7
terms of the GNU General Public License as published by the Free Software
8
Foundation; either version 2 of the License, or (at your option) any later
9
version. It is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY WHATSOEVER.
11
12
Check the License for details. You should have received a copy of it, along
13
with the package; see the file 'COPYING'. If not, write to the Free Software
14
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15
16
/***********************************************************************/
17
/** **/
18
/** MULTIPRECISION KERNEL **/
19
/** **/
20
/***********************************************************************/
21
#include "pari.h"
22
#include "paripriv.h"
23
#include "../src/kernel/none/tune-gen.h"
24
25
void
26
pari_kernel_init(void) { }
27
void
28
pari_kernel_close(void) { }
29
const char *
30
pari_kernel_version(void) { return ""; }
31
32
/* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
33
* GENs but pairs (long *a, long na) representing a list of digits (in basis
34
* BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
35
* need to reintroduce codewords ] */
36
37
#define LIMBS(x) ((x)+2)
38
#define NLIMBS(x) (lgefint(x)-2)
39
40
/* Normalize a nonnegative integer */
41
GEN
42
int_normalize(GEN x, long known_zero_words)
43
{
44
long i, lx = lgefint(x);
45
GEN x0;
46
if (lx == 2) { x[1] = evalsigne(0) | evallgefint(2); return x; }
47
if (!known_zero_words && x[2]) return x;
48
for (i = 2+known_zero_words; i < lx; i++)
49
if (x[i]) break;
50
x0 = x; i -= 2; x += i;
51
if (x0 == (GEN)avma) set_avma((pari_sp)x);
52
else stackdummy((pari_sp)(x0+i), (pari_sp)x0);
53
lx -= i;
54
x[0] = evaltyp(t_INT) | evallg(lx);
55
if (lx == 2) x[1] = evalsigne(0) | evallgefint(lx);
56
else x[1] = evalsigne(1) | evallgefint(lx);
57
return x;
58
}
59
60
/***********************************************************************/
61
/** **/
62
/** ADDITION / SUBTRACTION **/
63
/** **/
64
/***********************************************************************/
65
66
GEN
67
setloop(GEN a)
68
{
69
pari_sp av = avma;
70
(void)cgetg(lgefint(a) + 3, t_VECSMALL);
71
return icopy_avma(a, av); /* two cells of extra space before a */
72
}
73
74
/* we had a = setloop(?), then some incloops. Reset a to b */
75
GEN
76
resetloop(GEN a, GEN b) {
77
long lb = lgefint(b);
78
a += lgefint(a) - lb;
79
a[0] = evaltyp(t_INT) | evallg(lb);
80
affii(b, a); return a;
81
}
82
83
/* assume a > 0, initialized by setloop. Do a++ */
84
static GEN
85
incpos(GEN a)
86
{
87
long i, l = lgefint(a);
88
for (i=l-1; i>1; i--)
89
if (++a[i]) return a;
90
l++; a--; /* use extra cell */
91
a[0]=evaltyp(t_INT) | _evallg(l);
92
a[1]=evalsigne(1) | evallgefint(l);
93
a[2]=1; return a;
94
}
95
96
/* assume a < 0, initialized by setloop. Do a++ */
97
static GEN
98
incneg(GEN a)
99
{
100
long i, l = lgefint(a)-1;
101
if (uel(a,l)--)
102
{
103
if (l == 2 && !a[2])
104
{
105
a++; /* save one cell */
106
a[0] = evaltyp(t_INT) | _evallg(2);
107
a[1] = evalsigne(0) | evallgefint(2);
108
}
109
return a;
110
}
111
for (i = l-1;; i--) /* finishes since a[2] != 0 */
112
if (uel(a,i)--) break;
113
if (!a[2])
114
{
115
a++; /* save one cell */
116
a[0] = evaltyp(t_INT) | _evallg(l);
117
a[1] = evalsigne(-1) | evallgefint(l);
118
}
119
return a;
120
}
121
122
/* assume a initialized by setloop. Do a++ */
123
GEN
124
incloop(GEN a)
125
{
126
switch(signe(a))
127
{
128
case 0: a--; /* use extra cell */
129
a[0]=evaltyp(t_INT) | _evallg(3);
130
a[1]=evalsigne(1) | evallgefint(3);
131
a[2]=1; return a;
132
case -1: return incneg(a);
133
default: return incpos(a);
134
}
135
}
136
137
INLINE GEN
138
adduispec(ulong s, GEN x, long nx)
139
{
140
GEN xd, zd = (GEN)avma;
141
long lz;
142
143
if (nx == 1) return adduu(s, uel(x,0));
144
lz = nx+3; (void)new_chunk(lz);
145
xd = x + nx;
146
*--zd = (ulong)*--xd + s;
147
if ((ulong)*zd < s)
148
for(;;)
149
{
150
if (xd == x) { *--zd = 1; break; } /* enlarge z */
151
*--zd = ((ulong)*--xd) + 1;
152
if (*zd) { lz--; break; }
153
}
154
else lz--;
155
while (xd > x) *--zd = *--xd;
156
*--zd = evalsigne(1) | evallgefint(lz);
157
*--zd = evaltyp(t_INT) | evallg(lz);
158
return gc_const((pari_sp)zd, zd);
159
}
160
161
GEN
162
adduispec_offset(ulong s, GEN x, long offset, long nx)
163
{
164
GEN xd = x+lgefint(x)-nx-offset;
165
while (nx && *xd==0) {xd++; nx--;}
166
if (!nx) return utoi(s);
167
return adduispec(s,xd,nx);
168
}
169
170
static GEN
171
addiispec(GEN x, GEN y, long nx, long ny)
172
{
173
GEN xd, yd, zd;
174
long lz, i = -2;
175
LOCAL_OVERFLOW;
176
177
if (nx < ny) swapspec(x,y, nx,ny);
178
if (ny == 1) return adduispec(*y,x,nx);
179
zd = (GEN)avma;
180
lz = nx+3; (void)new_chunk(lz);
181
xd = x + nx;
182
yd = y + ny;
183
zd[-1] = addll(xd[-1], yd[-1]);
184
#ifdef addllx8
185
for ( ; i-8 > -ny; i-=8)
186
addllx8(xd+i, yd+i, zd+i, overflow);
187
#endif
188
for ( ; i >= -ny; i--) zd[i] = addllx(xd[i], yd[i]);
189
if (overflow)
190
for(;;)
191
{
192
if (i < -nx) { zd[i] = 1; i--; break; } /* enlarge z */
193
zd[i] = uel(xd,i) + 1;
194
if (zd[i]) { i--; lz--; break; }
195
i--;
196
}
197
else lz--;
198
for (; i >= -nx; i--) zd[i] = xd[i];
199
zd += i+1;
200
*--zd = evalsigne(1) | evallgefint(lz);
201
*--zd = evaltyp(t_INT) | evallg(lz);
202
return gc_const((pari_sp)zd, zd);
203
}
204
205
/* assume x >= s */
206
INLINE GEN
207
subiuspec(GEN x, ulong s, long nx)
208
{
209
GEN xd, zd = (GEN)avma;
210
long lz;
211
LOCAL_OVERFLOW;
212
213
if (nx == 1) return utoi(x[0] - s);
214
215
lz = nx+2; (void)new_chunk(lz);
216
xd = x + nx;
217
*--zd = subll(*--xd, s);
218
if (overflow)
219
for(;;)
220
{
221
*--zd = ((ulong)*--xd) - 1;
222
if (*xd) break;
223
}
224
if (xd == x)
225
while (*zd == 0) { zd++; lz--; } /* shorten z */
226
else
227
do *--zd = *--xd; while (xd > x);
228
*--zd = evalsigne(1) | evallgefint(lz);
229
*--zd = evaltyp(t_INT) | evallg(lz);
230
return gc_const((pari_sp)zd, zd);
231
}
232
233
/* assume x > y */
234
static GEN
235
subiispec(GEN x, GEN y, long nx, long ny)
236
{
237
GEN xd,yd,zd;
238
long lz, i = -2;
239
LOCAL_OVERFLOW;
240
241
if (ny==1) return subiuspec(x,*y,nx);
242
zd = (GEN)avma;
243
lz = nx+2; (void)new_chunk(lz);
244
xd = x + nx;
245
yd = y + ny;
246
zd[-1] = subll(xd[-1], yd[-1]);
247
#ifdef subllx8
248
for ( ; i-8 > -ny; i-=8)
249
subllx8(xd+i, yd+i, zd+i, overflow);
250
#endif
251
for ( ; i >= -ny; i--) zd[i] = subllx(xd[i], yd[i]);
252
if (overflow)
253
for(;;)
254
{
255
zd[i] = uel(xd,i) - 1;
256
if (xd[i--]) break;
257
}
258
if (i>=-nx)
259
for (; i >= -nx; i--) zd[i] = xd[i];
260
else
261
while (zd[i+1] == 0) { i++; lz--; } /* shorten z */
262
zd += i+1;
263
*--zd = evalsigne(1) | evallgefint(lz);
264
*--zd = evaltyp(t_INT) | evallg(lz);
265
return gc_const((pari_sp)zd, zd);
266
}
267
268
static void
269
roundr_up_ip(GEN x, long l)
270
{
271
long i = l;
272
for(;;)
273
{
274
if (++uel(x,--i)) break;
275
if (i == 2) { x[2] = (long)HIGHBIT; shiftr_inplace(x, 1); break; }
276
}
277
}
278
279
void
280
affir(GEN x, GEN y)
281
{
282
const long s = signe(x), ly = lg(y);
283
long lx, sh, i;
284
285
if (!s)
286
{
287
y[1] = evalexpo(-prec2nbits(ly));
288
return;
289
}
290
291
lx = lgefint(x); sh = bfffo(x[2]);
292
y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
293
if (sh) {
294
if (lx <= ly)
295
{
296
for (i=lx; i<ly; i++) y[i]=0;
297
shift_left(y,x,2,lx-1, 0,sh);
298
return;
299
}
300
shift_left(y,x,2,ly-1, x[ly],sh);
301
/* lx > ly: round properly */
302
if ((uel(x,ly)<<sh) & HIGHBIT) roundr_up_ip(y, ly);
303
}
304
else {
305
if (lx <= ly)
306
{
307
for (i=2; i<lx; i++) y[i]=x[i];
308
for ( ; i<ly; i++) y[i]=0;
309
return;
310
}
311
for (i=2; i<ly; i++) y[i]=x[i];
312
/* lx > ly: round properly */
313
if (uel(x,ly) & HIGHBIT) roundr_up_ip(y, ly);
314
}
315
}
316
317
INLINE GEN
318
shiftispec(GEN x, long nx, long n)
319
{
320
long ny, i, m;
321
GEN y, yd;
322
if (!n) return icopyspec(x, nx);
323
324
if (n > 0)
325
{
326
GEN z = (GEN)avma;
327
long d = dvmdsBIL(n, &m);
328
329
ny = nx+d; y = new_chunk(ny + 2); yd = y + 2;
330
for ( ; d; d--) *--z = 0;
331
if (!m) for (i=0; i<nx; i++) yd[i]=x[i];
332
else
333
{
334
register const ulong sh = BITS_IN_LONG - m;
335
shift_left(yd,x, 0,nx-1, 0,m);
336
i = uel(x,0) >> sh;
337
/* Extend y on the left? */
338
if (i) { ny++; y = new_chunk(1); y[2] = i; }
339
}
340
}
341
else
342
{
343
ny = nx - dvmdsBIL(-n, &m);
344
if (ny<1) return gen_0;
345
y = new_chunk(ny + 2); yd = y + 2;
346
if (m) {
347
shift_right(yd,x, 0,ny, 0,m);
348
if (yd[0] == 0)
349
{
350
if (ny==1) return gc_const((pari_sp)(y+3), gen_0);
351
ny--; set_avma((pari_sp)(++y));
352
}
353
} else {
354
for (i=0; i<ny; i++) yd[i]=x[i];
355
}
356
}
357
y[1] = evalsigne(1)|evallgefint(ny + 2);
358
y[0] = evaltyp(t_INT)|evallg(ny + 2); return y;
359
}
360
361
GEN
362
mantissa2nr(GEN x, long n)
363
{ /*This is a kludge since x is not an integer*/
364
long s = signe(x);
365
GEN y;
366
367
if(s == 0) return gen_0;
368
y = shiftispec(x + 2, lg(x) - 2, n);
369
if (signe(y)) setsigne(y, s);
370
return y;
371
}
372
373
GEN
374
truncr(GEN x)
375
{
376
long d,e,i,s,m;
377
GEN y;
378
379
if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;
380
d = nbits2lg(e+1); m = remsBIL(e);
381
if (d > lg(x)) pari_err_PREC( "truncr (precision loss in truncation)");
382
383
y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
384
if (++m == BITS_IN_LONG)
385
for (i=2; i<d; i++) y[i]=x[i];
386
else
387
shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
388
return y;
389
}
390
391
/* integral part */
392
GEN
393
floorr(GEN x)
394
{
395
long d,e,i,lx,m;
396
GEN y;
397
398
if (signe(x) >= 0) return truncr(x);
399
if ((e=expo(x)) < 0) return gen_m1;
400
d = nbits2lg(e+1); m = remsBIL(e);
401
lx=lg(x); if (d>lx) pari_err_PREC( "floorr (precision loss in truncation)");
402
y = new_chunk(d);
403
if (++m == BITS_IN_LONG)
404
{
405
for (i=2; i<d; i++) y[i]=x[i];
406
i=d; while (i<lx && !x[i]) i++;
407
if (i==lx) goto END;
408
}
409
else
410
{
411
shift_right(y,x, 2,d,0, BITS_IN_LONG - m);
412
if (uel(x,d-1)<<m == 0)
413
{
414
i=d; while (i<lx && !x[i]) i++;
415
if (i==lx) goto END;
416
}
417
}
418
/* set y:=y+1 */
419
for (i=d-1; i>=2; i--) { uel(y,i)++; if (y[i]) goto END; }
420
y=new_chunk(1); y[2]=1; d++;
421
END:
422
y[1] = evalsigne(-1) | evallgefint(d);
423
y[0] = evaltyp(t_INT) | evallg(d); return y;
424
}
425
426
INLINE int
427
cmpiispec(GEN x, GEN y, long lx, long ly)
428
{
429
long i;
430
if (lx < ly) return -1;
431
if (lx > ly) return 1;
432
i = 0; while (i<lx && x[i]==y[i]) i++;
433
if (i==lx) return 0;
434
return (uel(x,i) > uel(y,i))? 1: -1;
435
}
436
437
INLINE int
438
equaliispec(GEN x, GEN y, long lx, long ly)
439
{
440
long i;
441
if (lx != ly) return 0;
442
i = ly-1; while (i>=0 && x[i]==y[i]) i--;
443
return i < 0;
444
}
445
446
/***********************************************************************/
447
/** **/
448
/** MULTIPLICATION **/
449
/** **/
450
/***********************************************************************/
451
/* assume ny > 0 */
452
INLINE GEN
453
muluispec(ulong x, GEN y, long ny)
454
{
455
GEN yd, z = (GEN)avma;
456
long lz = ny+3;
457
LOCAL_HIREMAINDER;
458
459
(void)new_chunk(lz);
460
yd = y + ny; *--z = mulll(x, *--yd);
461
while (yd > y) *--z = addmul(x,*--yd);
462
if (hiremainder) *--z = hiremainder; else lz--;
463
*--z = evalsigne(1) | evallgefint(lz);
464
*--z = evaltyp(t_INT) | evallg(lz);
465
return gc_const((pari_sp)z, z);
466
}
467
468
/* a + b*|Y| */
469
GEN
470
addumului(ulong a, ulong b, GEN Y)
471
{
472
GEN yd,y,z;
473
long ny,lz;
474
LOCAL_HIREMAINDER;
475
LOCAL_OVERFLOW;
476
477
if (!b || !signe(Y)) return utoi(a);
478
479
y = LIMBS(Y); z = (GEN)avma;
480
ny = NLIMBS(Y);
481
lz = ny+3;
482
483
(void)new_chunk(lz);
484
yd = y + ny; *--z = addll(a, mulll(b, *--yd));
485
if (overflow) hiremainder++; /* can't overflow */
486
while (yd > y) *--z = addmul(b,*--yd);
487
if (hiremainder) *--z = hiremainder; else lz--;
488
*--z = evalsigne(1) | evallgefint(lz);
489
*--z = evaltyp(t_INT) | evallg(lz);
490
return gc_const((pari_sp)z, z);
491
}
492
493
/***********************************************************************/
494
/** **/
495
/** DIVISION **/
496
/** **/
497
/***********************************************************************/
498
499
ulong
500
umodiu(GEN y, ulong x)
501
{
502
long sy=signe(y),ly,i;
503
ulong xi;
504
LOCAL_HIREMAINDER;
505
506
if (!x) pari_err_INV("umodiu",gen_0);
507
if (!sy) return 0;
508
ly = lgefint(y);
509
if (x <= uel(y,2))
510
{
511
hiremainder=0;
512
if (ly==3)
513
{
514
hiremainder=uel(y,2)%x;
515
if (!hiremainder) return 0;
516
return (sy > 0)? hiremainder: x - hiremainder;
517
}
518
}
519
else
520
{
521
if (ly==3) return (sy > 0)? uel(y,2): x - uel(y,2);
522
hiremainder=uel(y,2); ly--; y++;
523
}
524
xi = get_Fl_red(x);
525
for (i=2; i<ly; i++) (void)divll_pre(y[i],x,xi);
526
if (!hiremainder) return 0;
527
return (sy > 0)? hiremainder: x - hiremainder;
528
}
529
530
/* return |y| \/ x */
531
GEN
532
absdiviu_rem(GEN y, ulong x, ulong *rem)
533
{
534
long ly,i;
535
GEN z;
536
ulong xi;
537
LOCAL_HIREMAINDER;
538
539
if (!x) pari_err_INV("absdiviu_rem",gen_0);
540
if (!signe(y)) { *rem = 0; return gen_0; }
541
542
ly = lgefint(y);
543
if (x <= uel(y,2))
544
{
545
hiremainder=0;
546
if (ly==3)
547
{
548
z = cgetipos(3);
549
z[2] = divll(uel(y,2),x);
550
*rem = hiremainder; return z;
551
}
552
}
553
else
554
{
555
if (ly==3) { *rem = uel(y,2); return gen_0; }
556
hiremainder = uel(y,2); ly--; y++;
557
}
558
xi = get_Fl_red(x);
559
z = cgetipos(ly);
560
for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
561
*rem = hiremainder; return z;
562
}
563
564
GEN
565
divis_rem(GEN y, long x, long *rem)
566
{
567
long sy=signe(y),ly,s,i;
568
GEN z;
569
ulong xi;
570
LOCAL_HIREMAINDER;
571
572
if (!x) pari_err_INV("divis_rem",gen_0);
573
if (!sy) { *rem=0; return gen_0; }
574
if (x<0) { s = -sy; x = -x; } else s = sy;
575
576
ly = lgefint(y);
577
if ((ulong)x <= uel(y,2))
578
{
579
hiremainder=0;
580
if (ly==3)
581
{
582
z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
583
z[2] = divll(uel(y,2),x);
584
if (sy<0) hiremainder = - ((long)hiremainder);
585
*rem = (long)hiremainder; return z;
586
}
587
}
588
else
589
{
590
if (ly==3) { *rem = itos(y); return gen_0; }
591
hiremainder = uel(y,2); ly--; y++;
592
}
593
xi = get_Fl_red(x);
594
z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
595
for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x,xi);
596
if (sy<0) hiremainder = - ((long)hiremainder);
597
*rem = (long)hiremainder; return z;
598
}
599
600
GEN
601
divis(GEN y, long x)
602
{
603
long sy=signe(y),ly,s,i;
604
ulong xi;
605
GEN z;
606
LOCAL_HIREMAINDER;
607
608
if (!x) pari_err_INV("divis",gen_0);
609
if (!sy) return gen_0;
610
if (x<0) { s = -sy; x = -x; } else s = sy;
611
612
ly = lgefint(y);
613
if ((ulong)x <= uel(y,2))
614
{
615
hiremainder=0;
616
if (ly==3)
617
{
618
z = cgeti(3); z[1] = evallgefint(3) | evalsigne(s);
619
z[2] = divll(y[2],x);
620
return z;
621
}
622
}
623
else
624
{
625
if (ly==3) return gen_0;
626
hiremainder=y[2]; ly--; y++;
627
}
628
xi = get_Fl_red(x);
629
z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
630
for (i=2; i<ly; i++) z[i]=divll_pre(y[i],x, xi);
631
return z;
632
}
633
634
GEN
635
divrr(GEN x, GEN y)
636
{
637
long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;
638
ulong y0,y1;
639
GEN r, r1;
640
641
if (!x) pari_err_INV("divrr",y);
642
e = expo(x) - expo(y);
643
if (!sx) return real_0_bit(e);
644
if (sy<0) sx = -sx;
645
646
lx=lg(x); ly=lg(y);
647
if (ly==3)
648
{
649
ulong k = x[2], l = (lx>3)? x[3]: 0;
650
LOCAL_HIREMAINDER;
651
if (k < uel(y,2)) e--;
652
else
653
{
654
l >>= 1; if (k&1) l |= HIGHBIT;
655
k >>= 1;
656
}
657
hiremainder = k; k = divll(l,y[2]);
658
if (hiremainder > (uel(y,2) >> 1) && !++k) { k = HIGHBIT; e++; }
659
r = cgetr(3);
660
r[1] = evalsigne(sx) | evalexpo(e);
661
r[2] = k; return r;
662
}
663
664
lr = minss(lx,ly); r = new_chunk(lr);
665
r1 = r-1;
666
r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];
667
r1[lr] = (lx>ly)? x[lr]: 0;
668
y0 = y[2]; y1 = y[3];
669
for (i=0; i<lr-1; i++)
670
{ /* r1 = r + (i-1), OK up to r1[2] (accesses at most r[lr]) */
671
ulong k, qp;
672
LOCAL_HIREMAINDER;
673
LOCAL_OVERFLOW;
674
675
if (uel(r1,1) == y0) { qp = ULONG_MAX; k = addll(y0,r1[2]); }
676
else
677
{
678
if (uel(r1,1) > y0) /* can't happen if i=0 */
679
{
680
GEN y1 = y+1;
681
j = lr-i; r1[j] = subll(r1[j],y1[j]);
682
for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);
683
j=i; do uel(r,--j)++; while (j && !uel(r,j));
684
}
685
hiremainder = r1[1]; overflow = 0;
686
qp = divll(r1[2],y0); k = hiremainder;
687
}
688
j = lr-i+1;
689
if (!overflow)
690
{
691
long k3, k4;
692
k3 = mulll(qp,y1);
693
if (j == 3) /* i = lr - 2 maximal, r1[3] undefined -> 0 */
694
k4 = subll(hiremainder,k);
695
else
696
{
697
k3 = subll(k3, r1[3]);
698
k4 = subllx(hiremainder,k);
699
}
700
while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
701
}
702
if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }
703
for (j--; j>1; j--)
704
{
705
r1[j] = subll(r1[j], addmul(qp,y[j]));
706
hiremainder += overflow;
707
}
708
if (uel(r1,1) != hiremainder)
709
{
710
if (uel(r1,1) < hiremainder)
711
{
712
qp--;
713
j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);
714
for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);
715
}
716
else
717
{
718
r1[1] -= hiremainder;
719
while (r1[1])
720
{
721
qp++; if (!qp) { j=i; do uel(r,--j)++; while (j && !r[j]); }
722
j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);
723
for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);
724
r1[1] -= overflow;
725
}
726
}
727
}
728
*++r1 = qp;
729
}
730
/* i = lr-1 */
731
/* round correctly */
732
if (uel(r1,1) > (y0>>1))
733
{
734
j=i; do uel(r,--j)++; while (j && !r[j]);
735
}
736
r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];
737
if (r[0] == 0) e--;
738
else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }
739
else { /* possible only when rounding up to 0x2 0x0 ... */
740
r[2] = (long)HIGHBIT; e++;
741
}
742
r[0] = evaltyp(t_REAL)|evallg(lr);
743
r[1] = evalsigne(sx) | evalexpo(e);
744
return r;
745
}
746
747
GEN
748
divri(GEN x, GEN y)
749
{
750
long lx, s = signe(y);
751
pari_sp av;
752
GEN z;
753
754
if (!s) pari_err_INV("divri",y);
755
if (!signe(x)) return real_0_bit(expo(x) - expi(y));
756
if (!is_bigint(y)) {
757
GEN z = divru(x, y[2]);
758
if (s < 0) togglesign(z);
759
return z;
760
}
761
lx = lg(x); z = cgetr(lx); av = avma;
762
affrr(divrr(x, itor(y, lx+1)), z);
763
return gc_const(av, z);
764
}
765
766
/* Integer division x / y: such that sign(r) = sign(x)
767
* if z = ONLY_REM return remainder, otherwise return quotient
768
* if z != NULL set *z to remainder
769
* *z is the last object on stack (and thus can be disposed of with cgiv
770
* instead of gerepile)
771
* If *z is zero, we put gen_0 here and no copy.
772
* space needed: lx + ly */
773
GEN
774
dvmdii(GEN x, GEN y, GEN *z)
775
{
776
long sx = signe(x), sy = signe(y);
777
long lx, ly = lgefint(y), lz, i, j, sh, lq, lr;
778
pari_sp av;
779
ulong y0,y0i,y1, *xd,*rd,*qd;
780
GEN q, r, r1;
781
782
if (!sx)
783
{
784
if (ly < 3) pari_err_INV("dvmdii",gen_0);
785
if (!z || z == ONLY_REM) return gen_0;
786
*z=gen_0; return gen_0;
787
}
788
if (ly <= 3)
789
{
790
ulong rem;
791
if (ly < 3) pari_err_INV("dvmdii",gen_0);
792
if (z == ONLY_REM)
793
{
794
rem = umodiu(x,uel(y,2));
795
if (!rem) return gen_0;
796
return (sx < 0)? utoineg(uel(y,2) - rem): utoipos(rem);
797
}
798
q = absdiviu_rem(x, uel(y,2), &rem);
799
if (sx != sy) togglesign(q);
800
if (!z) return q;
801
if (!rem) *z = gen_0;
802
else *z = sx < 0? utoineg(rem): utoipos(rem);
803
return q;
804
}
805
lx=lgefint(x);
806
lz=lx-ly;
807
if (lz <= 0)
808
{
809
if (lz == 0)
810
{
811
for (i=2; i<lx; i++)
812
if (x[i] != y[i])
813
{
814
if (uel(x,i) > uel(y,i)) goto DIVIDE;
815
goto TRIVIAL;
816
}
817
if (z == ONLY_REM) return gen_0;
818
if (z) *z = gen_0;
819
if (sx < 0) sy = -sy;
820
return stoi(sy);
821
}
822
TRIVIAL:
823
if (z == ONLY_REM) return icopy(x);
824
if (z) *z = icopy(x);
825
return gen_0;
826
}
827
DIVIDE: /* quotient is nonzero */
828
av=avma; if (sx<0) sy = -sy;
829
r1 = new_chunk(lx); sh = bfffo(y[2]);
830
if (sh)
831
{ /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
832
register const ulong m = BITS_IN_LONG - sh;
833
r = new_chunk(ly);
834
shift_left(r, y,2,ly-1, 0,sh); y = r;
835
shift_left(r1,x,2,lx-1, 0,sh);
836
r1[1] = uel(x,2) >> m;
837
}
838
else
839
{
840
r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];
841
}
842
x = r1;
843
y0 = y[2]; y0i = get_Fl_red(y0);
844
y1 = y[3];
845
for (i=0; i<=lz; i++)
846
{ /* r1 = x + i */
847
ulong k, qp;
848
LOCAL_HIREMAINDER;
849
LOCAL_OVERFLOW;
850
851
if (uel(r1,1) == y0)
852
{
853
qp = ULONG_MAX; k = addll(y0,r1[2]);
854
}
855
else
856
{
857
hiremainder = r1[1]; overflow = 0;
858
qp = divll_pre(r1[2],y0,y0i); k = hiremainder;
859
}
860
if (!overflow)
861
{
862
long k3 = subll(mulll(qp,y1), r1[3]);
863
long k4 = subllx(hiremainder,k);
864
while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
865
}
866
hiremainder = 0; j = ly;
867
for (j--; j>1; j--)
868
{
869
r1[j] = subll(r1[j], addmul(qp,y[j]));
870
hiremainder += overflow;
871
}
872
if (uel(r1,1) < hiremainder)
873
{
874
qp--;
875
j = ly-1; r1[j] = addll(r1[j],y[j]);
876
for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);
877
}
878
*++r1 = qp;
879
}
880
881
lq = lz+2;
882
if (!z)
883
{
884
qd = (ulong*)av;
885
xd = (ulong*)(x + lq);
886
if (x[1]) { lz++; lq++; }
887
while (lz--) *--qd = *--xd;
888
*--qd = evalsigne(sy) | evallgefint(lq);
889
*--qd = evaltyp(t_INT) | evallg(lq);
890
return gc_const((pari_sp)qd, (GEN)qd);
891
}
892
893
j=lq; while (j<lx && !x[j]) j++;
894
lz = lx-j;
895
if (z == ONLY_REM)
896
{
897
if (lz==0) return gc_const(av, gen_0);
898
rd = (ulong*)av; lr = lz+2;
899
xd = (ulong*)(x + lx);
900
if (!sh) while (lz--) *--rd = *--xd;
901
else
902
{ /* shift remainder right by sh bits */
903
const ulong shl = BITS_IN_LONG - sh;
904
ulong l;
905
xd--;
906
while (--lz) /* fill r[3..] */
907
{
908
l = *xd >> sh;
909
*--rd = l | (*--xd << shl);
910
}
911
l = *xd >> sh;
912
if (l) *--rd = l; else lr--;
913
}
914
*--rd = evalsigne(sx) | evallgefint(lr);
915
*--rd = evaltyp(t_INT) | evallg(lr);
916
return gc_const((pari_sp)rd, (GEN)rd);
917
}
918
919
lr = lz+2;
920
rd = NULL; /* gcc -Wall */
921
if (lz)
922
{ /* non zero remainder: initialize rd */
923
xd = (ulong*)(x + lx);
924
if (!sh)
925
{
926
rd = (ulong*)avma; (void)new_chunk(lr);
927
while (lz--) *--rd = *--xd;
928
}
929
else
930
{ /* shift remainder right by sh bits */
931
const ulong shl = BITS_IN_LONG - sh;
932
ulong l;
933
rd = (ulong*)x; /* overwrite shifted y */
934
xd--;
935
while (--lz)
936
{
937
l = *xd >> sh;
938
*--rd = l | (*--xd << shl);
939
}
940
l = *xd >> sh;
941
if (l) *--rd = l; else lr--;
942
}
943
*--rd = evalsigne(sx) | evallgefint(lr);
944
*--rd = evaltyp(t_INT) | evallg(lr);
945
rd += lr;
946
}
947
qd = (ulong*)av;
948
xd = (ulong*)(x + lq);
949
if (x[1]) lq++;
950
j = lq-2; while (j--) *--qd = *--xd;
951
*--qd = evalsigne(sy) | evallgefint(lq);
952
*--qd = evaltyp(t_INT) | evallg(lq);
953
q = (GEN)qd;
954
if (lr==2) *z = gen_0;
955
else
956
{ /* rd has been properly initialized: we had lz > 0 */
957
while (lr--) *--qd = *--rd;
958
*z = (GEN)qd;
959
}
960
return gc_const((pari_sp)qd, q);
961
}
962
963
/* Montgomery reduction.
964
* N has k words, assume T >= 0 has less than 2k.
965
* Return res := T / B^k mod N, where B = 2^BIL
966
* such that 0 <= res < T/B^k + N and res has less than k words */
967
GEN
968
red_montgomery(GEN T, GEN N, ulong inv)
969
{
970
pari_sp av;
971
GEN Te, Td, Ne, Nd, scratch;
972
ulong i, j, m, t, d, k = NLIMBS(N);
973
int carry;
974
LOCAL_HIREMAINDER;
975
LOCAL_OVERFLOW;
976
977
if (k == 0) return gen_0;
978
d = NLIMBS(T); /* <= 2*k */
979
if (d == 0) return gen_0;
980
#ifdef DEBUG
981
if (d > 2*k) pari_err_BUG("red_montgomery");
982
#endif
983
if (k == 1)
984
{ /* as below, special cased for efficiency */
985
ulong n = uel(N,2);
986
if (d == 1) {
987
hiremainder = uel(T,2);
988
m = hiremainder * inv;
989
(void)addmul(m, n); /* t + m*n = 0 */
990
return utoi(hiremainder);
991
} else { /* d = 2 */
992
hiremainder = uel(T,3);
993
m = hiremainder * inv;
994
(void)addmul(m, n); /* t + m*n = 0 */
995
t = addll(hiremainder, uel(T,2));
996
if (overflow) t -= n; /* t > n doesn't fit in 1 word */
997
return utoi(t);
998
}
999
}
1000
/* assume k >= 2 */
1001
av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
1002
1003
/* copy T to scratch space (pad with zeroes to 2k words) */
1004
Td = (GEN)av;
1005
Te = T + (d+2);
1006
for (i=0; i < d ; i++) *--Td = *--Te;
1007
for ( ; i < (k<<1); i++) *--Td = 0;
1008
1009
Te = (GEN)av; /* 1 beyond end of current T mantissa (in scratch) */
1010
Ne = N + k+2; /* 1 beyond end of N mantissa */
1011
1012
carry = 0;
1013
for (i=0; i<k; i++) /* set T := T/B nod N, k times */
1014
{
1015
Td = Te; /* one beyond end of (new) T mantissa */
1016
Nd = Ne;
1017
hiremainder = *--Td;
1018
m = hiremainder * inv; /* solve T + m N = O(B) */
1019
1020
/* set T := (T + mN) / B */
1021
Te = Td;
1022
(void)addmul(m, *--Nd); /* = 0 */
1023
for (j=1; j<k; j++)
1024
{
1025
t = addll(addmul(m, *--Nd), *--Td);
1026
*Td = t;
1027
hiremainder += overflow;
1028
}
1029
t = addll(hiremainder, *--Td); *Td = t + carry;
1030
carry = (overflow || (carry && *Td == 0));
1031
}
1032
if (carry)
1033
{ /* Td > N overflows (k+1 words), set Td := Td - N */
1034
Td = Te;
1035
Nd = Ne;
1036
t = subll(*--Td, *--Nd); *Td = t;
1037
while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
1038
}
1039
1040
/* copy result */
1041
Td = (GEN)av;
1042
while (*scratch == 0 && Te > scratch) scratch++; /* strip leading 0s */
1043
while (Te > scratch) *--Td = *--Te;
1044
k = (GEN)av - Td; if (!k) return gc_const(av, gen_0);
1045
k += 2;
1046
*--Td = evalsigne(1) | evallgefint(k);
1047
*--Td = evaltyp(t_INT) | evallg(k);
1048
#ifdef DEBUG
1049
{
1050
long l = NLIMBS(N), s = BITS_IN_LONG*l;
1051
GEN R = int2n(s);
1052
GEN res = remii(mulii(T, Fp_inv(R, N)), N);
1053
if (k > lgefint(N)
1054
|| !equalii(remii(Td,N),res)
1055
|| cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err_BUG("red_montgomery");
1056
}
1057
#endif
1058
return gc_const((pari_sp)Td, Td);
1059
}
1060
1061
/* EXACT INTEGER DIVISION */
1062
1063
/* assume xy>0, the division is exact and y is odd. Destroy x */
1064
static GEN
1065
diviuexact_i(GEN x, ulong y)
1066
{
1067
long i, lz, lx;
1068
ulong q, yinv;
1069
GEN z, z0, x0, x0min;
1070
1071
if (y == 1) return icopy(x);
1072
lx = lgefint(x);
1073
if (lx == 3)
1074
{
1075
q = uel(x,2) / y;
1076
if (!q) pari_err_OP("exact division", x, utoi(y));
1077
return utoipos(q);
1078
}
1079
yinv = invmod2BIL(y);
1080
lz = (y <= uel(x,2)) ? lx : lx-1;
1081
z = new_chunk(lz);
1082
z0 = z + lz;
1083
x0 = x + lx; x0min = x + lx-lz+2;
1084
1085
while (x0 > x0min)
1086
{
1087
*--z0 = q = yinv*uel(--x0,0); /* i-th quotient */
1088
if (!q) continue;
1089
/* x := x - q * y */
1090
{ /* update neither lowest word (could set it to 0) nor highest ones */
1091
register GEN x1 = x0 - 1;
1092
LOCAL_HIREMAINDER;
1093
(void)mulll(q,y);
1094
if (hiremainder)
1095
{
1096
if (uel(x1,0) < hiremainder)
1097
{
1098
uel(x1,0) -= hiremainder;
1099
do uel(--x1,0)--; while (uel(x1,0) == ULONG_MAX);
1100
}
1101
else
1102
uel(x1,0) -= hiremainder;
1103
}
1104
}
1105
}
1106
i=2; while(!z[i]) i++;
1107
z += i-2; lz -= i-2;
1108
z[0] = evaltyp(t_INT)|evallg(lz);
1109
z[1] = evalsigne(1)|evallg(lz);
1110
if (lz == 2) pari_err_OP("exact division", x, utoi(y));
1111
return gc_const((pari_sp)z, z);
1112
}
1113
1114
/* assume y != 0 and the division is exact */
1115
GEN
1116
diviuexact(GEN x, ulong y)
1117
{
1118
pari_sp av;
1119
long lx, vy, s = signe(x);
1120
GEN z;
1121
1122
if (!s) return gen_0;
1123
if (y == 1) return icopy(x);
1124
lx = lgefint(x);
1125
if (lx == 3) {
1126
ulong q = uel(x,2) / y;
1127
if (!q) pari_err_OP("exact division", x, utoi(y));
1128
return (s > 0)? utoipos(q): utoineg(q);
1129
}
1130
av = avma; (void)new_chunk(lx); vy = vals(y);
1131
if (vy) {
1132
y >>= vy;
1133
if (y == 1) { set_avma(av); return shifti(x, -vy); }
1134
x = shifti(x, -vy);
1135
if (lx == 3) {
1136
ulong q = uel(x,2) / y;
1137
set_avma(av);
1138
if (!q) pari_err_OP("exact division", x, utoi(y));
1139
return (s > 0)? utoipos(q): utoineg(q);
1140
}
1141
} else x = icopy(x);
1142
set_avma(av);
1143
z = diviuexact_i(x, y);
1144
setsigne(z, s); return z;
1145
}
1146
1147
/* Find z such that x=y*z, knowing that y | x (unchecked)
1148
* Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
1149
* Set x := (x - z0 y) / B, updating only relevant words, and repeat */
1150
GEN
1151
diviiexact(GEN x, GEN y)
1152
{
1153
long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
1154
pari_sp av;
1155
ulong y0inv,q;
1156
GEN z;
1157
1158
if (!sy) pari_err_INV("diviiexact",gen_0);
1159
if (!sx) return gen_0;
1160
lx = lgefint(x);
1161
if (lx == 3) {
1162
q = uel(x,2) / uel(y,2);
1163
if (!q) pari_err_OP("exact division", x, y);
1164
return (sx+sy) ? utoipos(q): utoineg(q);
1165
}
1166
vy = vali(y); av = avma;
1167
(void)new_chunk(lx); /* enough room for z */
1168
if (vy)
1169
{ /* make y odd */
1170
y = shifti(y,-vy);
1171
x = shifti(x,-vy); lx = lgefint(x);
1172
}
1173
else x = icopy(x); /* necessary because we destroy x */
1174
set_avma(av); /* will erase our x,y when exiting */
1175
/* now y is odd */
1176
ly = lgefint(y);
1177
if (ly == 3)
1178
{
1179
z = diviuexact_i(x,uel(y,2)); /* x != 0 */
1180
setsigne(z, (sx+sy)? 1: -1); return z;
1181
}
1182
y0inv = invmod2BIL(y[ly-1]);
1183
i=2; while (i<ly && y[i]==x[i]) i++;
1184
lz = (i==ly || uel(y,i) < uel(x,i)) ? lx-ly+3 : lx-ly+2;
1185
z = new_chunk(lz);
1186
1187
y += ly - 1; /* now y[-i] = i-th word of y */
1188
for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1189
{
1190
long limj;
1191
LOCAL_HIREMAINDER;
1192
LOCAL_OVERFLOW;
1193
1194
z[i] = q = y0inv*uel(x,ii); /* i-th quotient */
1195
if (!q) continue;
1196
1197
/* x := x - q * y */
1198
(void)mulll(q,y[0]); limj = maxss(lx - lz, ii+3-ly);
1199
{ /* update neither lowest word (could set it to 0) nor highest ones */
1200
register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1201
for (; x0 >= xlim; x0--, y0--)
1202
{
1203
*x0 = subll(*x0, addmul(q,*y0));
1204
hiremainder += overflow;
1205
}
1206
if (hiremainder && limj != lx - lz)
1207
{
1208
if ((ulong)*x0 < hiremainder)
1209
{
1210
*x0 -= hiremainder;
1211
do (*--x0)--; while ((ulong)*x0 == ULONG_MAX);
1212
}
1213
else
1214
*x0 -= hiremainder;
1215
}
1216
}
1217
}
1218
i=2; while(!z[i]) i++;
1219
z += i-2; lz -= (i-2);
1220
z[0] = evaltyp(t_INT)|evallg(lz);
1221
z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);
1222
if (lz == 2) pari_err_OP("exact division", x, y);
1223
return gc_const((pari_sp)z, z);
1224
}
1225
1226
/* assume yz != and yz | x */
1227
GEN
1228
diviuuexact(GEN x, ulong y, ulong z)
1229
{
1230
long tmp[4];
1231
ulong t;
1232
LOCAL_HIREMAINDER;
1233
t = mulll(y, z);
1234
if (!hiremainder) return diviuexact(x, t);
1235
tmp[0] = evaltyp(t_INT)|_evallg(4);
1236
tmp[1] = evalsigne(1)|evallgefint(4);
1237
tmp[2] = hiremainder;
1238
tmp[3] = t;
1239
return diviiexact(x, tmp);
1240
}
1241
1242
/********************************************************************/
1243
/** **/
1244
/** INTEGER MULTIPLICATION (BASECASE) **/
1245
/** **/
1246
/********************************************************************/
1247
/* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
1248
INLINE GEN
1249
muliispec_basecase(GEN x, GEN y, long nx, long ny)
1250
{
1251
GEN z2e,z2d,yd,xd,ye,zd;
1252
long p1,lz;
1253
LOCAL_HIREMAINDER;
1254
1255
if (ny == 1) return muluispec((ulong)*y, x, nx);
1256
if (ny == 0) return gen_0;
1257
zd = (GEN)avma; lz = nx+ny+2;
1258
(void)new_chunk(lz);
1259
xd = x + nx;
1260
yd = y + ny;
1261
ye = yd; p1 = *--xd;
1262
1263
*--zd = mulll(p1, *--yd); z2e = zd;
1264
while (yd > y) *--zd = addmul(p1, *--yd);
1265
*--zd = hiremainder;
1266
1267
while (xd > x)
1268
{
1269
LOCAL_OVERFLOW;
1270
yd = ye; p1 = *--xd;
1271
1272
z2d = --z2e;
1273
*z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1274
while (yd > y)
1275
{
1276
hiremainder += overflow;
1277
*z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1278
}
1279
*--zd = hiremainder + overflow;
1280
}
1281
if (*zd == 0) { zd++; lz--; } /* normalize */
1282
*--zd = evalsigne(1) | evallgefint(lz);
1283
*--zd = evaltyp(t_INT) | evallg(lz);
1284
return gc_const((pari_sp)zd, zd);
1285
}
1286
1287
INLINE GEN
1288
sqrispec_basecase(GEN x, long nx)
1289
{
1290
GEN z2e,z2d,yd,xd,zd,x0,z0;
1291
long p1,lz;
1292
LOCAL_HIREMAINDER;
1293
LOCAL_OVERFLOW;
1294
1295
if (nx == 1) return sqru((ulong)*x);
1296
if (nx == 0) return gen_0;
1297
zd = (GEN)avma; lz = (nx+1) << 1;
1298
z0 = new_chunk(lz);
1299
if (nx == 1)
1300
{
1301
*--zd = mulll(*x, *x);
1302
*--zd = hiremainder; goto END;
1303
}
1304
xd = x + nx;
1305
1306
/* compute double products --> zd */
1307
p1 = *--xd; yd = xd; --zd;
1308
*--zd = mulll(p1, *--yd); z2e = zd;
1309
while (yd > x) *--zd = addmul(p1, *--yd);
1310
*--zd = hiremainder;
1311
1312
x0 = x+1;
1313
while (xd > x0)
1314
{
1315
LOCAL_OVERFLOW;
1316
p1 = *--xd; yd = xd;
1317
1318
z2e -= 2; z2d = z2e;
1319
*z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1320
while (yd > x)
1321
{
1322
hiremainder += overflow;
1323
*z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1324
}
1325
*--zd = hiremainder + overflow;
1326
}
1327
/* multiply zd by 2 (put result in zd - 1) */
1328
zd[-1] = ((*zd & HIGHBIT) != 0);
1329
shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
1330
1331
/* add the squares */
1332
xd = x + nx; zd = z0 + lz;
1333
p1 = *--xd;
1334
zd--; *zd = mulll(p1,p1);
1335
zd--; *zd = addll(hiremainder, *zd);
1336
while (xd > x)
1337
{
1338
p1 = *--xd;
1339
zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
1340
zd--; *zd = addll(hiremainder + overflow, *zd);
1341
}
1342
1343
END:
1344
if (*zd == 0) { zd++; lz--; } /* normalize */
1345
*--zd = evalsigne(1) | evallgefint(lz);
1346
*--zd = evaltyp(t_INT) | evallg(lz);
1347
return gc_const((pari_sp)zd, zd);
1348
}
1349
1350
/********************************************************************/
1351
/** **/
1352
/** INTEGER MULTIPLICATION (FFT) **/
1353
/** **/
1354
/********************************************************************/
1355
1356
/*
1357
Compute parameters for FFT:
1358
len: result length
1359
k: FFT depth.
1360
n: number of blocks (2^k)
1361
bs: block size
1362
mod: Modulus is M=2^(BIL*mod)+1
1363
ord: order of 2 in Z/MZ.
1364
We must have:
1365
bs*n >= l
1366
2^(BIL*mod) > nb*2^(2*BIL*bs)
1367
2^k | 2*BIL*mod
1368
*/
1369
static void
1370
mulliifft_params(long len, long *k, long *mod, long *bs, long *n, ulong *ord)
1371
{
1372
long r;
1373
*k = expu((3*len)>>2)-3;
1374
do {
1375
(*k)--;
1376
r = *k-(TWOPOTBITS_IN_LONG+2);
1377
*n = 1L<<*k;
1378
*bs = (len+*n-1)>>*k;
1379
*mod= 2**bs+1;
1380
if (r>0)
1381
*mod=((*mod+(1L<<r)-1)>>r)<<r;
1382
} while(*mod>=3**bs);
1383
*ord= 4**mod*BITS_IN_LONG;
1384
}
1385
1386
/* Zf_: arithmetic in ring Z/MZ where M= 2^(BITS_IN_LONG*mod)+1
1387
* for some mod.
1388
* Do not garbage collect.
1389
*/
1390
1391
static GEN
1392
Zf_add(GEN a, GEN b, GEN M)
1393
{
1394
GEN y, z = addii(a,b);
1395
long mod = lgefint(M)-3;
1396
long l = NLIMBS(z);
1397
if (l<=mod) return z;
1398
y = subiu(z, 1);
1399
if (NLIMBS(y)<=mod) return z;
1400
return int_normalize(y,1);
1401
}
1402
1403
static GEN
1404
Zf_sub(GEN a, GEN b, GEN M)
1405
{
1406
GEN z = subii(a,b);
1407
return signe(z)>=0? z: addii(M,z);
1408
}
1409
1410
/* destroy z */
1411
static GEN
1412
Zf_red_destroy(GEN z, GEN M)
1413
{
1414
long mod = lgefint(M)-3;
1415
long l = NLIMBS(z);
1416
GEN y;
1417
if (l<=mod) return z;
1418
y = shifti(z, -mod*BITS_IN_LONG);
1419
z = int_normalize(z, NLIMBS(y));
1420
y = Zf_red_destroy(y, M);
1421
z = subii(z, y);
1422
if (signe(z)<0) z = addii(z, M);
1423
return z;
1424
}
1425
1426
INLINE GEN
1427
Zf_shift(GEN a, ulong s, GEN M) { return Zf_red_destroy(shifti(a, s), M); }
1428
1429
/*
1430
Multiply by sqrt(2)^s
1431
We use the formula sqrt(2)=z_8*(1-z_4)) && z_8=2^(ord/16) [2^(ord/4)+1]
1432
*/
1433
1434
static GEN
1435
Zf_mulsqrt2(GEN a, ulong s, ulong ord, GEN M)
1436
{
1437
ulong hord = ord>>1;
1438
if (!signe(a)) return gen_0;
1439
if (odd(s)) /* Multiply by 2^(s/2) */
1440
{
1441
GEN az8 = Zf_shift(a, ord>>4, M);
1442
GEN az83 = Zf_shift(az8, ord>>3, M);
1443
a = Zf_sub(az8, az83, M);
1444
s--;
1445
}
1446
if (s < hord)
1447
return Zf_shift(a, s>>1, M);
1448
else
1449
return subii(M,Zf_shift(a, (s-hord)>>1, M));
1450
}
1451
1452
INLINE GEN
1453
Zf_sqr(GEN a, GEN M) { return Zf_red_destroy(sqri(a), M); }
1454
1455
INLINE GEN
1456
Zf_mul(GEN a, GEN b, GEN M) { return Zf_red_destroy(mulii(a,b), M); }
1457
1458
/* In place, bit reversing FFT */
1459
static void
1460
muliifft_dit(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
1461
{
1462
pari_sp av = avma;
1463
long i;
1464
ulong j, no = (o<<1)%ord;
1465
long hstep=step>>1;
1466
for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
1467
{
1468
GEN a = Zf_add(gel(FFT,i), gel(FFT,i+hstep), M);
1469
GEN b = Zf_mulsqrt2(Zf_sub(gel(FFT,i), gel(FFT,i+hstep), M), j, ord, M);
1470
affii(a,gel(FFT,i));
1471
affii(b,gel(FFT,i+hstep));
1472
set_avma(av);
1473
}
1474
if (hstep>1)
1475
{
1476
muliifft_dit(no, ord, M, FFT, d, hstep);
1477
muliifft_dit(no, ord, M, FFT, d+hstep, hstep);
1478
}
1479
}
1480
1481
/* In place, bit reversed FFT, inverse of muliifft_dit */
1482
static void
1483
muliifft_dis(ulong o, ulong ord, GEN M, GEN FFT, long d, long step)
1484
{
1485
pari_sp av = avma;
1486
long i;
1487
ulong j, no = (o<<1)%ord;
1488
long hstep=step>>1;
1489
if (hstep>1)
1490
{
1491
muliifft_dis(no, ord, M, FFT, d, hstep);
1492
muliifft_dis(no, ord, M, FFT, d+hstep, hstep);
1493
}
1494
for (i = d+1, j = 0; i <= d+hstep; ++i, j =(j+o)%ord)
1495
{
1496
GEN z = Zf_mulsqrt2(gel(FFT,i+hstep), j, ord, M);
1497
GEN a = Zf_add(gel(FFT,i), z, M);
1498
GEN b = Zf_sub(gel(FFT,i), z, M);
1499
affii(a,gel(FFT,i));
1500
affii(b,gel(FFT,i+hstep));
1501
set_avma(av);
1502
}
1503
}
1504
1505
static GEN
1506
muliifft_spliti(GEN a, long na, long bs, long n, long mod)
1507
{
1508
GEN ap = a+na-1;
1509
GEN c = cgetg(n+1, t_VEC);
1510
long i,j;
1511
for(i=1;i<=n;i++)
1512
{
1513
GEN z = cgeti(mod+3);
1514
if (na)
1515
{
1516
long m = minss(bs, na), v=0;
1517
GEN zp, aa=ap-m+1;
1518
while (!*aa && v<m) {aa++; v++;}
1519
zp = z+m-v+1;
1520
for (j=v; j < m; j++)
1521
*zp-- = *ap--;
1522
ap -= v; na -= m;
1523
z[1] = evalsigne(m!=v) | evallgefint(m-v+2);
1524
} else
1525
z[1] = evalsigne(0) | evallgefint(2);
1526
gel(c, i) = z;
1527
}
1528
return c;
1529
}
1530
1531
static GEN
1532
muliifft_unspliti(GEN V, long bs, long len)
1533
{
1534
long s, i, j, l = lg(V);
1535
GEN a = cgeti(len);
1536
a[1] = evalsigne(1)|evallgefint(len);
1537
for(i=2;i<len;i++)
1538
a[i] = 0;
1539
for(i=1, s=0; i<l; i++, s+=bs)
1540
{
1541
GEN u = gel(V,i);
1542
if (signe(u))
1543
{
1544
GEN ap = int_W(a,s);
1545
GEN up = int_LSW(u);
1546
long lu = NLIMBS(u);
1547
LOCAL_OVERFLOW;
1548
*ap = addll(*ap, *up--); ap--;
1549
for (j=1; j<lu; j++)
1550
{ *ap = addllx(*ap, *up--); ap--; }
1551
while (overflow)
1552
{ *ap = addll(*ap, 1); ap--; }
1553
}
1554
}
1555
return int_normalize(a,0);
1556
}
1557
1558
static GEN
1559
sqrispec_fft(GEN a, long na)
1560
{
1561
pari_sp av, ltop = avma;
1562
long len = 2*na;
1563
long k, mod, bs, n;
1564
GEN FFT, M;
1565
long i;
1566
ulong o, ord;
1567
1568
mulliifft_params(len,&k,&mod,&bs,&n,&ord);
1569
o = ord>>k;
1570
M = int2n(mod*BITS_IN_LONG);
1571
M[2+mod] = 1;
1572
FFT = muliifft_spliti(a, na, bs, n, mod);
1573
muliifft_dit(o, ord, M, FFT, 0, n);
1574
av = avma;
1575
for(i=1; i<=n; i++)
1576
{
1577
affii(Zf_sqr(gel(FFT,i), M), gel(FFT,i));
1578
set_avma(av);
1579
}
1580
muliifft_dis(ord-o, ord, M, FFT, 0, n);
1581
for(i=1; i<=n; i++)
1582
{
1583
affii(Zf_shift(gel(FFT,i), (ord>>1)-k, M), gel(FFT,i));
1584
set_avma(av);
1585
}
1586
return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
1587
}
1588
1589
static GEN
1590
muliispec_fft(GEN a, GEN b, long na, long nb)
1591
{
1592
pari_sp av, av2, ltop = avma;
1593
long len = na+nb;
1594
long k, mod, bs, n;
1595
GEN FFT, FFTb, M;
1596
long i;
1597
ulong o, ord;
1598
1599
mulliifft_params(len,&k,&mod,&bs,&n,&ord);
1600
o = ord>>k;
1601
M = int2n(mod*BITS_IN_LONG);
1602
M[2+mod] = 1;
1603
FFT = muliifft_spliti(a, na, bs, n, mod);
1604
av=avma;
1605
muliifft_dit(o, ord, M, FFT, 0, n);
1606
FFTb = muliifft_spliti(b, nb, bs, n, mod);
1607
av2 = avma;
1608
muliifft_dit(o, ord, M, FFTb, 0, n);
1609
for(i=1; i<=n; i++)
1610
{
1611
affii(Zf_mul(gel(FFT,i), gel(FFTb,i), M), gel(FFT,i));
1612
set_avma(av2);
1613
}
1614
set_avma(av);
1615
muliifft_dis(ord-o, ord, M, FFT, 0, n);
1616
for(i=1; i<=n; i++)
1617
{
1618
affii(Zf_shift(gel(FFT,i),(ord>>1)-k,M), gel(FFT,i));
1619
set_avma(av);
1620
}
1621
return gerepileuptoint(ltop, muliifft_unspliti(FFT,bs,2+len));
1622
}
1623
1624
/********************************************************************/
1625
/** **/
1626
/** INTEGER MULTIPLICATION (KARATSUBA) **/
1627
/** **/
1628
/********************************************************************/
1629
1630
/* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
1631
static GEN
1632
addshiftw(GEN x, GEN y, long d)
1633
{
1634
GEN z,z0,y0,yd, zd = (GEN)avma;
1635
long a,lz,ly = lgefint(y);
1636
1637
z0 = new_chunk(d);
1638
a = ly-2; yd = y+ly;
1639
if (a >= d)
1640
{
1641
y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
1642
a -= d;
1643
if (a)
1644
z = addiispec(LIMBS(x), LIMBS(y), NLIMBS(x), a);
1645
else
1646
z = icopy(x);
1647
}
1648
else
1649
{
1650
y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
1651
while (zd > z0) *--zd = 0; /* complete with 0s */
1652
z = icopy(x);
1653
}
1654
lz = lgefint(z)+d;
1655
z[1] = evalsigne(1) | evallgefint(lz);
1656
z[0] = evaltyp(t_INT) | evallg(lz); return z;
1657
}
1658
1659
/* Fast product (Karatsuba) of integers. a and b are "special" GENs
1660
* c,c0,c1,c2 are genuine GENs.
1661
*/
1662
GEN
1663
muliispec(GEN a, GEN b, long na, long nb)
1664
{
1665
GEN a0,c,c0;
1666
long n0, n0a, i;
1667
pari_sp av;
1668
1669
if (na < nb) swapspec(a,b, na,nb);
1670
if (nb < MULII_KARATSUBA_LIMIT) return muliispec_basecase(a,b,na,nb);
1671
if (nb >= MULII_FFT_LIMIT) return muliispec_fft(a,b,na,nb);
1672
i=(na>>1); n0=na-i; na=i;
1673
av=avma; a0=a+na; n0a=n0;
1674
while (n0a && !*a0) { a0++; n0a--; }
1675
1676
if (n0a && nb > n0)
1677
{ /* nb <= na <= n0 */
1678
GEN b0,c1,c2;
1679
long n0b;
1680
1681
nb -= n0;
1682
c = muliispec(a,b,na,nb);
1683
b0 = b+nb; n0b = n0;
1684
while (n0b && !*b0) { b0++; n0b--; }
1685
if (n0b)
1686
{
1687
c0 = muliispec(a0,b0, n0a,n0b);
1688
1689
c2 = addiispec(a0,a, n0a,na);
1690
c1 = addiispec(b0,b, n0b,nb);
1691
c1 = muliispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
1692
c2 = addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0),NLIMBS(c));
1693
1694
c1 = subiispec(LIMBS(c1),LIMBS(c2), NLIMBS(c1),NLIMBS(c2));
1695
}
1696
else
1697
{
1698
c0 = gen_0;
1699
c1 = muliispec(a0,b, n0a,nb);
1700
}
1701
c = addshiftw(c,c1, n0);
1702
}
1703
else
1704
{
1705
c = muliispec(a,b,na,nb);
1706
c0 = muliispec(a0,b,n0a,nb);
1707
}
1708
return gerepileuptoint(av, addshiftw(c,c0, n0));
1709
}
1710
GEN
1711
muluui(ulong x, ulong y, GEN z)
1712
{
1713
long t, s = signe(z);
1714
GEN r;
1715
LOCAL_HIREMAINDER;
1716
1717
if (!x || !y || !signe(z)) return gen_0;
1718
t = mulll(x,y);
1719
if (!hiremainder)
1720
r = muluispec(t, z+2, lgefint(z)-2);
1721
else
1722
{
1723
long tmp[2];
1724
tmp[0] = hiremainder;
1725
tmp[1] = t;
1726
r = muliispec(z+2,tmp,lgefint(z)-2,2);
1727
}
1728
setsigne(r,s); return r;
1729
}
1730
1731
#define sqrispec_mirror sqrispec
1732
#define muliispec_mirror muliispec
1733
1734
/* x % (2^n), assuming n >= 0 */
1735
GEN
1736
remi2n(GEN x, long n)
1737
{
1738
long hi,l,k,lx,ly, sx = signe(x);
1739
GEN z, xd, zd;
1740
1741
if (!sx || !n) return gen_0;
1742
1743
k = dvmdsBIL(n, &l);
1744
lx = lgefint(x);
1745
if (lx < k+3) return icopy(x);
1746
1747
xd = x + (lx-k-1);
1748
/* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
1749
* ^--- initial xd */
1750
hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */
1751
if (!hi)
1752
{ /* strip leading zeroes from result */
1753
xd++; while (k && !*xd) { k--; xd++; }
1754
if (!k) return gen_0;
1755
ly = k+2; xd--;
1756
}
1757
else
1758
ly = k+3;
1759
1760
zd = z = cgeti(ly);
1761
*++zd = evalsigne(sx) | evallgefint(ly);
1762
if (hi) *++zd = hi;
1763
for ( ;k; k--) *++zd = *++xd;
1764
return z;
1765
}
1766
1767
GEN
1768
sqrispec(GEN a, long na)
1769
{
1770
GEN a0,c;
1771
long n0, n0a, i;
1772
pari_sp av;
1773
1774
if (na < SQRI_KARATSUBA_LIMIT) return sqrispec_basecase(a,na);
1775
if (na >= SQRI_FFT_LIMIT) return sqrispec_fft(a,na);
1776
i=(na>>1); n0=na-i; na=i;
1777
av=avma; a0=a+na; n0a=n0;
1778
while (n0a && !*a0) { a0++; n0a--; }
1779
c = sqrispec(a,na);
1780
if (n0a)
1781
{
1782
GEN t, c1, c0 = sqrispec(a0,n0a);
1783
#if 0
1784
c1 = shifti(muliispec(a0,a, n0a,na),1);
1785
#else /* faster */
1786
t = addiispec(a0,a,n0a,na);
1787
t = sqrispec(LIMBS(t),NLIMBS(t));
1788
c1= addiispec(LIMBS(c0),LIMBS(c), NLIMBS(c0), NLIMBS(c));
1789
c1= subiispec(LIMBS(t),LIMBS(c1), NLIMBS(t), NLIMBS(c1));
1790
#endif
1791
c = addshiftw(c,c1, n0);
1792
c = addshiftw(c,c0, n0);
1793
}
1794
else
1795
c = addshiftw(c,gen_0,n0<<1);
1796
return gerepileuptoint(av, c);
1797
}
1798
1799
/********************************************************************/
1800
/** **/
1801
/** KARATSUBA SQUARE ROOT **/
1802
/** adapted from Paul Zimmermann's implementation of **/
1803
/** his algorithm in GMP (mpn_sqrtrem) **/
1804
/** **/
1805
/********************************************************************/
1806
1807
/* Square roots table */
1808
static const unsigned char approx_tab[192] = {
1809
128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
1810
143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
1811
156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
1812
169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
1813
181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
1814
192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
1815
202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
1816
212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
1817
221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
1818
230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
1819
239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
1820
247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
1821
};
1822
1823
/* N[0], assume N[0] >= 2^(BIL-2).
1824
* Return r,s such that s^2 + r = N, 0 <= r <= 2s */
1825
static void
1826
p_sqrtu1(ulong *N, ulong *ps, ulong *pr)
1827
{
1828
ulong prec, r, s, q, u, n0 = N[0];
1829
1830
q = n0 >> (BITS_IN_LONG - 8);
1831
/* 2^6 = 64 <= q < 256 = 2^8 */
1832
s = approx_tab[q - 64]; /* 128 <= s < 255 */
1833
r = (n0 >> (BITS_IN_LONG - 16)) - s * s; /* r <= 2*s */
1834
if (r > (s << 1)) { r -= (s << 1) | 1; s++; }
1835
1836
/* 8-bit approximation from the high 8-bits of N[0] */
1837
prec = 8;
1838
n0 <<= 2 * prec;
1839
while (2 * prec < BITS_IN_LONG)
1840
{ /* invariant: s has prec bits, and r <= 2*s */
1841
r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));
1842
n0 <<= prec;
1843
u = 2 * s;
1844
q = r / u; u = r - q * u;
1845
s = (s << prec) + q;
1846
u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));
1847
q = q * q;
1848
r = u - q;
1849
if (u < q) { s--; r += (s << 1) | 1; }
1850
n0 <<= prec;
1851
prec = 2 * prec;
1852
}
1853
*ps = s;
1854
*pr = r;
1855
}
1856
1857
/* N[0..1], assume N[0] >= 2^(BIL-2).
1858
* Return 1 if remainder overflows, 0 otherwise */
1859
static int
1860
p_sqrtu2(ulong *N, ulong *ps, ulong *pr)
1861
{
1862
ulong cc, qhl, r, s, q, u, n1 = N[1];
1863
LOCAL_OVERFLOW;
1864
1865
p_sqrtu1(N, &s, &r); /* r <= 2s */
1866
qhl = 0; while (r >= s) { qhl++; r -= s; }
1867
/* now r < s < 2^(BIL/2) */
1868
r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);
1869
u = s << 1;
1870
q = r / u; u = r - q * u;
1871
q += (qhl & 1) << (BITS_IN_HALFULONG - 1);
1872
qhl >>= 1;
1873
/* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */
1874
s = ((s + qhl) << BITS_IN_HALFULONG) + q;
1875
cc = u >> BITS_IN_HALFULONG;
1876
r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);
1877
r = subll(r, q * q);
1878
cc -= overflow + qhl;
1879
/* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */
1880
if ((long)cc < 0)
1881
{
1882
if (s) {
1883
r = addll(r, s);
1884
cc += overflow;
1885
s--;
1886
} else {
1887
cc++;
1888
s = ~0UL;
1889
}
1890
r = addll(r, s);
1891
cc += overflow;
1892
}
1893
*ps = s;
1894
*pr = r; return cc;
1895
}
1896
1897
static void
1898
xmpn_zero(GEN x, long n)
1899
{
1900
while (--n >= 0) x[n]=0;
1901
}
1902
static void
1903
xmpn_copy(GEN z, GEN x, long n)
1904
{
1905
long k = n;
1906
while (--k >= 0) z[k] = x[k];
1907
}
1908
/* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */
1909
static GEN
1910
catii(GEN a, long la, GEN b, long lb)
1911
{
1912
long l = la + lb + 2;
1913
GEN z = cgetipos(l);
1914
xmpn_copy(LIMBS(z), a, la);
1915
xmpn_copy(LIMBS(z) + la, b, lb);
1916
return int_normalize(z, 0);
1917
}
1918
1919
/* sqrt n[0..1], assume n normalized */
1920
static GEN
1921
sqrtispec2(GEN n, GEN *pr)
1922
{
1923
ulong s, r;
1924
int hi = p_sqrtu2((ulong*)n, &s, &r);
1925
GEN S = utoi(s);
1926
*pr = hi? uutoi(1,r): utoi(r);
1927
return S;
1928
}
1929
1930
/* sqrt n[0], _dont_ assume n normalized */
1931
static GEN
1932
sqrtispec1_sh(GEN n, GEN *pr)
1933
{
1934
GEN S;
1935
ulong r, s, u0 = uel(n,0);
1936
int sh = bfffo(u0) & ~1UL;
1937
if (sh) u0 <<= sh;
1938
p_sqrtu1(&u0, &s, &r);
1939
/* s^2 + r = u0, s < 2^(BIL/2). Rescale back:
1940
* 2^(2k) n = S^2 + R
1941
* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1942
if (sh) {
1943
int k = sh >> 1;
1944
ulong s0 = s & ((1L<<k) - 1);
1945
r += s * (s0<<1);
1946
s >>= k;
1947
r >>= sh;
1948
}
1949
S = utoi(s);
1950
if (pr) *pr = utoi(r);
1951
return S;
1952
}
1953
1954
/* sqrt n[0..1], _dont_ assume n normalized */
1955
static GEN
1956
sqrtispec2_sh(GEN n, GEN *pr)
1957
{
1958
GEN S;
1959
ulong U[2], r, s, u0 = uel(n,0), u1 = uel(n,1);
1960
int hi, sh = bfffo(u0) & ~1UL;
1961
if (sh) {
1962
u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));
1963
u1 <<= sh;
1964
}
1965
U[0] = u0;
1966
U[1] = u1; hi = p_sqrtu2(U, &s, &r);
1967
/* s^2 + R = u0|u1. Rescale back:
1968
* 2^(2k) n = S^2 + R
1969
* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1970
if (sh) {
1971
int k = sh >> 1;
1972
ulong s0 = s & ((1L<<k) - 1);
1973
LOCAL_HIREMAINDER;
1974
LOCAL_OVERFLOW;
1975
r = addll(r, mulll(s, (s0<<1)));
1976
if (overflow) hiremainder++;
1977
hiremainder += hi; /* + 0 or 1 */
1978
s >>= k;
1979
r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));
1980
hi = (hiremainder & (1L<<sh));
1981
}
1982
S = utoi(s);
1983
if (pr) *pr = hi? uutoi(1,r): utoi(r);
1984
return S;
1985
}
1986
1987
/* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S
1988
* Assume N normalized */
1989
static GEN
1990
sqrtispec(GEN N, long n, GEN *r)
1991
{
1992
GEN S, R, q, z, u;
1993
long l, h;
1994
1995
if (n == 1) return sqrtispec2(N, r);
1996
l = n >> 1;
1997
h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */
1998
S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */
1999
2000
z = catii(LIMBS(R), NLIMBS(R), N + 2*h, l); /* = R | a1(l) */
2001
q = dvmdii(z, shifti(S,1), &u);
2002
z = catii(LIMBS(u), NLIMBS(u), N + n + h, l); /* = u | a0(l) */
2003
2004
S = addshiftw(S, q, l);
2005
R = subii(z, sqri(q));
2006
if (signe(R) < 0)
2007
{
2008
GEN S2 = shifti(S,1);
2009
R = addis(subiispec(LIMBS(S2),LIMBS(R), NLIMBS(S2),NLIMBS(R)), -1);
2010
S = addis(S, -1);
2011
}
2012
*r = R; return S;
2013
}
2014
2015
/* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.
2016
* As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the
2017
* remainder is 0. R = NULL is allowed. */
2018
GEN
2019
sqrtremi(GEN N, GEN *r)
2020
{
2021
pari_sp av;
2022
GEN S, R, n = N+2;
2023
long k, l2, ln = NLIMBS(N);
2024
int sh;
2025
2026
if (ln <= 2)
2027
{
2028
if (ln == 2) return sqrtispec2_sh(n, r);
2029
if (ln == 1) return sqrtispec1_sh(n, r);
2030
if (r) *r = gen_0;
2031
return gen_0;
2032
}
2033
av = avma;
2034
sh = bfffo(n[0]) >> 1;
2035
l2 = (ln + 1) >> 1;
2036
if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */
2037
GEN s0, t = new_chunk(ln + 1);
2038
t[ln] = 0;
2039
if (sh)
2040
shift_left(t, n, 0,ln-1, 0, sh << 1);
2041
else
2042
xmpn_copy(t, n, ln);
2043
S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */
2044
/* Rescale back:
2045
* 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/2
2046
* so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
2047
k = sh + (ln & 1) * (BITS_IN_LONG/2);
2048
s0 = remi2n(S, k);
2049
R = addii(shifti(R,-1), mulii(s0, S));
2050
R = shifti(R, 1 - (k<<1));
2051
S = shifti(S, -k);
2052
}
2053
else
2054
S = sqrtispec(n, l2, &R);
2055
2056
if (!r) { set_avma((pari_sp)S); return gerepileuptoint(av, S); }
2057
gerepileall(av, 2, &S, &R); *r = R; return S;
2058
}
2059
2060
/* compute sqrt(|a|), assuming a != 0 */
2061
2062
#if 1
2063
GEN
2064
sqrtr_abs(GEN x)
2065
{
2066
long l = realprec(x) - 2, e = expo(x), er = e>>1;
2067
GEN b, c, res = cgetr(2 + l);
2068
res[1] = evalsigne(1) | evalexpo(er);
2069
if (e&1) {
2070
b = new_chunk(l << 1);
2071
xmpn_copy(b, x+2, l);
2072
xmpn_zero(b + l,l);
2073
b = sqrtispec(b, l, &c);
2074
xmpn_copy(res+2, b+2, l);
2075
if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);
2076
} else {
2077
ulong u;
2078
b = new_chunk(2 + (l << 1));
2079
shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);
2080
b[0] = uel(x,2)>>1;
2081
xmpn_zero(b + l+1,l+1);
2082
b = sqrtispec(b, l+1, &c);
2083
xmpn_copy(res+2, b+2, l);
2084
u = uel(b,l+2);
2085
if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))
2086
roundr_up_ip(res, l+2);
2087
}
2088
return gc_const((pari_sp)res, res);
2089
}
2090
2091
#else /* use t_REAL: currently much slower (quadratic division) */
2092
2093
#ifdef LONG_IS_64BIT
2094
/* 64 bits of b = sqrt(a[0] * 2^64 + a[1]) [ up to 1ulp ] */
2095
static ulong
2096
sqrtu2(ulong *a)
2097
{
2098
ulong c, b = dblmantissa( sqrt((double)a[0]) );
2099
LOCAL_HIREMAINDER;
2100
LOCAL_OVERFLOW;
2101
2102
/* > 32 correct bits, 1 Newton iteration to reach 64 */
2103
if (b <= a[0]) return HIGHBIT | (a[0] >> 1);
2104
hiremainder = a[0]; c = divll(a[1], b);
2105
return (addll(c, b) >> 1) | HIGHBIT;
2106
}
2107
/* 64 bits of sqrt(a[0] * 2^63) */
2108
static ulong
2109
sqrtu2_1(ulong *a)
2110
{
2111
ulong t[2];
2112
t[0] = (a[0] >> 1);
2113
t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);
2114
return sqrtu2(t);
2115
}
2116
#else
2117
/* 32 bits of sqrt(a[0] * 2^32) */
2118
static ulong
2119
sqrtu2(ulong *a) { return dblmantissa( sqrt((double)a[0]) ); }
2120
/* 32 bits of sqrt(a[0] * 2^31) */
2121
static ulong
2122
sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }
2123
#endif
2124
2125
GEN
2126
sqrtr_abs(GEN x)
2127
{
2128
long l1, i, l = lg(x), ex = expo(x);
2129
GEN a, t, y = cgetr(l);
2130
pari_sp av, av0 = avma;
2131
2132
a = rtor(x, l+1);
2133
t = cgetr(l+1);
2134
if (ex & 1) { /* odd exponent */
2135
a[1] = evalsigne(1) | _evalexpo(1);
2136
t[2] = (long)sqrtu2((ulong*)a + 2);
2137
} else { /* even exponent */
2138
a[1] = evalsigne(1) | _evalexpo(0);
2139
t[2] = (long)sqrtu2_1((ulong*)a + 2);
2140
}
2141
t[1] = evalsigne(1) | _evalexpo(0);
2142
for (i = 3; i <= l; i++) t[i] = 0;
2143
2144
/* |x| = 2^(ex/2) a, t ~ sqrt(a) */
2145
l--; l1 = 1; av = avma;
2146
while (l1 < l) { /* let t := (t + a/t)/2 */
2147
l1 <<= 1; if (l1 > l) l1 = l;
2148
setlg(a, l1 + 2);
2149
setlg(t, l1 + 2);
2150
affrr(addrr(t, divrr(a,t)), t); shiftr_inplace(t, -1);
2151
set_avma(av);
2152
}
2153
affrr(t,y); shiftr_inplace(y, (ex>>1));
2154
return gc_const(av0, y);
2155
}
2156
2157
#endif
2158
2159
/*******************************************************************
2160
* *
2161
* Base Conversion *
2162
* *
2163
*******************************************************************/
2164
2165
static void
2166
convi_dac(GEN x, ulong l, ulong *res)
2167
{
2168
pari_sp ltop=avma;
2169
ulong m;
2170
GEN x1,x2;
2171
if (l==1) { *res=itou(x); return; }
2172
m=l>>1;
2173
x1=dvmdii(x,powuu(1000000000UL,m),&x2);
2174
convi_dac(x1,l-m,res+m);
2175
convi_dac(x2,m,res);
2176
set_avma(ltop);
2177
}
2178
2179
/* convert integer --> base 10^9 [not memory clean] */
2180
ulong *
2181
convi(GEN x, long *l)
2182
{
2183
long lz, lx = lgefint(x);
2184
ulong *z;
2185
if (lx == 3 && uel(x,2) < 1000000000UL) {
2186
z = (ulong*)new_chunk(1);
2187
*z = x[2];
2188
*l = 1; return z+1;
2189
}
2190
lz = 1 + (long)bit_accuracy_mul(lx, LOG10_2/9);
2191
z = (ulong*)new_chunk(lz);
2192
convi_dac(x,(ulong)lz,z);
2193
while (z[lz-1]==0) lz--;
2194
*l=lz; return z+lz;
2195
}
2196
2197
2198