Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Testing latest pari + WASM + node.js... and it works?! Wow.

28485 views
License: GPL3
ubuntu2004
1
/* Copyright (C) 2001 The PARI group.
2
3
This file is part of the PARI/GP package.
4
5
PARI/GP is free software; you can redistribute it and/or modify it under the
6
terms of the GNU General Public License as published by the Free Software
7
Foundation; either version 2 of the License, or (at your option) any later
8
version. It is distributed in the hope that it will be useful, but WITHOUT
9
ANY WARRANTY WHATSOEVER.
10
11
Check the License for details. You should have received a copy of it, along
12
with the package; see the file 'COPYING'. If not, write to the Free Software
13
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14
15
/* This file is a quick hack adapted from gmp-4.0 tuning utilities
16
* (T. Granlund et al.)
17
*
18
* (GMU MP Library is Copyright Free Software Foundation, Inc.) */
19
#define PARI_TUNE
20
#include "pari.h"
21
#include "paripriv.h"
22
23
int option_trace = 0;
24
double Step_Factor = .01; /* small steps by default */
25
ulong DFLT_mod, DFLT_mod2, DFLT_deg;
26
GEN LARGE_mod;
27
28
typedef struct {
29
ulong reps, type;
30
long *var, *var_disable, *var_enable, var_enable_min, size, enabled;
31
GEN x, y;
32
ulong l;
33
GEN T, p;
34
} speed_param;
35
36
typedef double (*speed_function_t)(speed_param *s);
37
38
typedef struct {
39
int kernel;
40
const char *name;
41
long *var;
42
int type; /* t_INT or t_REAL */
43
long min_size;
44
long max_size;
45
speed_function_t fun;
46
double step_factor; /* how much to step sizes (rounded down) */
47
double stop_factor;
48
long *var_disable;
49
long *var_enable;
50
} tune_param;
51
52
/* ========================================================== */
53
/* To use GMP cycle counting functions, look for GMP in Oxxx/Makefile */
54
#ifdef GMP_TIMER
55
/* needed to link with gmp-4.0/tune/{time,freq}.o */
56
int speed_option_verbose = 0;
57
extern double speed_unittime;
58
extern int speed_precision;
59
void speed_starttime(void);
60
double speed_endtime(void);
61
#else
62
static pari_timer __T;
63
static double speed_unittime = 1e-4;
64
static int speed_precision= 1000;
65
static void speed_starttime() { timer_start(&__T); }
66
static double speed_endtime() { return (double)timer_delay(&__T)/1000.; }
67
#endif
68
69
/* ========================================================== */
70
/* int, n words, odd */
71
static GEN
72
rand_INT(long n)
73
{
74
pari_sp av = avma;
75
GEN x, N = int2n(n*BITS_IN_LONG);
76
do x = randomi(N); while (lgefint(x) != n+2);
77
if (!mpodd(x)) x = addis(x,1); /*For Barrett REDC */
78
return gerepileuptoint(av, x);
79
}
80
/* real, n words */
81
static GEN
82
rand_REAL(long n) { return gmul2n(itor(rand_INT(n), n+2),-BITS_IN_LONG*n); }
83
84
static GEN
85
rand_FpX(long n)
86
{
87
GEN x;
88
do x = random_FpX(n+1, 0, LARGE_mod); while (degpol(x) < n);
89
return x;
90
}
91
/* Flx, degree n */
92
static GEN
93
rand_F2x(long n)
94
{
95
GEN x;
96
do x = random_F2x(BITS_IN_LONG*(n+1), 0); while (degpol(x) < n);
97
return x;
98
}
99
/* Flx, degree n */
100
static GEN
101
rand_Flx(long n, ulong l)
102
{
103
GEN x;
104
do x = random_Flx(n+1, 0, l); while (degpol(x) < n);
105
return x;
106
}
107
108
static GEN
109
rand_F2xqX(long n, GEN T)
110
{
111
GEN x;
112
do x = random_F2xqX(n+1, 0, T); while (degpol(x) < n);
113
return x;
114
}
115
116
static GEN
117
rand_FlxqX(long n, GEN T, ulong l)
118
{
119
GEN x;
120
do x = random_FlxqX(n+1, 0, T, l); while (degpol(x) < n);
121
return x;
122
}
123
124
static GEN
125
rand_FpXQX(long n, GEN T)
126
{
127
GEN x;
128
do x = random_FpXQX(n+1, 0, T, LARGE_mod); while (degpol(x) < n);
129
return x;
130
}
131
/* normalized Fpx, degree n */
132
static GEN
133
rand_NFpX(long n)
134
{
135
pari_sp av = avma;
136
GEN x = gadd(pol_xn(n,0), random_FpX(n, 0, LARGE_mod));
137
return gerepileupto(av, x);
138
}
139
140
/* normalized Flx, degree n */
141
static GEN
142
rand_NFlx(long n, ulong l)
143
{
144
pari_sp av = avma;
145
GEN x = Flx_add(Flx_shift(pol1_Flx(0),n), random_Flx(n, 0, l), l);
146
return gerepileuptoleaf(av, x);
147
}
148
149
static GEN
150
rand_NF2xqX(long n, GEN T)
151
{
152
pari_sp av = avma;
153
GEN x = F2xX_add(monomial(pol1_F2x(0),n,0), random_F2xqX(n, 0, T));
154
return gerepileupto(av, x);
155
}
156
157
static GEN
158
rand_NFlxqX(long n, GEN T, long l)
159
{
160
pari_sp av = avma;
161
GEN x = FlxX_add(monomial(pol1_Flx(0),n,0), random_FlxqX(n, 0, T, l), l);
162
return gerepileupto(av, x);
163
}
164
165
static GEN
166
rand_NFpXQX(long n, GEN T)
167
{
168
pari_sp av = avma;
169
GEN x = gadd(pol_xn(n,0), random_FpXQX(n, 0, T, LARGE_mod));
170
return gerepileupto(av, x);
171
}
172
173
#define t_F2x 100
174
#define t_Flx 200
175
#define t_Fl2x 201
176
#define t_NFlx 210
177
#define t_NFl2x 211
178
#define t_FpX 300
179
#define t_NFpX 310
180
#define t_F2xqX 400
181
#define t_NF2xqX 410
182
#define t_FlxqX 500
183
#define t_NFlxqX 510
184
#define t_FpXQX 600
185
#define t_NFpXQX 610
186
187
static GEN
188
rand_g(speed_param *s)
189
{
190
long n = s->size;
191
switch (s->type) {
192
case t_INT: return rand_INT(n);
193
case t_REAL: return rand_REAL(n);
194
case t_F2x: return rand_F2x(n);
195
case t_Flx: return rand_Flx(n,DFLT_mod);
196
case t_Fl2x: return rand_Flx(n,DFLT_mod2);
197
case t_NFlx: return rand_NFlx(n,DFLT_mod);
198
case t_NFl2x: return rand_NFlx(n,DFLT_mod2);
199
case t_FpX: return rand_FpX(n);
200
case t_NFpX: return rand_NFpX(n);
201
case t_F2xqX: return rand_F2xqX(n, s->T);
202
case t_NF2xqX: return rand_NF2xqX(n, s->T);
203
case t_FlxqX: return rand_FlxqX(n, s->T, s->l);
204
case t_NFlxqX: return rand_NFlxqX(n, s->T, s->l);
205
case t_FpXQX: return rand_FpXQX(n, s->T);
206
case t_NFpXQX: return rand_NFpXQX(n, s->T);
207
}
208
return NULL;
209
}
210
211
static void
212
dft_F2xq(speed_param *s)
213
{
214
pari_sp av = avma;
215
do
216
{
217
avma = av;
218
s->T = random_F2x(BITS_IN_LONG*2, 0);
219
} while (!F2x_is_irred(s->T));
220
s->T[1] = evalvarn(1);
221
s->T = F2x_get_red(s->T);
222
}
223
224
static void
225
dft_Flxq(speed_param *s)
226
{
227
pari_sp av = avma;
228
do
229
{
230
avma = av;
231
s->T = rand_NFlx(DFLT_deg, s->l);
232
} while (!Flx_is_irred(s->T, s->l));
233
s->T[1] = evalvarn(1);
234
s->T = Flx_get_red(s->T, s->l);
235
}
236
237
static void
238
dft_FpXQ(speed_param *s)
239
{
240
pari_sp av = avma;
241
do
242
{
243
avma = av;
244
s->T = rand_NFpX(DFLT_deg);
245
} while (!FpX_is_irred(s->T, LARGE_mod));
246
setvarn(s->T, 1);
247
s->T = FpX_get_red(s->T, s->p);
248
}
249
250
static void
251
dftmod(speed_param *s)
252
{
253
switch (s->type) {
254
case t_Flx: s->l=DFLT_mod; return;
255
case t_Fl2x: s->l=DFLT_mod2; return;
256
case t_NFlx: s->l=DFLT_mod; return;
257
case t_NFl2x: s->l=DFLT_mod2; return;
258
case t_FpX: s->p=LARGE_mod; return;
259
case t_NFpX: s->p=LARGE_mod; return;
260
case t_F2xqX: s->l=2; dft_F2xq(s); return;
261
case t_NF2xqX: s->l=2; dft_F2xq(s); return;
262
case t_FlxqX: s->l=DFLT_mod; dft_Flxq(s); return;
263
case t_NFlxqX: s->l=DFLT_mod; dft_Flxq(s); return;
264
case t_FpXQX: s->p=LARGE_mod; dft_FpXQ(s); return;
265
case t_NFpXQX: s->p=LARGE_mod; dft_FpXQ(s); return;
266
}
267
}
268
269
/* ========================================================== */
270
#define TIME_FUN(call) {\
271
{ \
272
pari_sp av = avma; \
273
int i; \
274
speed_starttime(); \
275
i = (s)->reps; \
276
do { call; set_avma(av); } while (--i); \
277
} \
278
return speed_endtime(); \
279
}
280
281
#define m_menable(s,var,min) (*(s->var)=minss(lg(s->x)-2,s->min))
282
#define m_enable(s,var) (*(s->var)=lg(s->x)-2)/* enable asymptotically fastest */
283
#define m_disable(s,var) (*(s->var)=lg(s->x)+1)/* disable asymptotically fastest */
284
285
static void enable(speed_param *s)
286
{
287
m_enable(s,var); s->enabled = 1;
288
if (s->var_disable) m_disable(s,var_disable);
289
if (s->var_enable) m_menable(s,var_enable,var_enable_min);
290
}
291
292
static void disable(speed_param *s)
293
{
294
m_disable(s,var); s->enabled = 0;
295
if (s->var_disable) m_disable(s,var_disable);
296
if (s->var_enable) m_menable(s,var_enable,var_enable_min);
297
}
298
299
static double speed_mulrr(speed_param *s)
300
{ TIME_FUN(mulrr(s->x, s->y)); }
301
302
static double speed_sqrr(speed_param *s)
303
{ TIME_FUN(sqrr(s->x)); }
304
305
static double speed_mulii(speed_param *s)
306
{ TIME_FUN(mulii(s->x, s->y)); }
307
308
static double speed_sqri (speed_param *s)
309
{ TIME_FUN(sqri(s->x)); }
310
311
static double speed_extgcdii(speed_param *s)
312
{ GEN u,v; TIME_FUN(bezout(s->x, s->y, &u, &v)); }
313
314
static double speed_gcdii(speed_param *s)
315
{ TIME_FUN(gcdii(s->x, s->y)); }
316
317
static double speed_halfgcdii(speed_param *s)
318
{ TIME_FUN(halfgcdii(s->x, s->y)); }
319
320
static double speed_exp(speed_param *s)
321
{ TIME_FUN(mpexp(s->x)); }
322
323
static double speed_inv(speed_param *s)
324
{ TIME_FUN(invr(s->x)); }
325
326
static double speed_log(speed_param *s)
327
{ TIME_FUN(mplog(s->x)); }
328
329
static double speed_logcx(speed_param *s)
330
{ GEN z; setexpo(s->x,0); z = mkcomplex(gen_1, s->x);
331
glog(z,s->size);
332
TIME_FUN(glog(z,s->size)); }
333
334
static double speed_atan(speed_param *s)
335
{ setexpo(s->x, 0);
336
gatan(s->x, 0);
337
TIME_FUN(gatan(s->x, 0)); }
338
339
static double speed_Fp_pow(speed_param *s)
340
{ TIME_FUN( Fp_pow(s->x, subis(s->y,1), s->y)); }
341
342
static double speed_divrr(speed_param *s)
343
{ TIME_FUN(divrr(s->x, s->y)); }
344
345
static double speed_invmod(speed_param *s)
346
{ GEN T; TIME_FUN(invmod(s->x, s->y, &T)); }
347
348
static double speed_F2x_mul(speed_param *s)
349
{ TIME_FUN(F2x_mul(s->x, s->y)); }
350
351
static double speed_Flx_sqr(speed_param *s)
352
{ TIME_FUN(Flx_sqr(s->x, s->l)); }
353
354
static double speed_Flx_inv(speed_param *s)
355
{ TIME_FUN(Flx_invBarrett(s->x, s->l)); }
356
357
static double speed_Flx_mul(speed_param *s)
358
{ TIME_FUN(Flx_mul(s->x, s->y, s->l)); }
359
360
static double speed_Flx_divrem(speed_param *s)
361
{
362
GEN r, x = rand_NFlx((degpol(s->x)-1)*2, s->l);
363
TIME_FUN(Flx_divrem(x, s->x, s->l, &r));
364
}
365
366
static double speed_Flx_rem(speed_param *s) {
367
GEN x = rand_NFlx((degpol(s->x)-1)*2, s->l);
368
TIME_FUN(Flx_rem(x, s->x, s->l));
369
}
370
371
static double speed_Flxq_red(speed_param *s) {
372
GEN x = rand_NFlx((degpol(s->x)-1)*2, s->l);
373
GEN q = Flx_get_red(s->x, s->l);
374
TIME_FUN(Flx_rem(x, q, s->l));
375
}
376
377
static double speed_Flx_halfgcd(speed_param *s)
378
{ TIME_FUN(Flx_halfgcd(s->x, s->y, s->l)); }
379
380
static double speed_Flx_gcd(speed_param *s)
381
{ TIME_FUN(Flx_gcd(s->x, s->y, s->l)); }
382
383
static double speed_Flx_extgcd(speed_param *s)
384
{ GEN u,v; TIME_FUN(Flx_extgcd(s->x, s->y, s->l, &u, &v)); }
385
386
static double speed_FpX_inv(speed_param *s)
387
{ TIME_FUN(FpX_invBarrett(s->x, s->p)); }
388
389
static double speed_FpX_divrem(speed_param *s)
390
{
391
GEN r, x = rand_NFpX((degpol(s->x)-1)*2);
392
TIME_FUN(FpX_divrem(x, s->x, s->p, &r));
393
}
394
395
static double speed_FpX_rem(speed_param *s)
396
{
397
GEN x = rand_NFpX((degpol(s->x)-1)*2);
398
TIME_FUN(FpX_rem(x, s->x, s->p));
399
}
400
401
static double speed_FpXQ_red(speed_param *s) {
402
GEN x = rand_NFpX((degpol(s->x)-1)*2);
403
GEN q = FpX_get_red(s->x, s->p);
404
TIME_FUN(FpX_rem(x, q, s->p));
405
}
406
407
static double speed_FpX_halfgcd(speed_param *s)
408
{ TIME_FUN(FpX_halfgcd(s->x, s->y, s->p)); }
409
static double speed_FpX_gcd(speed_param *s)
410
{ TIME_FUN(FpX_gcd(s->x, s->y, s->p)); }
411
static double speed_FpX_extgcd(speed_param *s)
412
{ GEN u,v; TIME_FUN(FpX_extgcd(s->x, s->y, s->p, &u, &v)); }
413
414
static double speed_F2xqX_inv(speed_param *s)
415
{ TIME_FUN(F2xqX_invBarrett(s->x, s->T)); }
416
417
static double speed_F2xqX_divrem(speed_param *s)
418
{
419
GEN r, x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);
420
TIME_FUN(F2xqX_divrem(x, s->x, s->T, &r));
421
}
422
423
static double speed_F2xqX_rem(speed_param *s)
424
{
425
GEN x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);
426
TIME_FUN(F2xqX_rem(x, s->x, s->T));
427
}
428
429
static double speed_F2xqXQ_red(speed_param *s) {
430
GEN x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);
431
GEN q = F2xqX_get_red(s->x, s->T);
432
TIME_FUN(F2xqX_rem(x, q, s->T));
433
}
434
435
static double speed_F2xqX_halfgcd(speed_param *s)
436
{ TIME_FUN(F2xqX_halfgcd(s->x, s->y, s->T)); }
437
438
static double speed_F2xqX_extgcd(speed_param *s)
439
{ GEN u,v; TIME_FUN(F2xqX_extgcd(s->x, s->y, s->T, &u, &v)); }
440
441
static double speed_F2xqX_gcd(speed_param *s)
442
{ TIME_FUN(F2xqX_gcd(s->x, s->y, s->T)); }
443
444
static double speed_FlxqX_inv(speed_param *s)
445
{ TIME_FUN(FlxqX_invBarrett(s->x, s->T, s->l)); }
446
447
448
static double speed_FlxqX_divrem(speed_param *s)
449
{
450
GEN r, x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);
451
TIME_FUN(FlxqX_divrem(x, s->x, s->T, s->l, &r));
452
}
453
454
static double speed_FlxqX_rem(speed_param *s)
455
{
456
GEN x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);
457
TIME_FUN(FlxqX_rem(x, s->x, s->T, s->l));
458
}
459
460
static double speed_FlxqXQ_red(speed_param *s) {
461
GEN x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);
462
GEN q = FlxqX_get_red(s->x, s->T, s->l);
463
TIME_FUN(FlxqX_rem(x, q, s->T, s->l));
464
}
465
466
static double speed_FlxqX_halfgcd(speed_param *s)
467
{ TIME_FUN(FlxqX_halfgcd(s->x, s->y, s->T, s->l)); }
468
469
static double speed_FlxqX_extgcd(speed_param *s)
470
{ GEN u,v; TIME_FUN(FlxqX_extgcd(s->x, s->y, s->T, s->l, &u, &v)); }
471
472
static double speed_FlxqX_gcd(speed_param *s)
473
{ TIME_FUN(FlxqX_gcd(s->x, s->y, s->T, s->l)); }
474
475
static double speed_FpXQX_inv(speed_param *s)
476
{ TIME_FUN(FpXQX_invBarrett(s->x, s->T, s->p)); }
477
478
static double speed_FpXQX_divrem(speed_param *s)
479
{
480
GEN r, x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);
481
TIME_FUN(FpXQX_divrem(x, s->x, s->T, s->p, &r));
482
}
483
484
static double speed_FpXQX_rem(speed_param *s)
485
{
486
GEN x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);
487
TIME_FUN(FpXQX_rem(x, s->x, s->T, s->p));
488
}
489
490
static double speed_FpXQXQ_red(speed_param *s) {
491
GEN x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);
492
GEN q = FpXQX_get_red(s->x, s->T, s->p);
493
TIME_FUN(FpXQX_rem(x, q, s->T, s->p));
494
}
495
496
static double speed_FpXQX_halfgcd(speed_param *s)
497
{ TIME_FUN(FpXQX_halfgcd(s->x, s->y, s->T, s->p)); }
498
499
static double speed_FpXQX_extgcd(speed_param *s)
500
{ GEN u,v; TIME_FUN(FpXQX_extgcd(s->x, s->y, s->T, s->p, &u, &v)); }
501
502
static double speed_FpXQX_gcd(speed_param *s)
503
{ TIME_FUN(FpXQX_gcd(s->x, s->y, s->T, s->p)); }
504
505
/* small coeffs: earlier thresholds for more complicated rings */
506
static double speed_RgX_sqr(speed_param *s)
507
{ TIME_FUN(RgX_sqr_i(s->x)); }
508
static double speed_RgX_mul(speed_param *s)
509
{ TIME_FUN(RgX_mul_i(s->x, s->y)); }
510
511
enum { PARI = 1, GMP = 2 };
512
#ifdef PARI_KERNEL_GMP
513
# define AVOID PARI
514
#else
515
# define AVOID GMP
516
#endif
517
518
/* Thresholds are set in this order. If f() depends on g(), g() should
519
* occur first */
520
#define var(a) # a, &a
521
static tune_param param[] = {
522
{PARI,var(MULII_KARATSUBA_LIMIT), t_INT, 4,0, speed_mulii,0,0,&MULII_FFT_LIMIT},
523
{PARI,var(SQRI_KARATSUBA_LIMIT), t_INT, 4,0, speed_sqri,0,0,&SQRI_FFT_LIMIT},
524
{PARI,var(MULII_FFT_LIMIT), t_INT, 500,0, speed_mulii,0.02},
525
{PARI,var(SQRI_FFT_LIMIT), t_INT, 500,0, speed_sqri,0.02},
526
{0, var(HALFGCD_LIMIT), t_INT, 3,0, speed_halfgcdii,0.02},
527
{PARI,var(EXTGCD_HALFGCD_LIMIT), t_INT, 5,0, speed_extgcdii,0.02},
528
{PARI,var(GCD_HALFGCD_LIMIT), t_INT, 5,0, speed_gcdii,0.02},
529
{0, var(MULRR_MULII_LIMIT), t_REAL,4,0, speed_mulrr},
530
{0, var(SQRR_SQRI_LIMIT), t_REAL,4,0, speed_sqrr},
531
{0, var(Fp_POW_REDC_LIMIT), t_INT, 3,100, speed_Fp_pow,0,0,&Fp_POW_BARRETT_LIMIT},
532
{0, var(Fp_POW_BARRETT_LIMIT), t_INT, 3,0, speed_Fp_pow},
533
{0, var(INVNEWTON_LIMIT), t_REAL,66,0, speed_inv,0.03},
534
{GMP, var(DIVRR_GMP_LIMIT), t_REAL,4,0, speed_divrr},
535
{0, var(EXPNEWTON_LIMIT), t_REAL,66,0, speed_exp},
536
{0, var(LOGAGM_LIMIT), t_REAL,4,0, speed_log},
537
{0, var(LOGAGMCX_LIMIT), t_REAL,3,0, speed_logcx,0.05},
538
{0, var(AGM_ATAN_LIMIT), t_REAL,20,0, speed_atan,0.05},
539
{GMP, var(INVMOD_GMP_LIMIT), t_INT, 3,0, speed_invmod},
540
{0, var(F2x_MUL_KARATSUBA_LIMIT),t_F2x,3,0, speed_F2x_mul,0,0,&F2x_MUL_MULII_LIMIT},
541
{0, var(F2x_MUL_MULII_LIMIT), t_F2x,20,20000, speed_F2x_mul},
542
{0, var(Flx_MUL_KARATSUBA_LIMIT),t_Flx,5,0, speed_Flx_mul,0,0,&Flx_MUL_MULII_LIMIT},
543
{0, var(Flx_SQR_KARATSUBA_LIMIT),t_Flx,5,0, speed_Flx_sqr,0,0,&Flx_SQR_SQRI_LIMIT},
544
{0, var(Flx_MUL2_KARATSUBA_LIMIT),t_Fl2x,5,0, speed_Flx_mul,0,0,&Flx_MUL2_MULII_LIMIT},
545
{0, var(Flx_SQR2_KARATSUBA_LIMIT),t_Fl2x,5,0, speed_Flx_sqr,0,0,&Flx_SQR2_SQRI_LIMIT},
546
{0, var(Flx_MUL_MULII_LIMIT), t_Flx,5,0, speed_Flx_mul},
547
{0, var(Flx_SQR_SQRI_LIMIT), t_Flx,5,0, speed_Flx_sqr},
548
{0, var(Flx_MUL2_MULII_LIMIT), t_Fl2x,5,20000, speed_Flx_mul,0.05},
549
{0, var(Flx_SQR2_SQRI_LIMIT), t_Fl2x,5,20000, speed_Flx_sqr,0.05},
550
{0, var(Flx_INVBARRETT_LIMIT), t_NFlx, 5,0, speed_Flx_inv},
551
{0, var(Flx_INVBARRETT2_LIMIT),t_NFl2x,5,0, speed_Flx_inv},
552
{0, var(Flx_DIVREM_BARRETT_LIMIT) ,t_NFlx,10,0, speed_Flx_divrem,0.05},
553
{0, var(Flx_DIVREM2_BARRETT_LIMIT),t_NFl2x,10,0, speed_Flx_divrem,0.05},
554
{0, var(Flx_REM_BARRETT_LIMIT), t_NFlx,10,0, speed_Flx_rem,0.05},
555
{0, var(Flx_REM2_BARRETT_LIMIT), t_NFl2x,10,0, speed_Flx_rem,0.05},
556
{0, var(Flx_BARRETT_LIMIT), t_NFlx, 5,0, speed_Flxq_red},
557
{0, var(Flx_BARRETT2_LIMIT),t_NFl2x,5,0, speed_Flxq_red},
558
{0, var(Flx_HALFGCD_LIMIT), t_Flx, 10,0, speed_Flx_halfgcd},
559
{0, var(Flx_HALFGCD2_LIMIT),t_Fl2x,10,0, speed_Flx_halfgcd},
560
{0, var(Flx_GCD_LIMIT), t_Flx,10,0, speed_Flx_gcd,0.1},
561
{0, var(Flx_GCD2_LIMIT), t_Fl2x,10,0, speed_Flx_gcd,0.1},
562
{0, var(Flx_EXTGCD_LIMIT), t_Flx,10,0, speed_Flx_extgcd},
563
{0, var(Flx_EXTGCD2_LIMIT), t_Fl2x,10,0, speed_Flx_extgcd},
564
{0, var(F2xqX_INVBARRETT_LIMIT),t_NF2xqX,10,0, speed_F2xqX_inv,0.05},
565
{0, var(F2xqX_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqXQ_red,0.05},
566
{0, var(F2xqX_DIVREM_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqX_divrem,0.05},
567
{0, var(F2xqX_REM_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqX_rem,0.05},
568
{0, var(F2xqX_HALFGCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_halfgcd,0.05},
569
{0, var(F2xqX_GCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_gcd,0.1},
570
{0, var(F2xqX_EXTGCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_extgcd,0.05},
571
{0, var(FlxqX_INVBARRETT_LIMIT),t_NFlxqX,10,0, speed_FlxqX_inv,0.05},
572
{0, var(FlxqX_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqXQ_red,0.05},
573
{0, var(FlxqX_DIVREM_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqX_divrem,0.05},
574
{0, var(FlxqX_REM_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqX_rem,0.05},
575
{0, var(FlxqX_HALFGCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_halfgcd,0.05},
576
{0, var(FlxqX_GCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_gcd,0.05},
577
{0, var(FlxqX_EXTGCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_extgcd,0.05},
578
{0, var(FpX_INVBARRETT_LIMIT), t_NFpX,10,0, speed_FpX_inv,0.05},
579
{0, var(FpX_DIVREM_BARRETT_LIMIT),t_NFpX,10,0, speed_FpX_divrem,0.05},
580
{0, var(FpX_REM_BARRETT_LIMIT), t_NFpX,10,0, speed_FpX_rem,0.05},
581
{0, var(FpX_BARRETT_LIMIT), t_NFpX,10,0, speed_FpXQ_red},
582
{0, var(FpX_HALFGCD_LIMIT), t_FpX,10,0, speed_FpX_halfgcd},
583
{0, var(FpX_GCD_LIMIT), t_FpX,10,0, speed_FpX_gcd,0.1},
584
{0, var(FpX_EXTGCD_LIMIT), t_FpX,10,0, speed_FpX_extgcd},
585
{0, var(FpXQX_INVBARRETT_LIMIT),t_NFpXQX,10,0, speed_FpXQX_inv,0.05},
586
{0, var(FpXQX_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQXQ_red,0.05},
587
{0, var(FpXQX_DIVREM_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQX_divrem,0.05},
588
{0, var(FpXQX_REM_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQX_rem,0.05},
589
{0, var(FpXQX_HALFGCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_halfgcd,0.05},
590
{0, var(FpXQX_GCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_gcd,0.05},
591
{0, var(FpXQX_EXTGCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_extgcd,0.05},
592
{0, var(RgX_MUL_LIMIT), t_FpX, 4,0, speed_RgX_mul},
593
{0, var(RgX_SQR_LIMIT), t_FpX, 4,0, speed_RgX_sqr},
594
};
595
596
/* ========================================================== */
597
int ndat = 0, allocdat = 0;
598
struct dat_t {
599
long size;
600
double d;
601
} *dat = NULL;
602
603
int
604
double_cmp_ptr(double *x, double *y) { return (int)(*x - *y); }
605
606
double
607
time_fun(speed_function_t fun, speed_param *s, long enabled)
608
{
609
const double TOLERANCE = 1.005; /* 0.5% */
610
pari_sp av = avma;
611
double t[30];
612
ulong i, j, e;
613
614
s->reps = 1;
615
if (enabled) enable(s); else disable(s);
616
for (i = 0; i < numberof(t); i++)
617
{
618
for (;;)
619
{
620
double reps_d;
621
t[i] = fun(s);
622
if (!t[i]) { s->reps *= 10; continue; }
623
if (t[i] >= speed_unittime * speed_precision) break;
624
625
/* go to a value of reps to make t[i] >= precision */
626
reps_d = ceil (1.1 * s->reps
627
* speed_unittime * speed_precision
628
/ maxdd(t[i], speed_unittime));
629
if (reps_d > 2e9 || reps_d < 1.0)
630
pari_err(e_MISC, "Fatal error: new reps bad: %.2f", reps_d);
631
632
s->reps = (ulong)reps_d;
633
}
634
t[i] /= s->reps;
635
636
/* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
637
e = (t[0] >= 2.0)? 3: 4;
638
639
/* Look for e many t[]'s within TOLERANCE of each other to consider a
640
valid measurement. Return smallest among them. */
641
if (i >= e)
642
{
643
qsort (t, i+1, sizeof(t[0]), (QSCOMP)double_cmp_ptr);
644
for (j = e-1; j < i; j++)
645
if (t[j] <= t[j-e+1] * TOLERANCE) return gc_double(av, t[j-e+1]);
646
}
647
}
648
pari_err(e_MISC,"couldn't measure time");
649
return -1.0; /* LCOV_EXCL_LINE */
650
}
651
652
int
653
cmpdat(const void *a, const void *b)
654
{
655
struct dat_t *da =(struct dat_t *)a;
656
struct dat_t *db =(struct dat_t *)b;
657
return da->size-db->size;
658
}
659
660
void
661
add_dat(long size, double d)
662
{
663
if (ndat == allocdat)
664
{
665
allocdat += maxss(allocdat, 100);
666
pari_realloc_ip((void**)&dat, allocdat * sizeof(dat[0]));
667
}
668
dat[ndat].size = size;
669
dat[ndat].d = d; ndat++;
670
qsort(dat, ndat, sizeof(*dat), cmpdat);
671
}
672
673
void
674
diag(const char *format, ...)
675
{
676
va_list ap;
677
va_start(ap, format);
678
vfprintf(stderr, format, ap);
679
va_end(ap);
680
}
681
void
682
print_define(const char *name, long value)
683
{ printf("#define __%-30s %ld\n", name, value); }
684
685
long
686
analyze_dat(int final)
687
{
688
double x, min_x;
689
int j, min_j;
690
691
/* If the threshold is set at dat[0].size, any positive values are bad. */
692
x = 0.0;
693
for (j = 0; j < ndat; j++)
694
if (dat[j].d > 0.0) x += dat[j].d;
695
696
if (final && option_trace >= 3)
697
{
698
diag("\n");
699
diag("x is the sum of the badness from setting thresh at given size\n");
700
diag(" (minimum x is sought)\n");
701
diag("size=%ld first x=%.4f\n", dat[j].size, x);
702
}
703
704
min_x = x;
705
min_j = 0;
706
707
/* When stepping to the next dat[j].size, positive values are no longer
708
bad (so subtracted), negative values become bad (so add the absolute
709
value, meaning subtract). */
710
for (j = 0; j < ndat; j++)
711
{
712
if (final && option_trace >= 3)
713
diag ("size=%ld x=%.4f\n", dat[j].size, x);
714
715
if (x < min_x) { min_x = x; min_j = j; }
716
x -= dat[j].d;
717
}
718
return min_j;
719
}
720
721
void
722
Test(tune_param *param, long linear)
723
{
724
int since_positive, since_change, thresh, new_thresh;
725
speed_param s;
726
long save_var_disable = -1;
727
pari_timer T;
728
pari_sp av=avma;
729
long good = -1, bad = param->min_size;
730
731
if (param->kernel == AVOID) { print_define(param->name, -1); return; }
732
733
#define DEFAULT(x,n) if (! (param->x)) param->x = (n);
734
DEFAULT(step_factor, Step_Factor);
735
DEFAULT(stop_factor, 1.2);
736
DEFAULT(max_size, 10000);
737
if (param->var_disable) save_var_disable = *(param->var_disable);
738
if (param->var_enable) s.var_enable_min = *(param->var_enable);
739
740
s.type = param->type;
741
s.size = param->min_size;
742
s.var = param->var;
743
s.var_disable = param->var_disable;
744
s.var_enable = param->var_enable;
745
dftmod(&s);
746
ndat = since_positive = since_change = thresh = 0;
747
if (option_trace >= 1)
748
{
749
timer_start(&T);
750
diag("\nSetting %s... (default %ld)\n", param->name, *(param->var));
751
}
752
if (option_trace >= 2)
753
{
754
diag(" algorithm-A algorithm-B ratio possible\n");
755
diag(" (seconds) (seconds) diff thresh\n");
756
}
757
758
for(;;)
759
{
760
pari_sp av=avma;
761
double t1, t2, d;
762
s.x = rand_g(&s);
763
s.y = rand_g(&s);
764
t1 = time_fun(param->fun, &s, 0);
765
t2 = time_fun(param->fun, &s, 1);
766
set_avma(av);
767
if (t2 >= t1) d = (t2 - t1) / t2;
768
else d = (t2 - t1) / t1;
769
770
add_dat(s.size, d);
771
new_thresh = analyze_dat(0);
772
773
if (option_trace >= 2)
774
diag ("size =%4ld %.8f %.8f % .4f %c %ld\n",
775
s.size, t1,t2, d, d < 0? '#': ' ', dat[new_thresh].size);
776
777
#define SINCE_POSITIVE 20
778
#define SINCE_CHANGE 50
779
if (linear)
780
{
781
/* Stop if method B has been consistently faster for a while */
782
if (d >= 0)
783
since_positive = 0;
784
else
785
if (++since_positive > SINCE_POSITIVE)
786
{
787
if (option_trace >= 1)
788
diag("Stop: since_positive (%d)\n", SINCE_POSITIVE);
789
break;
790
}
791
/* Stop if method A has become slower by a certain factor */
792
if (t1 >= t2 * param->stop_factor)
793
{
794
if (option_trace >= 1)
795
diag("Stop: t1 >= t2 * factor (%.1f)\n", param->stop_factor);
796
break;
797
}
798
/* Stop if threshold implied hasn't changed for a while */
799
if (thresh != new_thresh)
800
since_change = 0, thresh = new_thresh;
801
else
802
if (++since_change > SINCE_CHANGE)
803
{
804
if (option_trace >= 1)
805
diag("Stop: since_change (%d)\n", SINCE_CHANGE);
806
break;
807
}
808
s.size += maxss((long)floor(s.size * param->step_factor), 1);
809
}
810
else
811
{
812
if (t2 <= t1)
813
new_thresh = good = s.size;
814
else
815
bad = s.size;
816
if (bad == -1)
817
linear = 1;
818
else if (good == -1)
819
{
820
long new_size = minss(2*s.size,param->max_size-1);
821
if (new_size==s.size) linear = 1;
822
s.size = new_size;
823
}
824
else if (good-bad <= maxss(1,(long)(20*param->step_factor*bad)))
825
{
826
linear = 1;
827
new_thresh = s.size = bad + 1;
828
}
829
else s.size = (good+bad)/2;
830
}
831
if (s.size >= param->max_size)
832
{
833
if (option_trace >= 1)
834
diag("Stop: max_size (%ld). Disable Algorithm B?\n",param->max_size);
835
break;
836
}
837
}
838
thresh = dat[analyze_dat(1)].size;
839
if (option_trace >= 1)
840
diag("Total time: %gs\n", (double)timer_delay(&T)/1000.);
841
print_define(param->name, thresh);
842
*(param->var) = thresh; /* set to optimal value for next tests */
843
if (param->var_disable) *(param->var_disable) = save_var_disable;
844
if (param->var_enable) *(param->var_enable) = s.var_enable_min;
845
set_avma(av);
846
}
847
848
void error(char **argv) {
849
long i;
850
diag("This is the PARI/GP tuning utility. Usage: tune [OPTION] var1 var2...\n");
851
diag("Options:\n");
852
diag(" -t: verbose output\n");
853
diag(" -tt: very verbose output\n");
854
diag(" -ttt: output everything\n");
855
diag(" -l: use linear search (slower)\n");
856
diag(" -d xxx: set finite field degree to xxx (default 10)\n");
857
diag(" -p xxx: set Flx modulus to xxx (default 27449)\n");
858
diag(" -s xxx: set step factor between successive sizes to xxx (default 0.01)\n");
859
diag(" -u xxx: set speed_unittime to xxx (default 1e-4s)\n");
860
diag("Tunable variables (omitting variable indices tunes everybody):\n");
861
for (i = 0; i < (long)numberof(param); i++)
862
diag(" %2ld: %-25s (default %4ld)\n", i, param[i].name, *(param[i].var));
863
exit(1);
864
}
865
866
int
867
main(int argc, char **argv)
868
{
869
int i, r, n = 0;
870
int linear = 0;
871
GEN v;
872
pari_init(160000000, 2);
873
LARGE_mod=subis(powuu(3,128),62);
874
DFLT_mod = unextprime((1UL<<((BITS_IN_LONG-2)>>1))+1);
875
DFLT_mod2 = unextprime((1UL<<(BITS_IN_LONG-1))+1);
876
DFLT_deg = 10;
877
v = new_chunk(argc);
878
for (i = 1; i < argc; i++)
879
{
880
char *s = argv[i];
881
if (*s == '-') {
882
switch(*++s) {
883
case 't': option_trace++;
884
while (*++s == 't') option_trace++;
885
break;
886
case 'l': linear = 1-linear;
887
break;
888
case 'd':
889
if (!*++s)
890
{
891
if (++i == argc) error(argv);
892
s = argv[i];
893
}
894
DFLT_deg = itou(gp_read_str(s)); break;
895
case 'p':
896
if (!*++s)
897
{
898
if (++i == argc) error(argv);
899
s = argv[i];
900
}
901
DFLT_mod = itou(gp_read_str(s)); break;
902
case 's':
903
if (!*++s)
904
{
905
if (++i == argc) error(argv);
906
s = argv[i];
907
}
908
Step_Factor = atof(s); break;
909
case 'u': s++;
910
if (!*++s)
911
{
912
if (++i == argc) error(argv);
913
s = argv[i];
914
}
915
speed_unittime = atof(s); break;
916
default: error(argv);
917
}
918
} else {
919
if (!isdigit((int)*s)) error(argv);
920
r = atol(s); if (r >= (long)numberof(param) || r < 0) error(argv);
921
v[n++] = r;
922
}
923
}
924
if (n) { for (i = 0; i < n; i++) Test(&param[ v[i] ], linear); return 0; }
925
n = numberof(param);
926
for (i = 0; i < n; i++) Test(&param[i], linear);
927
return 0;
928
}
929
930