Testing latest pari + WASM + node.js... and it works?! Wow.
License: GPL3
ubuntu2004
/* Copyright (C) 2001 The PARI group.12This file is part of the PARI/GP package.34PARI/GP is free software; you can redistribute it and/or modify it under the5terms of the GNU General Public License as published by the Free Software6Foundation; either version 2 of the License, or (at your option) any later7version. It is distributed in the hope that it will be useful, but WITHOUT8ANY WARRANTY WHATSOEVER.910Check the License for details. You should have received a copy of it, along11with the package; see the file 'COPYING'. If not, write to the Free Software12Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */1314/* This file is a quick hack adapted from gmp-4.0 tuning utilities15* (T. Granlund et al.)16*17* (GMU MP Library is Copyright Free Software Foundation, Inc.) */18#define PARI_TUNE19#include "pari.h"20#include "paripriv.h"2122int option_trace = 0;23double Step_Factor = .01; /* small steps by default */24ulong DFLT_mod, DFLT_mod2, DFLT_deg;25GEN LARGE_mod;2627typedef struct {28ulong reps, type;29long *var, *var_disable, *var_enable, var_enable_min, size, enabled;30GEN x, y;31ulong l;32GEN T, p;33} speed_param;3435typedef double (*speed_function_t)(speed_param *s);3637typedef struct {38int kernel;39const char *name;40long *var;41int type; /* t_INT or t_REAL */42long min_size;43long max_size;44speed_function_t fun;45double step_factor; /* how much to step sizes (rounded down) */46double stop_factor;47long *var_disable;48long *var_enable;49} tune_param;5051/* ========================================================== */52/* To use GMP cycle counting functions, look for GMP in Oxxx/Makefile */53#ifdef GMP_TIMER54/* needed to link with gmp-4.0/tune/{time,freq}.o */55int speed_option_verbose = 0;56extern double speed_unittime;57extern int speed_precision;58void speed_starttime(void);59double speed_endtime(void);60#else61static pari_timer __T;62static double speed_unittime = 1e-4;63static int speed_precision= 1000;64static void speed_starttime() { timer_start(&__T); }65static double speed_endtime() { return (double)timer_delay(&__T)/1000.; }66#endif6768/* ========================================================== */69/* int, n words, odd */70static GEN71rand_INT(long n)72{73pari_sp av = avma;74GEN x, N = int2n(n*BITS_IN_LONG);75do x = randomi(N); while (lgefint(x) != n+2);76if (!mpodd(x)) x = addis(x,1); /*For Barrett REDC */77return gerepileuptoint(av, x);78}79/* real, n words */80static GEN81rand_REAL(long n) { return gmul2n(itor(rand_INT(n), n+2),-BITS_IN_LONG*n); }8283static GEN84rand_FpX(long n)85{86GEN x;87do x = random_FpX(n+1, 0, LARGE_mod); while (degpol(x) < n);88return x;89}90/* Flx, degree n */91static GEN92rand_F2x(long n)93{94GEN x;95do x = random_F2x(BITS_IN_LONG*(n+1), 0); while (degpol(x) < n);96return x;97}98/* Flx, degree n */99static GEN100rand_Flx(long n, ulong l)101{102GEN x;103do x = random_Flx(n+1, 0, l); while (degpol(x) < n);104return x;105}106107static GEN108rand_F2xqX(long n, GEN T)109{110GEN x;111do x = random_F2xqX(n+1, 0, T); while (degpol(x) < n);112return x;113}114115static GEN116rand_FlxqX(long n, GEN T, ulong l)117{118GEN x;119do x = random_FlxqX(n+1, 0, T, l); while (degpol(x) < n);120return x;121}122123static GEN124rand_FpXQX(long n, GEN T)125{126GEN x;127do x = random_FpXQX(n+1, 0, T, LARGE_mod); while (degpol(x) < n);128return x;129}130/* normalized Fpx, degree n */131static GEN132rand_NFpX(long n)133{134pari_sp av = avma;135GEN x = gadd(pol_xn(n,0), random_FpX(n, 0, LARGE_mod));136return gerepileupto(av, x);137}138139/* normalized Flx, degree n */140static GEN141rand_NFlx(long n, ulong l)142{143pari_sp av = avma;144GEN x = Flx_add(Flx_shift(pol1_Flx(0),n), random_Flx(n, 0, l), l);145return gerepileuptoleaf(av, x);146}147148static GEN149rand_NF2xqX(long n, GEN T)150{151pari_sp av = avma;152GEN x = F2xX_add(monomial(pol1_F2x(0),n,0), random_F2xqX(n, 0, T));153return gerepileupto(av, x);154}155156static GEN157rand_NFlxqX(long n, GEN T, long l)158{159pari_sp av = avma;160GEN x = FlxX_add(monomial(pol1_Flx(0),n,0), random_FlxqX(n, 0, T, l), l);161return gerepileupto(av, x);162}163164static GEN165rand_NFpXQX(long n, GEN T)166{167pari_sp av = avma;168GEN x = gadd(pol_xn(n,0), random_FpXQX(n, 0, T, LARGE_mod));169return gerepileupto(av, x);170}171172#define t_F2x 100173#define t_Flx 200174#define t_Fl2x 201175#define t_NFlx 210176#define t_NFl2x 211177#define t_FpX 300178#define t_NFpX 310179#define t_F2xqX 400180#define t_NF2xqX 410181#define t_FlxqX 500182#define t_NFlxqX 510183#define t_FpXQX 600184#define t_NFpXQX 610185186static GEN187rand_g(speed_param *s)188{189long n = s->size;190switch (s->type) {191case t_INT: return rand_INT(n);192case t_REAL: return rand_REAL(n);193case t_F2x: return rand_F2x(n);194case t_Flx: return rand_Flx(n,DFLT_mod);195case t_Fl2x: return rand_Flx(n,DFLT_mod2);196case t_NFlx: return rand_NFlx(n,DFLT_mod);197case t_NFl2x: return rand_NFlx(n,DFLT_mod2);198case t_FpX: return rand_FpX(n);199case t_NFpX: return rand_NFpX(n);200case t_F2xqX: return rand_F2xqX(n, s->T);201case t_NF2xqX: return rand_NF2xqX(n, s->T);202case t_FlxqX: return rand_FlxqX(n, s->T, s->l);203case t_NFlxqX: return rand_NFlxqX(n, s->T, s->l);204case t_FpXQX: return rand_FpXQX(n, s->T);205case t_NFpXQX: return rand_NFpXQX(n, s->T);206}207return NULL;208}209210static void211dft_F2xq(speed_param *s)212{213pari_sp av = avma;214do215{216avma = av;217s->T = random_F2x(BITS_IN_LONG*2, 0);218} while (!F2x_is_irred(s->T));219s->T[1] = evalvarn(1);220s->T = F2x_get_red(s->T);221}222223static void224dft_Flxq(speed_param *s)225{226pari_sp av = avma;227do228{229avma = av;230s->T = rand_NFlx(DFLT_deg, s->l);231} while (!Flx_is_irred(s->T, s->l));232s->T[1] = evalvarn(1);233s->T = Flx_get_red(s->T, s->l);234}235236static void237dft_FpXQ(speed_param *s)238{239pari_sp av = avma;240do241{242avma = av;243s->T = rand_NFpX(DFLT_deg);244} while (!FpX_is_irred(s->T, LARGE_mod));245setvarn(s->T, 1);246s->T = FpX_get_red(s->T, s->p);247}248249static void250dftmod(speed_param *s)251{252switch (s->type) {253case t_Flx: s->l=DFLT_mod; return;254case t_Fl2x: s->l=DFLT_mod2; return;255case t_NFlx: s->l=DFLT_mod; return;256case t_NFl2x: s->l=DFLT_mod2; return;257case t_FpX: s->p=LARGE_mod; return;258case t_NFpX: s->p=LARGE_mod; return;259case t_F2xqX: s->l=2; dft_F2xq(s); return;260case t_NF2xqX: s->l=2; dft_F2xq(s); return;261case t_FlxqX: s->l=DFLT_mod; dft_Flxq(s); return;262case t_NFlxqX: s->l=DFLT_mod; dft_Flxq(s); return;263case t_FpXQX: s->p=LARGE_mod; dft_FpXQ(s); return;264case t_NFpXQX: s->p=LARGE_mod; dft_FpXQ(s); return;265}266}267268/* ========================================================== */269#define TIME_FUN(call) {\270{ \271pari_sp av = avma; \272int i; \273speed_starttime(); \274i = (s)->reps; \275do { call; set_avma(av); } while (--i); \276} \277return speed_endtime(); \278}279280#define m_menable(s,var,min) (*(s->var)=minss(lg(s->x)-2,s->min))281#define m_enable(s,var) (*(s->var)=lg(s->x)-2)/* enable asymptotically fastest */282#define m_disable(s,var) (*(s->var)=lg(s->x)+1)/* disable asymptotically fastest */283284static void enable(speed_param *s)285{286m_enable(s,var); s->enabled = 1;287if (s->var_disable) m_disable(s,var_disable);288if (s->var_enable) m_menable(s,var_enable,var_enable_min);289}290291static void disable(speed_param *s)292{293m_disable(s,var); s->enabled = 0;294if (s->var_disable) m_disable(s,var_disable);295if (s->var_enable) m_menable(s,var_enable,var_enable_min);296}297298static double speed_mulrr(speed_param *s)299{ TIME_FUN(mulrr(s->x, s->y)); }300301static double speed_sqrr(speed_param *s)302{ TIME_FUN(sqrr(s->x)); }303304static double speed_mulii(speed_param *s)305{ TIME_FUN(mulii(s->x, s->y)); }306307static double speed_sqri (speed_param *s)308{ TIME_FUN(sqri(s->x)); }309310static double speed_extgcdii(speed_param *s)311{ GEN u,v; TIME_FUN(bezout(s->x, s->y, &u, &v)); }312313static double speed_gcdii(speed_param *s)314{ TIME_FUN(gcdii(s->x, s->y)); }315316static double speed_halfgcdii(speed_param *s)317{ TIME_FUN(halfgcdii(s->x, s->y)); }318319static double speed_exp(speed_param *s)320{ TIME_FUN(mpexp(s->x)); }321322static double speed_inv(speed_param *s)323{ TIME_FUN(invr(s->x)); }324325static double speed_log(speed_param *s)326{ TIME_FUN(mplog(s->x)); }327328static double speed_logcx(speed_param *s)329{ GEN z; setexpo(s->x,0); z = mkcomplex(gen_1, s->x);330glog(z,s->size);331TIME_FUN(glog(z,s->size)); }332333static double speed_atan(speed_param *s)334{ setexpo(s->x, 0);335gatan(s->x, 0);336TIME_FUN(gatan(s->x, 0)); }337338static double speed_Fp_pow(speed_param *s)339{ TIME_FUN( Fp_pow(s->x, subis(s->y,1), s->y)); }340341static double speed_divrr(speed_param *s)342{ TIME_FUN(divrr(s->x, s->y)); }343344static double speed_invmod(speed_param *s)345{ GEN T; TIME_FUN(invmod(s->x, s->y, &T)); }346347static double speed_F2x_mul(speed_param *s)348{ TIME_FUN(F2x_mul(s->x, s->y)); }349350static double speed_Flx_sqr(speed_param *s)351{ TIME_FUN(Flx_sqr(s->x, s->l)); }352353static double speed_Flx_inv(speed_param *s)354{ TIME_FUN(Flx_invBarrett(s->x, s->l)); }355356static double speed_Flx_mul(speed_param *s)357{ TIME_FUN(Flx_mul(s->x, s->y, s->l)); }358359static double speed_Flx_divrem(speed_param *s)360{361GEN r, x = rand_NFlx((degpol(s->x)-1)*2, s->l);362TIME_FUN(Flx_divrem(x, s->x, s->l, &r));363}364365static double speed_Flx_rem(speed_param *s) {366GEN x = rand_NFlx((degpol(s->x)-1)*2, s->l);367TIME_FUN(Flx_rem(x, s->x, s->l));368}369370static double speed_Flxq_red(speed_param *s) {371GEN x = rand_NFlx((degpol(s->x)-1)*2, s->l);372GEN q = Flx_get_red(s->x, s->l);373TIME_FUN(Flx_rem(x, q, s->l));374}375376static double speed_Flx_halfgcd(speed_param *s)377{ TIME_FUN(Flx_halfgcd(s->x, s->y, s->l)); }378379static double speed_Flx_gcd(speed_param *s)380{ TIME_FUN(Flx_gcd(s->x, s->y, s->l)); }381382static double speed_Flx_extgcd(speed_param *s)383{ GEN u,v; TIME_FUN(Flx_extgcd(s->x, s->y, s->l, &u, &v)); }384385static double speed_FpX_inv(speed_param *s)386{ TIME_FUN(FpX_invBarrett(s->x, s->p)); }387388static double speed_FpX_divrem(speed_param *s)389{390GEN r, x = rand_NFpX((degpol(s->x)-1)*2);391TIME_FUN(FpX_divrem(x, s->x, s->p, &r));392}393394static double speed_FpX_rem(speed_param *s)395{396GEN x = rand_NFpX((degpol(s->x)-1)*2);397TIME_FUN(FpX_rem(x, s->x, s->p));398}399400static double speed_FpXQ_red(speed_param *s) {401GEN x = rand_NFpX((degpol(s->x)-1)*2);402GEN q = FpX_get_red(s->x, s->p);403TIME_FUN(FpX_rem(x, q, s->p));404}405406static double speed_FpX_halfgcd(speed_param *s)407{ TIME_FUN(FpX_halfgcd(s->x, s->y, s->p)); }408static double speed_FpX_gcd(speed_param *s)409{ TIME_FUN(FpX_gcd(s->x, s->y, s->p)); }410static double speed_FpX_extgcd(speed_param *s)411{ GEN u,v; TIME_FUN(FpX_extgcd(s->x, s->y, s->p, &u, &v)); }412413static double speed_F2xqX_inv(speed_param *s)414{ TIME_FUN(F2xqX_invBarrett(s->x, s->T)); }415416static double speed_F2xqX_divrem(speed_param *s)417{418GEN r, x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);419TIME_FUN(F2xqX_divrem(x, s->x, s->T, &r));420}421422static double speed_F2xqX_rem(speed_param *s)423{424GEN x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);425TIME_FUN(F2xqX_rem(x, s->x, s->T));426}427428static double speed_F2xqXQ_red(speed_param *s) {429GEN x = rand_NF2xqX((degpol(s->x)-1)*2, s->T);430GEN q = F2xqX_get_red(s->x, s->T);431TIME_FUN(F2xqX_rem(x, q, s->T));432}433434static double speed_F2xqX_halfgcd(speed_param *s)435{ TIME_FUN(F2xqX_halfgcd(s->x, s->y, s->T)); }436437static double speed_F2xqX_extgcd(speed_param *s)438{ GEN u,v; TIME_FUN(F2xqX_extgcd(s->x, s->y, s->T, &u, &v)); }439440static double speed_F2xqX_gcd(speed_param *s)441{ TIME_FUN(F2xqX_gcd(s->x, s->y, s->T)); }442443static double speed_FlxqX_inv(speed_param *s)444{ TIME_FUN(FlxqX_invBarrett(s->x, s->T, s->l)); }445446447static double speed_FlxqX_divrem(speed_param *s)448{449GEN r, x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);450TIME_FUN(FlxqX_divrem(x, s->x, s->T, s->l, &r));451}452453static double speed_FlxqX_rem(speed_param *s)454{455GEN x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);456TIME_FUN(FlxqX_rem(x, s->x, s->T, s->l));457}458459static double speed_FlxqXQ_red(speed_param *s) {460GEN x = rand_NFlxqX((degpol(s->x)-1)*2, s->T, s->l);461GEN q = FlxqX_get_red(s->x, s->T, s->l);462TIME_FUN(FlxqX_rem(x, q, s->T, s->l));463}464465static double speed_FlxqX_halfgcd(speed_param *s)466{ TIME_FUN(FlxqX_halfgcd(s->x, s->y, s->T, s->l)); }467468static double speed_FlxqX_extgcd(speed_param *s)469{ GEN u,v; TIME_FUN(FlxqX_extgcd(s->x, s->y, s->T, s->l, &u, &v)); }470471static double speed_FlxqX_gcd(speed_param *s)472{ TIME_FUN(FlxqX_gcd(s->x, s->y, s->T, s->l)); }473474static double speed_FpXQX_inv(speed_param *s)475{ TIME_FUN(FpXQX_invBarrett(s->x, s->T, s->p)); }476477static double speed_FpXQX_divrem(speed_param *s)478{479GEN r, x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);480TIME_FUN(FpXQX_divrem(x, s->x, s->T, s->p, &r));481}482483static double speed_FpXQX_rem(speed_param *s)484{485GEN x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);486TIME_FUN(FpXQX_rem(x, s->x, s->T, s->p));487}488489static double speed_FpXQXQ_red(speed_param *s) {490GEN x = rand_NFpXQX((degpol(s->x)-1)*2, s->T);491GEN q = FpXQX_get_red(s->x, s->T, s->p);492TIME_FUN(FpXQX_rem(x, q, s->T, s->p));493}494495static double speed_FpXQX_halfgcd(speed_param *s)496{ TIME_FUN(FpXQX_halfgcd(s->x, s->y, s->T, s->p)); }497498static double speed_FpXQX_extgcd(speed_param *s)499{ GEN u,v; TIME_FUN(FpXQX_extgcd(s->x, s->y, s->T, s->p, &u, &v)); }500501static double speed_FpXQX_gcd(speed_param *s)502{ TIME_FUN(FpXQX_gcd(s->x, s->y, s->T, s->p)); }503504/* small coeffs: earlier thresholds for more complicated rings */505static double speed_RgX_sqr(speed_param *s)506{ TIME_FUN(RgX_sqr_i(s->x)); }507static double speed_RgX_mul(speed_param *s)508{ TIME_FUN(RgX_mul_i(s->x, s->y)); }509510enum { PARI = 1, GMP = 2 };511#ifdef PARI_KERNEL_GMP512# define AVOID PARI513#else514# define AVOID GMP515#endif516517/* Thresholds are set in this order. If f() depends on g(), g() should518* occur first */519#define var(a) # a, &a520static tune_param param[] = {521{PARI,var(MULII_KARATSUBA_LIMIT), t_INT, 4,0, speed_mulii,0,0,&MULII_FFT_LIMIT},522{PARI,var(SQRI_KARATSUBA_LIMIT), t_INT, 4,0, speed_sqri,0,0,&SQRI_FFT_LIMIT},523{PARI,var(MULII_FFT_LIMIT), t_INT, 500,0, speed_mulii,0.02},524{PARI,var(SQRI_FFT_LIMIT), t_INT, 500,0, speed_sqri,0.02},525{0, var(HALFGCD_LIMIT), t_INT, 3,0, speed_halfgcdii,0.02},526{PARI,var(EXTGCD_HALFGCD_LIMIT), t_INT, 5,0, speed_extgcdii,0.02},527{PARI,var(GCD_HALFGCD_LIMIT), t_INT, 5,0, speed_gcdii,0.02},528{0, var(MULRR_MULII_LIMIT), t_REAL,4,0, speed_mulrr},529{0, var(SQRR_SQRI_LIMIT), t_REAL,4,0, speed_sqrr},530{0, var(Fp_POW_REDC_LIMIT), t_INT, 3,100, speed_Fp_pow,0,0,&Fp_POW_BARRETT_LIMIT},531{0, var(Fp_POW_BARRETT_LIMIT), t_INT, 3,0, speed_Fp_pow},532{0, var(INVNEWTON_LIMIT), t_REAL,66,0, speed_inv,0.03},533{GMP, var(DIVRR_GMP_LIMIT), t_REAL,4,0, speed_divrr},534{0, var(EXPNEWTON_LIMIT), t_REAL,66,0, speed_exp},535{0, var(LOGAGM_LIMIT), t_REAL,4,0, speed_log},536{0, var(LOGAGMCX_LIMIT), t_REAL,3,0, speed_logcx,0.05},537{0, var(AGM_ATAN_LIMIT), t_REAL,20,0, speed_atan,0.05},538{GMP, var(INVMOD_GMP_LIMIT), t_INT, 3,0, speed_invmod},539{0, var(F2x_MUL_KARATSUBA_LIMIT),t_F2x,3,0, speed_F2x_mul,0,0,&F2x_MUL_MULII_LIMIT},540{0, var(F2x_MUL_MULII_LIMIT), t_F2x,20,20000, speed_F2x_mul},541{0, var(Flx_MUL_KARATSUBA_LIMIT),t_Flx,5,0, speed_Flx_mul,0,0,&Flx_MUL_MULII_LIMIT},542{0, var(Flx_SQR_KARATSUBA_LIMIT),t_Flx,5,0, speed_Flx_sqr,0,0,&Flx_SQR_SQRI_LIMIT},543{0, var(Flx_MUL2_KARATSUBA_LIMIT),t_Fl2x,5,0, speed_Flx_mul,0,0,&Flx_MUL2_MULII_LIMIT},544{0, var(Flx_SQR2_KARATSUBA_LIMIT),t_Fl2x,5,0, speed_Flx_sqr,0,0,&Flx_SQR2_SQRI_LIMIT},545{0, var(Flx_MUL_MULII_LIMIT), t_Flx,5,0, speed_Flx_mul},546{0, var(Flx_SQR_SQRI_LIMIT), t_Flx,5,0, speed_Flx_sqr},547{0, var(Flx_MUL2_MULII_LIMIT), t_Fl2x,5,20000, speed_Flx_mul,0.05},548{0, var(Flx_SQR2_SQRI_LIMIT), t_Fl2x,5,20000, speed_Flx_sqr,0.05},549{0, var(Flx_INVBARRETT_LIMIT), t_NFlx, 5,0, speed_Flx_inv},550{0, var(Flx_INVBARRETT2_LIMIT),t_NFl2x,5,0, speed_Flx_inv},551{0, var(Flx_DIVREM_BARRETT_LIMIT) ,t_NFlx,10,0, speed_Flx_divrem,0.05},552{0, var(Flx_DIVREM2_BARRETT_LIMIT),t_NFl2x,10,0, speed_Flx_divrem,0.05},553{0, var(Flx_REM_BARRETT_LIMIT), t_NFlx,10,0, speed_Flx_rem,0.05},554{0, var(Flx_REM2_BARRETT_LIMIT), t_NFl2x,10,0, speed_Flx_rem,0.05},555{0, var(Flx_BARRETT_LIMIT), t_NFlx, 5,0, speed_Flxq_red},556{0, var(Flx_BARRETT2_LIMIT),t_NFl2x,5,0, speed_Flxq_red},557{0, var(Flx_HALFGCD_LIMIT), t_Flx, 10,0, speed_Flx_halfgcd},558{0, var(Flx_HALFGCD2_LIMIT),t_Fl2x,10,0, speed_Flx_halfgcd},559{0, var(Flx_GCD_LIMIT), t_Flx,10,0, speed_Flx_gcd,0.1},560{0, var(Flx_GCD2_LIMIT), t_Fl2x,10,0, speed_Flx_gcd,0.1},561{0, var(Flx_EXTGCD_LIMIT), t_Flx,10,0, speed_Flx_extgcd},562{0, var(Flx_EXTGCD2_LIMIT), t_Fl2x,10,0, speed_Flx_extgcd},563{0, var(F2xqX_INVBARRETT_LIMIT),t_NF2xqX,10,0, speed_F2xqX_inv,0.05},564{0, var(F2xqX_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqXQ_red,0.05},565{0, var(F2xqX_DIVREM_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqX_divrem,0.05},566{0, var(F2xqX_REM_BARRETT_LIMIT), t_NF2xqX,10,0, speed_F2xqX_rem,0.05},567{0, var(F2xqX_HALFGCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_halfgcd,0.05},568{0, var(F2xqX_GCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_gcd,0.1},569{0, var(F2xqX_EXTGCD_LIMIT), t_F2xqX,10,0, speed_F2xqX_extgcd,0.05},570{0, var(FlxqX_INVBARRETT_LIMIT),t_NFlxqX,10,0, speed_FlxqX_inv,0.05},571{0, var(FlxqX_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqXQ_red,0.05},572{0, var(FlxqX_DIVREM_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqX_divrem,0.05},573{0, var(FlxqX_REM_BARRETT_LIMIT), t_NFlxqX,10,0, speed_FlxqX_rem,0.05},574{0, var(FlxqX_HALFGCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_halfgcd,0.05},575{0, var(FlxqX_GCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_gcd,0.05},576{0, var(FlxqX_EXTGCD_LIMIT), t_FlxqX,10,0, speed_FlxqX_extgcd,0.05},577{0, var(FpX_INVBARRETT_LIMIT), t_NFpX,10,0, speed_FpX_inv,0.05},578{0, var(FpX_DIVREM_BARRETT_LIMIT),t_NFpX,10,0, speed_FpX_divrem,0.05},579{0, var(FpX_REM_BARRETT_LIMIT), t_NFpX,10,0, speed_FpX_rem,0.05},580{0, var(FpX_BARRETT_LIMIT), t_NFpX,10,0, speed_FpXQ_red},581{0, var(FpX_HALFGCD_LIMIT), t_FpX,10,0, speed_FpX_halfgcd},582{0, var(FpX_GCD_LIMIT), t_FpX,10,0, speed_FpX_gcd,0.1},583{0, var(FpX_EXTGCD_LIMIT), t_FpX,10,0, speed_FpX_extgcd},584{0, var(FpXQX_INVBARRETT_LIMIT),t_NFpXQX,10,0, speed_FpXQX_inv,0.05},585{0, var(FpXQX_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQXQ_red,0.05},586{0, var(FpXQX_DIVREM_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQX_divrem,0.05},587{0, var(FpXQX_REM_BARRETT_LIMIT), t_NFpXQX,10,0, speed_FpXQX_rem,0.05},588{0, var(FpXQX_HALFGCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_halfgcd,0.05},589{0, var(FpXQX_GCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_gcd,0.05},590{0, var(FpXQX_EXTGCD_LIMIT), t_FpXQX,10,0, speed_FpXQX_extgcd,0.05},591{0, var(RgX_MUL_LIMIT), t_FpX, 4,0, speed_RgX_mul},592{0, var(RgX_SQR_LIMIT), t_FpX, 4,0, speed_RgX_sqr},593};594595/* ========================================================== */596int ndat = 0, allocdat = 0;597struct dat_t {598long size;599double d;600} *dat = NULL;601602int603double_cmp_ptr(double *x, double *y) { return (int)(*x - *y); }604605double606time_fun(speed_function_t fun, speed_param *s, long enabled)607{608const double TOLERANCE = 1.005; /* 0.5% */609pari_sp av = avma;610double t[30];611ulong i, j, e;612613s->reps = 1;614if (enabled) enable(s); else disable(s);615for (i = 0; i < numberof(t); i++)616{617for (;;)618{619double reps_d;620t[i] = fun(s);621if (!t[i]) { s->reps *= 10; continue; }622if (t[i] >= speed_unittime * speed_precision) break;623624/* go to a value of reps to make t[i] >= precision */625reps_d = ceil (1.1 * s->reps626* speed_unittime * speed_precision627/ maxdd(t[i], speed_unittime));628if (reps_d > 2e9 || reps_d < 1.0)629pari_err(e_MISC, "Fatal error: new reps bad: %.2f", reps_d);630631s->reps = (ulong)reps_d;632}633t[i] /= s->reps;634635/* require 3 values within TOLERANCE when >= 2 secs, 4 when below */636e = (t[0] >= 2.0)? 3: 4;637638/* Look for e many t[]'s within TOLERANCE of each other to consider a639valid measurement. Return smallest among them. */640if (i >= e)641{642qsort (t, i+1, sizeof(t[0]), (QSCOMP)double_cmp_ptr);643for (j = e-1; j < i; j++)644if (t[j] <= t[j-e+1] * TOLERANCE) return gc_double(av, t[j-e+1]);645}646}647pari_err(e_MISC,"couldn't measure time");648return -1.0; /* LCOV_EXCL_LINE */649}650651int652cmpdat(const void *a, const void *b)653{654struct dat_t *da =(struct dat_t *)a;655struct dat_t *db =(struct dat_t *)b;656return da->size-db->size;657}658659void660add_dat(long size, double d)661{662if (ndat == allocdat)663{664allocdat += maxss(allocdat, 100);665pari_realloc_ip((void**)&dat, allocdat * sizeof(dat[0]));666}667dat[ndat].size = size;668dat[ndat].d = d; ndat++;669qsort(dat, ndat, sizeof(*dat), cmpdat);670}671672void673diag(const char *format, ...)674{675va_list ap;676va_start(ap, format);677vfprintf(stderr, format, ap);678va_end(ap);679}680void681print_define(const char *name, long value)682{ printf("#define __%-30s %ld\n", name, value); }683684long685analyze_dat(int final)686{687double x, min_x;688int j, min_j;689690/* If the threshold is set at dat[0].size, any positive values are bad. */691x = 0.0;692for (j = 0; j < ndat; j++)693if (dat[j].d > 0.0) x += dat[j].d;694695if (final && option_trace >= 3)696{697diag("\n");698diag("x is the sum of the badness from setting thresh at given size\n");699diag(" (minimum x is sought)\n");700diag("size=%ld first x=%.4f\n", dat[j].size, x);701}702703min_x = x;704min_j = 0;705706/* When stepping to the next dat[j].size, positive values are no longer707bad (so subtracted), negative values become bad (so add the absolute708value, meaning subtract). */709for (j = 0; j < ndat; j++)710{711if (final && option_trace >= 3)712diag ("size=%ld x=%.4f\n", dat[j].size, x);713714if (x < min_x) { min_x = x; min_j = j; }715x -= dat[j].d;716}717return min_j;718}719720void721Test(tune_param *param, long linear)722{723int since_positive, since_change, thresh, new_thresh;724speed_param s;725long save_var_disable = -1;726pari_timer T;727pari_sp av=avma;728long good = -1, bad = param->min_size;729730if (param->kernel == AVOID) { print_define(param->name, -1); return; }731732#define DEFAULT(x,n) if (! (param->x)) param->x = (n);733DEFAULT(step_factor, Step_Factor);734DEFAULT(stop_factor, 1.2);735DEFAULT(max_size, 10000);736if (param->var_disable) save_var_disable = *(param->var_disable);737if (param->var_enable) s.var_enable_min = *(param->var_enable);738739s.type = param->type;740s.size = param->min_size;741s.var = param->var;742s.var_disable = param->var_disable;743s.var_enable = param->var_enable;744dftmod(&s);745ndat = since_positive = since_change = thresh = 0;746if (option_trace >= 1)747{748timer_start(&T);749diag("\nSetting %s... (default %ld)\n", param->name, *(param->var));750}751if (option_trace >= 2)752{753diag(" algorithm-A algorithm-B ratio possible\n");754diag(" (seconds) (seconds) diff thresh\n");755}756757for(;;)758{759pari_sp av=avma;760double t1, t2, d;761s.x = rand_g(&s);762s.y = rand_g(&s);763t1 = time_fun(param->fun, &s, 0);764t2 = time_fun(param->fun, &s, 1);765set_avma(av);766if (t2 >= t1) d = (t2 - t1) / t2;767else d = (t2 - t1) / t1;768769add_dat(s.size, d);770new_thresh = analyze_dat(0);771772if (option_trace >= 2)773diag ("size =%4ld %.8f %.8f % .4f %c %ld\n",774s.size, t1,t2, d, d < 0? '#': ' ', dat[new_thresh].size);775776#define SINCE_POSITIVE 20777#define SINCE_CHANGE 50778if (linear)779{780/* Stop if method B has been consistently faster for a while */781if (d >= 0)782since_positive = 0;783else784if (++since_positive > SINCE_POSITIVE)785{786if (option_trace >= 1)787diag("Stop: since_positive (%d)\n", SINCE_POSITIVE);788break;789}790/* Stop if method A has become slower by a certain factor */791if (t1 >= t2 * param->stop_factor)792{793if (option_trace >= 1)794diag("Stop: t1 >= t2 * factor (%.1f)\n", param->stop_factor);795break;796}797/* Stop if threshold implied hasn't changed for a while */798if (thresh != new_thresh)799since_change = 0, thresh = new_thresh;800else801if (++since_change > SINCE_CHANGE)802{803if (option_trace >= 1)804diag("Stop: since_change (%d)\n", SINCE_CHANGE);805break;806}807s.size += maxss((long)floor(s.size * param->step_factor), 1);808}809else810{811if (t2 <= t1)812new_thresh = good = s.size;813else814bad = s.size;815if (bad == -1)816linear = 1;817else if (good == -1)818{819long new_size = minss(2*s.size,param->max_size-1);820if (new_size==s.size) linear = 1;821s.size = new_size;822}823else if (good-bad <= maxss(1,(long)(20*param->step_factor*bad)))824{825linear = 1;826new_thresh = s.size = bad + 1;827}828else s.size = (good+bad)/2;829}830if (s.size >= param->max_size)831{832if (option_trace >= 1)833diag("Stop: max_size (%ld). Disable Algorithm B?\n",param->max_size);834break;835}836}837thresh = dat[analyze_dat(1)].size;838if (option_trace >= 1)839diag("Total time: %gs\n", (double)timer_delay(&T)/1000.);840print_define(param->name, thresh);841*(param->var) = thresh; /* set to optimal value for next tests */842if (param->var_disable) *(param->var_disable) = save_var_disable;843if (param->var_enable) *(param->var_enable) = s.var_enable_min;844set_avma(av);845}846847void error(char **argv) {848long i;849diag("This is the PARI/GP tuning utility. Usage: tune [OPTION] var1 var2...\n");850diag("Options:\n");851diag(" -t: verbose output\n");852diag(" -tt: very verbose output\n");853diag(" -ttt: output everything\n");854diag(" -l: use linear search (slower)\n");855diag(" -d xxx: set finite field degree to xxx (default 10)\n");856diag(" -p xxx: set Flx modulus to xxx (default 27449)\n");857diag(" -s xxx: set step factor between successive sizes to xxx (default 0.01)\n");858diag(" -u xxx: set speed_unittime to xxx (default 1e-4s)\n");859diag("Tunable variables (omitting variable indices tunes everybody):\n");860for (i = 0; i < (long)numberof(param); i++)861diag(" %2ld: %-25s (default %4ld)\n", i, param[i].name, *(param[i].var));862exit(1);863}864865int866main(int argc, char **argv)867{868int i, r, n = 0;869int linear = 0;870GEN v;871pari_init(160000000, 2);872LARGE_mod=subis(powuu(3,128),62);873DFLT_mod = unextprime((1UL<<((BITS_IN_LONG-2)>>1))+1);874DFLT_mod2 = unextprime((1UL<<(BITS_IN_LONG-1))+1);875DFLT_deg = 10;876v = new_chunk(argc);877for (i = 1; i < argc; i++)878{879char *s = argv[i];880if (*s == '-') {881switch(*++s) {882case 't': option_trace++;883while (*++s == 't') option_trace++;884break;885case 'l': linear = 1-linear;886break;887case 'd':888if (!*++s)889{890if (++i == argc) error(argv);891s = argv[i];892}893DFLT_deg = itou(gp_read_str(s)); break;894case 'p':895if (!*++s)896{897if (++i == argc) error(argv);898s = argv[i];899}900DFLT_mod = itou(gp_read_str(s)); break;901case 's':902if (!*++s)903{904if (++i == argc) error(argv);905s = argv[i];906}907Step_Factor = atof(s); break;908case 'u': s++;909if (!*++s)910{911if (++i == argc) error(argv);912s = argv[i];913}914speed_unittime = atof(s); break;915default: error(argv);916}917} else {918if (!isdigit((int)*s)) error(argv);919r = atol(s); if (r >= (long)numberof(param) || r < 0) error(argv);920v[n++] = r;921}922}923if (n) { for (i = 0; i < n; i++) Test(¶m[ v[i] ], linear); return 0; }924n = numberof(param);925for (i = 0; i < n; i++) Test(¶m[i], linear);926return 0;927}928929930