Path: blob/master/src/java.base/share/native/libfdlibm/k_rem_pio2.c
41149 views
/*1* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)27* double x[],y[]; int e0,nx,prec; int ipio2[];28*29* __kernel_rem_pio2 return the last three digits of N with30* y = x - N*pi/231* so that |y| < pi/2.32*33* The method is to compute the integer (mod 8) and fraction parts of34* (2/pi)*x without doing the full multiplication. In general we35* skip the part of the product that are known to be a huge integer (36* more accurately, = 0 mod 8 ). Thus the number of operations are37* independent of the exponent of the input.38*39* (2/pi) is represented by an array of 24-bit integers in ipio2[].40*41* Input parameters:42* x[] The input value (must be positive) is broken into nx43* pieces of 24-bit integers in double precision format.44* x[i] will be the i-th 24 bit of x. The scaled exponent45* of x[0] is given in input parameter e0 (i.e., x[0]*2^e046* match x's up to 24 bits.47*48* Example of breaking a double positive z into x[0]+x[1]+x[2]:49* e0 = ilogb(z)-2350* z = scalbn(z,-e0)51* for i = 0,1,252* x[i] = floor(z)53* z = (z-x[i])*2**2454*55*56* y[] output result in an array of double precision numbers.57* The dimension of y[] is:58* 24-bit precision 159* 53-bit precision 260* 64-bit precision 261* 113-bit precision 362* The actual value is the sum of them. Thus for 113-bit63* precison, one may have to do something like:64*65* long double t,w,r_head, r_tail;66* t = (long double)y[2] + (long double)y[1];67* w = (long double)y[0];68* r_head = t+w;69* r_tail = w - (r_head - t);70*71* e0 The exponent of x[0]72*73* nx dimension of x[]74*75* prec an integer indicating the precision:76* 0 24 bits (single)77* 1 53 bits (double)78* 2 64 bits (extended)79* 3 113 bits (quad)80*81* ipio2[]82* integer array, contains the (24*i)-th to (24*i+23)-th83* bit of 2/pi after binary point. The corresponding84* floating value is85*86* ipio2[i] * 2^(-24(i+1)).87*88* External function:89* double scalbn(), floor();90*91*92* Here is the description of some local variables:93*94* jk jk+1 is the initial number of terms of ipio2[] needed95* in the computation. The recommended value is 2,3,4,96* 6 for single, double, extended,and quad.97*98* jz local integer variable indicating the number of99* terms of ipio2[] used.100*101* jx nx - 1102*103* jv index for pointing to the suitable ipio2[] for the104* computation. In general, we want105* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8106* is an integer. Thus107* e0-3-24*jv >= 0 or (e0-3)/24 >= jv108* Hence jv = max(0,(e0-3)/24).109*110* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.111*112* q[] double array with integral value, representing the113* 24-bits chunk of the product of x and 2/pi.114*115* q0 the corresponding exponent of q[0]. Note that the116* exponent for q[i] would be q0-24*i.117*118* PIo2[] double precision array, obtained by cutting pi/2119* into 24 bits chunks.120*121* f[] ipio2[] in floating point122*123* iq[] integer array by breaking up q[] in 24-bits chunk.124*125* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]126*127* ih integer. If >0 it indicates q[] is >= 0.5, hence128* it also indicates the *sign* of the result.129*130*/131132133/*134* Constants:135* The hexadecimal values are the intended ones for the following136* constants. The decimal values may be used, provided that the137* compiler will convert from decimal to binary accurately enough138* to produce the hexadecimal values shown.139*/140141#include "fdlibm.h"142143#ifdef __STDC__144static const int init_jk[] = {2,3,4,6}; /* initial value for jk */145#else146static int init_jk[] = {2,3,4,6};147#endif148149#ifdef __STDC__150static const double PIo2[] = {151#else152static double PIo2[] = {153#endif1541.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */1557.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */1565.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */1573.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */1581.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */1591.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */1602.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */1612.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */162};163164#ifdef __STDC__165static const double166#else167static double168#endif169zero = 0.0,170one = 1.0,171two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */172twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */173174#ifdef __STDC__175int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2)176#else177int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)178double x[], y[]; int e0,nx,prec; int ipio2[];179#endif180{181int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;182double z,fw,f[20],fq[20],q[20];183184/* initialize jk*/185jk = init_jk[prec];186jp = jk;187188/* determine jx,jv,q0, note that 3>q0 */189jx = nx-1;190jv = (e0-3)/24; if(jv<0) jv=0;191q0 = e0-24*(jv+1);192193/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */194j = jv-jx; m = jx+jk;195for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];196197/* compute q[0],q[1],...q[jk] */198for (i=0;i<=jk;i++) {199for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;200}201202jz = jk;203recompute:204/* distill q[] into iq[] reversingly */205for(i=0,j=jz,z=q[jz];j>0;i++,j--) {206fw = (double)((int)(twon24* z));207iq[i] = (int)(z-two24*fw);208z = q[j-1]+fw;209}210211/* compute n */212z = scalbn(z,q0); /* actual value of z */213z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */214n = (int) z;215z -= (double)n;216ih = 0;217if(q0>0) { /* need iq[jz-1] to determine n */218i = (iq[jz-1]>>(24-q0)); n += i;219iq[jz-1] -= i<<(24-q0);220ih = iq[jz-1]>>(23-q0);221}222else if(q0==0) ih = iq[jz-1]>>23;223else if(z>=0.5) ih=2;224225if(ih>0) { /* q > 0.5 */226n += 1; carry = 0;227for(i=0;i<jz ;i++) { /* compute 1-q */228j = iq[i];229if(carry==0) {230if(j!=0) {231carry = 1; iq[i] = 0x1000000- j;232}233} else iq[i] = 0xffffff - j;234}235if(q0>0) { /* rare case: chance is 1 in 12 */236switch(q0) {237case 1:238iq[jz-1] &= 0x7fffff; break;239case 2:240iq[jz-1] &= 0x3fffff; break;241}242}243if(ih==2) {244z = one - z;245if(carry!=0) z -= scalbn(one,q0);246}247}248249/* check if recomputation is needed */250if(z==zero) {251j = 0;252for (i=jz-1;i>=jk;i--) j |= iq[i];253if(j==0) { /* need recomputation */254for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */255256for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */257f[jx+i] = (double) ipio2[jv+i];258for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];259q[i] = fw;260}261jz += k;262goto recompute;263}264}265266/* chop off zero terms */267if(z==0.0) {268jz -= 1; q0 -= 24;269while(iq[jz]==0) { jz--; q0-=24;}270} else { /* break z into 24-bit if necessary */271z = scalbn(z,-q0);272if(z>=two24) {273fw = (double)((int)(twon24*z));274iq[jz] = (int)(z-two24*fw);275jz += 1; q0 += 24;276iq[jz] = (int) fw;277} else iq[jz] = (int) z ;278}279280/* convert integer "bit" chunk to floating-point value */281fw = scalbn(one,q0);282for(i=jz;i>=0;i--) {283q[i] = fw*(double)iq[i]; fw*=twon24;284}285286/* compute PIo2[0,...,jp]*q[jz,...,0] */287for(i=jz;i>=0;i--) {288for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];289fq[jz-i] = fw;290}291292/* compress fq[] into y[] */293switch(prec) {294case 0:295fw = 0.0;296for (i=jz;i>=0;i--) fw += fq[i];297y[0] = (ih==0)? fw: -fw;298break;299case 1:300case 2:301fw = 0.0;302for (i=jz;i>=0;i--) fw += fq[i];303y[0] = (ih==0)? fw: -fw;304fw = fq[0]-fw;305for (i=1;i<=jz;i++) fw += fq[i];306y[1] = (ih==0)? fw: -fw;307break;308case 3: /* painful */309for (i=jz;i>0;i--) {310fw = fq[i-1]+fq[i];311fq[i] += fq[i-1]-fw;312fq[i-1] = fw;313}314for (i=jz;i>1;i--) {315fw = fq[i-1]+fq[i];316fq[i] += fq[i-1]-fw;317fq[i-1] = fw;318}319for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];320if(ih==0) {321y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;322} else {323y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;324}325}326return n&7;327}328329330