CoCalc -- e

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/native/libfdlibm/e_sqrt.c
⁴¹¹⁵² views
1
/*
2
 * Copyright (c) 1998, 2001, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
/* __ieee754_sqrt(x)
27
 * Return correctly rounded sqrt.
28
 *           ------------------------------------------
29
 *           |  Use the hardware sqrt if you have one |
30
 *           ------------------------------------------
31
 * Method:
32
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
33
 *   1. Normalization
34
 *      Scale x to y in [1,4) with even powers of 2:
35
 *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
36
 *              sqrt(x) = 2^k * sqrt(y)
37
 *   2. Bit by bit computation
38
 *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
39
 *           i                                                   0
40
 *                                     i+1         2
41
 *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
42
 *           i      i            i                 i
43
 *
44
 *      To compute q    from q , one checks whether
45
 *                  i+1       i
46
 *
47
 *                            -(i+1) 2
48
 *                      (q + 2      ) <= y.                     (2)
49
 *                        i
50
 *                                                            -(i+1)
51
 *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
52
 *                             i+1   i             i+1   i
53
 *
54
 *      With some algebric manipulation, it is not difficult to see
55
 *      that (2) is equivalent to
56
 *                             -(i+1)
57
 *                      s  +  2       <= y                      (3)
58
 *                       i                i
59
 *
60
 *      The advantage of (3) is that s  and y  can be computed by
61
 *                                    i      i
62
 *      the following recurrence formula:
63
 *          if (3) is false
64
 *
65
 *          s     =  s  ,       y    = y   ;                    (4)
66
 *           i+1      i          i+1    i
67
 *
68
 *          otherwise,
69
 *                         -i                     -(i+1)
70
 *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
71
 *           i+1      i          i+1    i     i
72
 *
73
 *      One may easily use induction to prove (4) and (5).
74
 *      Note. Since the left hand side of (3) contain only i+2 bits,
75
 *            it does not necessary to do a full (53-bit) comparison
76
 *            in (3).
77
 *   3. Final rounding
78
 *      After generating the 53 bits result, we compute one more bit.
79
 *      Together with the remainder, we can decide whether the
80
 *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
81
 *      (it will never equal to 1/2ulp).
82
 *      The rounding mode can be detected by checking whether
83
 *      huge + tiny is equal to huge, and whether huge - tiny is
84
 *      equal to huge for some floating point number "huge" and "tiny".
85
 *
86
 * Special cases:
87
 *      sqrt(+-0) = +-0         ... exact
88
 *      sqrt(inf) = inf
89
 *      sqrt(-ve) = NaN         ... with invalid signal
90
 *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
91
 *
92
 * Other methods : see the appended file at the end of the program below.
93
 *---------------
94
 */
95

96
#include "fdlibm.h"
97

98
#ifdef __STDC__
99
static  const double    one     = 1.0, tiny=1.0e-300;
100
#else
101
static  double  one     = 1.0, tiny=1.0e-300;
102
#endif
103

104
#ifdef __STDC__
105
        double __ieee754_sqrt(double x)
106
#else
107
        double __ieee754_sqrt(x)
108
        double x;
109
#endif
110
{
111
        double z;
112
        int     sign = (int)0x80000000;
113
        unsigned r,t1,s1,ix1,q1;
114
        int ix0,s0,q,m,t,i;
115

116
        ix0 = __HI(x);                  /* high word of x */
117
        ix1 = __LO(x);          /* low word of x */
118

119
    /* take care of Inf and NaN */
120
        if((ix0&0x7ff00000)==0x7ff00000) {
121
            return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
122
                                           sqrt(-inf)=sNaN */
123
        }
124
    /* take care of zero */
125
        if(ix0<=0) {
126
            if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
127
            else if(ix0<0)
128
                return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
129
        }
130
    /* normalize x */
131
        m = (ix0>>20);
132
        if(m==0) {                              /* subnormal x */
133
            while(ix0==0) {
134
                m -= 21;
135
                ix0 |= (ix1>>11); ix1 <<= 21;
136
            }
137
            for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
138
            m -= i-1;
139
            ix0 |= (ix1>>(32-i));
140
            ix1 <<= i;
141
        }
142
        m -= 1023;      /* unbias exponent */
143
        ix0 = (ix0&0x000fffff)|0x00100000;
144
        if(m&1){        /* odd m, double x to make it even */
145
            ix0 += ix0 + ((ix1&sign)>>31);
146
            ix1 += ix1;
147
        }
148
        m >>= 1;        /* m = [m/2] */
149

150
    /* generate sqrt(x) bit by bit */
151
        ix0 += ix0 + ((ix1&sign)>>31);
152
        ix1 += ix1;
153
        q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
154
        r = 0x00200000;         /* r = moving bit from right to left */
155

156
        while(r!=0) {
157
            t = s0+r;
158
            if(t<=ix0) {
159
                s0   = t+r;
160
                ix0 -= t;
161
                q   += r;
162
            }
163
            ix0 += ix0 + ((ix1&sign)>>31);
164
            ix1 += ix1;
165
            r>>=1;
166
        }
167

168
        r = sign;
169
        while(r!=0) {
170
            t1 = s1+r;
171
            t  = s0;
172
            if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
173
                s1  = t1+r;
174
                if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
175
                ix0 -= t;
176
                if (ix1 < t1) ix0 -= 1;
177
                ix1 -= t1;
178
                q1  += r;
179
            }
180
            ix0 += ix0 + ((ix1&sign)>>31);
181
            ix1 += ix1;
182
            r>>=1;
183
        }
184

185
    /* use floating add to find out rounding direction */
186
        if((ix0|ix1)!=0) {
187
            z = one-tiny; /* trigger inexact flag */
188
            if (z>=one) {
189
                z = one+tiny;
190
                if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
191
                else if (z>one) {
192
                    if (q1==(unsigned)0xfffffffe) q+=1;
193
                    q1+=2;
194
                } else
195
                    q1 += (q1&1);
196
            }
197
        }
198
        ix0 = (q>>1)+0x3fe00000;
199
        ix1 =  q1>>1;
200
        if ((q&1)==1) ix1 |= sign;
201
        ix0 += (m <<20);
202
        __HI(z) = ix0;
203
        __LO(z) = ix1;
204
        return z;
205
}
206

207
/*
208
Other methods  (use floating-point arithmetic)
209
-------------
210
(This is a copy of a drafted paper by Prof W. Kahan
211
and K.C. Ng, written in May, 1986)
212

213
        Two algorithms are given here to implement sqrt(x)
214
        (IEEE double precision arithmetic) in software.
215
        Both supply sqrt(x) correctly rounded. The first algorithm (in
216
        Section A) uses newton iterations and involves four divisions.
217
        The second one uses reciproot iterations to avoid division, but
218
        requires more multiplications. Both algorithms need the ability
219
        to chop results of arithmetic operations instead of round them,
220
        and the INEXACT flag to indicate when an arithmetic operation
221
        is executed exactly with no roundoff error, all part of the
222
        standard (IEEE 754-1985). The ability to perform shift, add,
223
        subtract and logical AND operations upon 32-bit words is needed
224
        too, though not part of the standard.
225

226
A.  sqrt(x) by Newton Iteration
227

228
   (1)  Initial approximation
229

230
        Let x0 and x1 be the leading and the trailing 32-bit words of
231
        a floating point number x (in IEEE double format) respectively
232

233
            1    11                  52                           ...widths
234
           ------------------------------------------------------
235
        x: |s|    e     |             f                         |
236
           ------------------------------------------------------
237
              msb    lsb  msb                                 lsb ...order
238

239

240
             ------------------------        ------------------------
241
        x0:  |s|   e    |    f1     |    x1: |          f2           |
242
             ------------------------        ------------------------
243

244
        By performing shifts and subtracts on x0 and x1 (both regarded
245
        as integers), we obtain an 8-bit approximation of sqrt(x) as
246
        follows.
247

248
                k  := (x0>>1) + 0x1ff80000;
249
                y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
250
        Here k is a 32-bit integer and T1[] is an integer array containing
251
        correction terms. Now magically the floating value of y (y's
252
        leading 32-bit word is y0, the value of its trailing word is 0)
253
        approximates sqrt(x) to almost 8-bit.
254

255
        Value of T1:
256
        static int T1[32]= {
257
        0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
258
        29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
259
        83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
260
        16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
261

262
    (2) Iterative refinement
263

264
        Apply Heron's rule three times to y, we have y approximates
265
        sqrt(x) to within 1 ulp (Unit in the Last Place):
266

267
                y := (y+x/y)/2          ... almost 17 sig. bits
268
                y := (y+x/y)/2          ... almost 35 sig. bits
269
                y := y-(y-x/y)/2        ... within 1 ulp
270

271

272
        Remark 1.
273
            Another way to improve y to within 1 ulp is:
274

275
                y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
276
                y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
277

278
                                2
279
                            (x-y )*y
280
                y := y + 2* ----------  ...within 1 ulp
281
                               2
282
                             3y  + x
283

284

285
        This formula has one division fewer than the one above; however,
286
        it requires more multiplications and additions. Also x must be
287
        scaled in advance to avoid spurious overflow in evaluating the
288
        expression 3y*y+x. Hence it is not recommended uless division
289
        is slow. If division is very slow, then one should use the
290
        reciproot algorithm given in section B.
291

292
    (3) Final adjustment
293

294
        By twiddling y's last bit it is possible to force y to be
295
        correctly rounded according to the prevailing rounding mode
296
        as follows. Let r and i be copies of the rounding mode and
297
        inexact flag before entering the square root program. Also we
298
        use the expression y+-ulp for the next representable floating
299
        numbers (up and down) of y. Note that y+-ulp = either fixed
300
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
301
        mode.
302

303
                I := FALSE;     ... reset INEXACT flag I
304
                R := RZ;        ... set rounding mode to round-toward-zero
305
                z := x/y;       ... chopped quotient, possibly inexact
306
                If(not I) then {        ... if the quotient is exact
307
                    if(z=y) {
308
                        I := i;  ... restore inexact flag
309
                        R := r;  ... restore rounded mode
310
                        return sqrt(x):=y.
311
                    } else {
312
                        z := z - ulp;   ... special rounding
313
                    }
314
                }
315
                i := TRUE;              ... sqrt(x) is inexact
316
                If (r=RN) then z=z+ulp  ... rounded-to-nearest
317
                If (r=RP) then {        ... round-toward-+inf
318
                    y = y+ulp; z=z+ulp;
319
                }
320
                y := y+z;               ... chopped sum
321
                y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
322
                I := i;                 ... restore inexact flag
323
                R := r;                 ... restore rounded mode
324
                return sqrt(x):=y.
325

326
    (4) Special cases
327

328
        Square root of +inf, +-0, or NaN is itself;
329
        Square root of a negative number is NaN with invalid signal.
330

331

332
B.  sqrt(x) by Reciproot Iteration
333

334
   (1)  Initial approximation
335

336
        Let x0 and x1 be the leading and the trailing 32-bit words of
337
        a floating point number x (in IEEE double format) respectively
338
        (see section A). By performing shifs and subtracts on x0 and y0,
339
        we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
340

341
            k := 0x5fe80000 - (x0>>1);
342
            y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
343

344
        Here k is a 32-bit integer and T2[] is an integer array
345
        containing correction terms. Now magically the floating
346
        value of y (y's leading 32-bit word is y0, the value of
347
        its trailing word y1 is set to zero) approximates 1/sqrt(x)
348
        to almost 7.8-bit.
349

350
        Value of T2:
351
        static int T2[64]= {
352
        0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
353
        0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
354
        0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
355
        0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
356
        0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
357
        0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
358
        0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
359
        0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
360

361
    (2) Iterative refinement
362

363
        Apply Reciproot iteration three times to y and multiply the
364
        result by x to get an approximation z that matches sqrt(x)
365
        to about 1 ulp. To be exact, we will have
366
                -1ulp < sqrt(x)-z<1.0625ulp.
367

368
        ... set rounding mode to Round-to-nearest
369
           y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
370
           y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
371
        ... special arrangement for better accuracy
372
           z := x*y                     ... 29 bits to sqrt(x), with z*y<1
373
           z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
374

375
        Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
376
        (a) the term z*y in the final iteration is always less than 1;
377
        (b) the error in the final result is biased upward so that
378
                -1 ulp < sqrt(x) - z < 1.0625 ulp
379
            instead of |sqrt(x)-z|<1.03125ulp.
380

381
    (3) Final adjustment
382

383
        By twiddling y's last bit it is possible to force y to be
384
        correctly rounded according to the prevailing rounding mode
385
        as follows. Let r and i be copies of the rounding mode and
386
        inexact flag before entering the square root program. Also we
387
        use the expression y+-ulp for the next representable floating
388
        numbers (up and down) of y. Note that y+-ulp = either fixed
389
        point y+-1, or multiply y by nextafter(1,+-inf) in chopped
390
        mode.
391

392
        R := RZ;                ... set rounding mode to round-toward-zero
393
        switch(r) {
394
            case RN:            ... round-to-nearest
395
               if(x<= z*(z-ulp)...chopped) z = z - ulp; else
396
               if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
397
               break;
398
            case RZ:case RM:    ... round-to-zero or round-to--inf
399
               R:=RP;           ... reset rounding mod to round-to-+inf
400
               if(x<z*z ... rounded up) z = z - ulp; else
401
               if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
402
               break;
403
            case RP:            ... round-to-+inf
404
               if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
405
               if(x>z*z ...chopped) z = z+ulp;
406
               break;
407
        }
408

409
        Remark 3. The above comparisons can be done in fixed point. For
410
        example, to compare x and w=z*z chopped, it suffices to compare
411
        x1 and w1 (the trailing parts of x and w), regarding them as
412
        two's complement integers.
413

414
        ...Is z an exact square root?
415
        To determine whether z is an exact square root of x, let z1 be the
416
        trailing part of z, and also let x0 and x1 be the leading and
417
        trailing parts of x.
418

419
        If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
420
            I := 1;             ... Raise Inexact flag: z is not exact
421
        else {
422
            j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
423
            k := z1 >> 26;              ... get z's 25-th and 26-th
424
                                            fraction bits
425
            I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
426
        }
427
        R:= r           ... restore rounded mode
428
        return sqrt(x):=z.
429

430
        If multiplication is cheaper then the foregoing red tape, the
431
        Inexact flag can be evaluated by
432

433
            I := i;
434
            I := (z*z!=x) or I.
435

436
        Note that z*z can overwrite I; this value must be sensed if it is
437
        True.
438

439
        Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
440
        zero.
441

442
                    --------------------
443
                z1: |        f2        |
444
                    --------------------
445
                bit 31             bit 0
446

447
        Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
448
        or even of logb(x) have the following relations:
449

450
        -------------------------------------------------
451
        bit 27,26 of z1         bit 1,0 of x1   logb(x)
452
        -------------------------------------------------
453
        00                      00              odd and even
454
        01                      01              even
455
        10                      10              odd
456
        10                      00              even
457
        11                      01              even
458
        -------------------------------------------------
459

460
    (4) Special cases (see (4) of Section A).
461

462
 */
463

464
Product

Resources

Company