Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/mpi/longlong.h
29278 views
1
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2
* Note: I added some stuff for use with gnupg
3
*
4
* Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
5
* 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
6
*
7
* This file is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU Library General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or (at your
10
* option) any later version.
11
*
12
* This file is distributed in the hope that it will be useful, but
13
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15
* License for more details.
16
*
17
* You should have received a copy of the GNU Library General Public License
18
* along with this file; see the file COPYING.LIB. If not, write to
19
* the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20
* MA 02111-1307, USA. */
21
22
#include <linux/count_zeros.h>
23
24
/* You have to define the following before including this file:
25
*
26
* UWtype -- An unsigned type, default type for operations (typically a "word")
27
* UHWtype -- An unsigned type, at least half the size of UWtype.
28
* UDWtype -- An unsigned type, at least twice as large a UWtype
29
* W_TYPE_SIZE -- size in bits of UWtype
30
*
31
* SItype, USItype -- Signed and unsigned 32 bit types.
32
* DItype, UDItype -- Signed and unsigned 64 bit types.
33
*
34
* On a 32 bit machine UWtype should typically be USItype;
35
* on a 64 bit machine, UWtype should typically be UDItype.
36
*/
37
38
/* One quarter of the word size, in bits. */
#define __BITS4 (W_TYPE_SIZE / 4)
39
/* The "base" of half-word digits: 2^(W_TYPE_SIZE/2). */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
40
/* Low half-word of t. */
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
41
/* High half-word of t. */
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
42
43
/* This is used to make sure no undesirable sharing between different libraries
44
that use this file takes place. */
45
#ifndef __MPN
46
#define __MPN(x) __##x
47
#endif
48
49
/* Define auxiliary asm macros.
50
*
51
* 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
52
* UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
53
* word product in HIGH_PROD and LOW_PROD.
54
*
55
* 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
56
* UDWtype product. This is just a variant of umul_ppmm.
57
58
* 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
59
* denominator) divides a UDWtype, composed by the UWtype integers
60
* HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
61
* in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
62
* than DENOMINATOR for correct operation. If, in addition, the most
63
* significant bit of DENOMINATOR must be 1, then the pre-processor symbol
64
* UDIV_NEEDS_NORMALIZATION is defined to 1.
65
* 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
66
* denominator). Like udiv_qrnnd but the numbers are signed. The quotient
67
* is rounded towards 0.
68
*
69
* 5) count_leading_zeros(count, x) counts the number of zero-bits from the
70
* msb to the first non-zero bit in the UWtype X. This is the number of
71
* steps X needs to be shifted left to set the msb. Undefined for X == 0,
72
* unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
73
*
74
* 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
75
* from the least significant end.
76
*
77
* 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
78
* high_addend_2, low_addend_2) adds two UWtype integers, composed by
79
* HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
80
* respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
81
* (i.e. carry out) is not stored anywhere, and is lost.
82
*
83
* 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
84
* high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
85
* composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
86
* LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
87
* and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
88
* and is lost.
89
*
90
* If any of these macros are left undefined for a particular CPU,
91
* C macros are used. */
92
93
/* The CPUs come in alphabetical order below.
94
*
95
* Please add support for more CPUs here, or improve the current support
96
* for the CPUs below! */
97
98
#if defined(__GNUC__) && !defined(NO_ASM)
99
100
/* We sometimes need to clobber "cc" with gcc2, but that would not be
101
understood by gcc1. Use cpp to avoid major code duplication. */
102
#if __GNUC__ < 2
103
#define __CLOBBER_CC
104
#define __AND_CLOBBER_CC
105
#else /* __GNUC__ >= 2 */
106
#define __CLOBBER_CC : "cc"
107
#define __AND_CLOBBER_CC , "cc"
108
#endif /* __GNUC__ < 2 */
109
110
/***************************************
111
************** A29K *****************
112
***************************************/
113
#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
114
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
115
__asm__ ("add %1,%4,%5\n" \
116
"addc %0,%2,%3" \
117
: "=r" ((USItype)(sh)), \
118
"=&r" ((USItype)(sl)) \
119
: "%r" ((USItype)(ah)), \
120
"rI" ((USItype)(bh)), \
121
"%r" ((USItype)(al)), \
122
"rI" ((USItype)(bl)))
123
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
124
__asm__ ("sub %1,%4,%5\n" \
125
"subc %0,%2,%3" \
126
: "=r" ((USItype)(sh)), \
127
"=&r" ((USItype)(sl)) \
128
: "r" ((USItype)(ah)), \
129
"rI" ((USItype)(bh)), \
130
"r" ((USItype)(al)), \
131
"rI" ((USItype)(bl)))
132
#define umul_ppmm(xh, xl, m0, m1) \
133
do { \
134
USItype __m0 = (m0), __m1 = (m1); \
135
__asm__ ("multiplu %0,%1,%2" \
136
: "=r" ((USItype)(xl)) \
137
: "r" (__m0), \
138
"r" (__m1)); \
139
__asm__ ("multmu %0,%1,%2" \
140
: "=r" ((USItype)(xh)) \
141
: "r" (__m0), \
142
"r" (__m1)); \
143
} while (0)
144
#define udiv_qrnnd(q, r, n1, n0, d) \
145
__asm__ ("dividu %0,%3,%4" \
146
: "=r" ((USItype)(q)), \
147
"=q" ((USItype)(r)) \
148
: "1" ((USItype)(n1)), \
149
"r" ((USItype)(n0)), \
150
"r" ((USItype)(d)))
151
#endif /* __a29k__ */
152
153
#if defined(__alpha) && W_TYPE_SIZE == 64
154
#define umul_ppmm(ph, pl, m0, m1) \
155
do { \
156
UDItype __m0 = (m0), __m1 = (m1); \
157
(ph) = __builtin_alpha_umulh(__m0, __m1); \
158
(pl) = __m0 * __m1; \
159
} while (0)
160
#define UMUL_TIME 46
161
#ifndef LONGLONG_STANDALONE
162
#define udiv_qrnnd(q, r, n1, n0, d) \
163
do { UDItype __r; \
164
(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
165
(r) = __r; \
166
} while (0)
167
extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
168
#define UDIV_TIME 220
169
#endif /* LONGLONG_STANDALONE */
170
#endif /* __alpha */
171
172
/***************************************
173
************** ARM ******************
174
***************************************/
175
#if defined(__arm__) && W_TYPE_SIZE == 32
176
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
177
__asm__ ("adds %1, %4, %5\n" \
178
"adc %0, %2, %3" \
179
: "=r" (sh), \
180
"=&r" (sl) \
181
: "%r" ((USItype)(ah)), \
182
"rI" ((USItype)(bh)), \
183
"%r" ((USItype)(al)), \
184
"rI" ((USItype)(bl)))
185
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
186
__asm__ ("subs %1, %4, %5\n" \
187
"sbc %0, %2, %3" \
188
: "=r" (sh), \
189
"=&r" (sl) \
190
: "r" ((USItype)(ah)), \
191
"rI" ((USItype)(bh)), \
192
"r" ((USItype)(al)), \
193
"rI" ((USItype)(bl)))
194
#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
195
#define umul_ppmm(xh, xl, a, b) \
196
__asm__ ("@ Inlined umul_ppmm\n" \
197
"mov %|r0, %2, lsr #16 @ AAAA\n" \
198
"mov %|r2, %3, lsr #16 @ BBBB\n" \
199
"bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
200
"bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
201
"mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
202
"mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
203
"mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
204
"mul %0, %|r0, %0 @ AAAA * bbbb\n" \
205
"adds %|r0, %1, %0 @ central sum\n" \
206
"addcs %|r2, %|r2, #65536\n" \
207
"adds %1, %|r1, %|r0, lsl #16\n" \
208
"adc %0, %|r2, %|r0, lsr #16" \
209
: "=&r" (xh), \
210
"=r" (xl) \
211
: "r" ((USItype)(a)), \
212
"r" ((USItype)(b)) \
213
: "r0", "r1", "r2")
214
#else
215
#define umul_ppmm(xh, xl, a, b) \
216
__asm__ ("@ Inlined umul_ppmm\n" \
217
"umull %1, %0, %2, %3" \
218
: "=&r" (xh), \
219
"=&r" (xl) \
220
: "r" ((USItype)(a)), \
221
"r" ((USItype)(b)) \
222
: "r0", "r1")
223
#endif
224
#define UMUL_TIME 20
225
#define UDIV_TIME 100
226
#endif /* __arm__ */
227
228
/***************************************
229
************** CLIPPER **************
230
***************************************/
231
#if defined(__clipper__) && W_TYPE_SIZE == 32
232
#define umul_ppmm(w1, w0, u, v) \
233
({union {UDItype __ll; \
234
struct {USItype __l, __h; } __i; \
235
} __xx; \
236
__asm__ ("mulwux %2,%0" \
237
: "=r" (__xx.__ll) \
238
: "%0" ((USItype)(u)), \
239
"r" ((USItype)(v))); \
240
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
241
#define smul_ppmm(w1, w0, u, v) \
242
({union {DItype __ll; \
243
struct {SItype __l, __h; } __i; \
244
} __xx; \
245
__asm__ ("mulwx %2,%0" \
246
: "=r" (__xx.__ll) \
247
: "%0" ((SItype)(u)), \
248
"r" ((SItype)(v))); \
249
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
250
#define __umulsidi3(u, v) \
251
({UDItype __w; \
252
__asm__ ("mulwux %2,%0" \
253
: "=r" (__w) \
254
: "%0" ((USItype)(u)), \
255
"r" ((USItype)(v))); \
256
__w; })
257
#endif /* __clipper__ */
258
259
/***************************************
260
************** GMICRO ***************
261
***************************************/
262
#if defined(__gmicro__) && W_TYPE_SIZE == 32
263
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
264
__asm__ ("add.w %5,%1\n" \
265
"addx %3,%0" \
266
: "=g" ((USItype)(sh)), \
267
"=&g" ((USItype)(sl)) \
268
: "%0" ((USItype)(ah)), \
269
"g" ((USItype)(bh)), \
270
"%1" ((USItype)(al)), \
271
"g" ((USItype)(bl)))
272
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
273
__asm__ ("sub.w %5,%1\n" \
274
"subx %3,%0" \
275
: "=g" ((USItype)(sh)), \
276
"=&g" ((USItype)(sl)) \
277
: "0" ((USItype)(ah)), \
278
"g" ((USItype)(bh)), \
279
"1" ((USItype)(al)), \
280
"g" ((USItype)(bl)))
281
#define umul_ppmm(ph, pl, m0, m1) \
282
__asm__ ("mulx %3,%0,%1" \
283
: "=g" ((USItype)(ph)), \
284
"=r" ((USItype)(pl)) \
285
: "%0" ((USItype)(m0)), \
286
"g" ((USItype)(m1)))
287
#define udiv_qrnnd(q, r, nh, nl, d) \
288
__asm__ ("divx %4,%0,%1" \
289
: "=g" ((USItype)(q)), \
290
"=r" ((USItype)(r)) \
291
: "1" ((USItype)(nh)), \
292
"0" ((USItype)(nl)), \
293
"g" ((USItype)(d)))
294
#endif
295
296
/***************************************
297
************** HPPA *****************
298
***************************************/
299
#if defined(__hppa) && W_TYPE_SIZE == 32
300
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
301
__asm__ ("add %4,%5,%1\n" \
302
"addc %2,%3,%0" \
303
: "=r" ((USItype)(sh)), \
304
"=&r" ((USItype)(sl)) \
305
: "%rM" ((USItype)(ah)), \
306
"rM" ((USItype)(bh)), \
307
"%rM" ((USItype)(al)), \
308
"rM" ((USItype)(bl)))
309
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
310
__asm__ ("sub %4,%5,%1\n" \
311
"subb %2,%3,%0" \
312
: "=r" ((USItype)(sh)), \
313
"=&r" ((USItype)(sl)) \
314
: "rM" ((USItype)(ah)), \
315
"rM" ((USItype)(bh)), \
316
"rM" ((USItype)(al)), \
317
"rM" ((USItype)(bl)))
318
#if 0 && defined(_PA_RISC1_1)
319
/* xmpyu uses floating point register which is not allowed in Linux kernel. */
320
#define umul_ppmm(wh, wl, u, v) \
321
do { \
322
union {UDItype __ll; \
323
struct {USItype __h, __l; } __i; \
324
} __xx; \
325
__asm__ ("xmpyu %1,%2,%0" \
326
: "=*f" (__xx.__ll) \
327
: "*f" ((USItype)(u)), \
328
"*f" ((USItype)(v))); \
329
(wh) = __xx.__i.__h; \
330
(wl) = __xx.__i.__l; \
331
} while (0)
332
#define UMUL_TIME 8
333
#define UDIV_TIME 60
334
#else
335
#define UMUL_TIME 40
336
#define UDIV_TIME 80
337
#endif
338
#if 0 /* #ifndef LONGLONG_STANDALONE */
339
#define udiv_qrnnd(q, r, n1, n0, d) \
340
do { USItype __r; \
341
(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
342
(r) = __r; \
343
} while (0)
344
extern USItype __udiv_qrnnd();
345
#endif /* LONGLONG_STANDALONE */
346
#endif /* hppa */
347
348
/***************************************
349
************** I370 *****************
350
***************************************/
351
#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
352
#define umul_ppmm(xh, xl, m0, m1) \
353
do { \
354
union {UDItype __ll; \
355
struct {USItype __h, __l; } __i; \
356
} __xx; \
357
USItype __m0 = (m0), __m1 = (m1); \
358
__asm__ ("mr %0,%3" \
359
: "=r" (__xx.__i.__h), \
360
"=r" (__xx.__i.__l) \
361
: "%1" (__m0), \
362
"r" (__m1)); \
363
(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
364
(xh) += ((((SItype) __m0 >> 31) & __m1) \
365
+ (((SItype) __m1 >> 31) & __m0)); \
366
} while (0)
367
#define smul_ppmm(xh, xl, m0, m1) \
368
do { \
369
union {DItype __ll; \
370
struct {USItype __h, __l; } __i; \
371
} __xx; \
372
__asm__ ("mr %0,%3" \
373
: "=r" (__xx.__i.__h), \
374
"=r" (__xx.__i.__l) \
375
: "%1" (m0), \
376
"r" (m1)); \
377
(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
378
} while (0)
379
#define sdiv_qrnnd(q, r, n1, n0, d) \
380
do { \
381
union {DItype __ll; \
382
struct {USItype __h, __l; } __i; \
383
} __xx; \
384
__xx.__i.__h = n1; __xx.__i.__l = n0; \
385
__asm__ ("dr %0,%2" \
386
: "=r" (__xx.__ll) \
387
: "0" (__xx.__ll), "r" (d)); \
388
(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
389
} while (0)
390
#endif
391
392
/***************************************
393
************** I386 *****************
394
***************************************/
395
#undef __i386__
396
#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
397
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
398
__asm__ ("addl %5,%1\n" \
399
"adcl %3,%0" \
400
: "=r" (sh), \
401
"=&r" (sl) \
402
: "%0" ((USItype)(ah)), \
403
"g" ((USItype)(bh)), \
404
"%1" ((USItype)(al)), \
405
"g" ((USItype)(bl)))
406
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
407
__asm__ ("subl %5,%1\n" \
408
"sbbl %3,%0" \
409
: "=r" (sh), \
410
"=&r" (sl) \
411
: "0" ((USItype)(ah)), \
412
"g" ((USItype)(bh)), \
413
"1" ((USItype)(al)), \
414
"g" ((USItype)(bl)))
415
#define umul_ppmm(w1, w0, u, v) \
416
__asm__ ("mull %3" \
417
: "=a" (w0), \
418
"=d" (w1) \
419
: "%0" ((USItype)(u)), \
420
"rm" ((USItype)(v)))
421
#define udiv_qrnnd(q, r, n1, n0, d) \
422
__asm__ ("divl %4" \
423
: "=a" (q), \
424
"=d" (r) \
425
: "0" ((USItype)(n0)), \
426
"1" ((USItype)(n1)), \
427
"rm" ((USItype)(d)))
428
#ifndef UMUL_TIME
429
#define UMUL_TIME 40
430
#endif
431
#ifndef UDIV_TIME
432
#define UDIV_TIME 40
433
#endif
434
#endif /* 80x86 */
435
436
/***************************************
437
************** I860 *****************
438
***************************************/
439
#if defined(__i860__) && W_TYPE_SIZE == 32
/* rshift_rhlc(r, h, l, c): double-word right shift; R receives the
 * selected word of (H:L) shifted right by C.
 * Fix: the original asm statement was missing the ':' separating the
 * instruction template from the output operand list, so "=r" (r) was
 * string-concatenated into the template and the macro could never have
 * compiled when expanded. */
#define rshift_rhlc(r, h, l, c) \
	__asm__ ("shr %3,r0,r0\n" \
		"shrd %1,%2,%0" \
		: "=r" (r) : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */
445
446
/***************************************
447
************** I960 *****************
448
***************************************/
449
#if defined(__i960__) && W_TYPE_SIZE == 32
450
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
451
__asm__ ("cmpo 1,0\n" \
452
"addc %5,%4,%1\n" \
453
"addc %3,%2,%0" \
454
: "=r" ((USItype)(sh)), \
455
"=&r" ((USItype)(sl)) \
456
: "%dI" ((USItype)(ah)), \
457
"dI" ((USItype)(bh)), \
458
"%dI" ((USItype)(al)), \
459
"dI" ((USItype)(bl)))
460
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
461
__asm__ ("cmpo 0,0\n" \
462
"subc %5,%4,%1\n" \
463
"subc %3,%2,%0" \
464
: "=r" ((USItype)(sh)), \
465
"=&r" ((USItype)(sl)) \
466
: "dI" ((USItype)(ah)), \
467
"dI" ((USItype)(bh)), \
468
"dI" ((USItype)(al)), \
469
"dI" ((USItype)(bl)))
470
#define umul_ppmm(w1, w0, u, v) \
471
({union {UDItype __ll; \
472
struct {USItype __l, __h; } __i; \
473
} __xx; \
474
__asm__ ("emul %2,%1,%0" \
475
: "=d" (__xx.__ll) \
476
: "%dI" ((USItype)(u)), \
477
"dI" ((USItype)(v))); \
478
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
479
#define __umulsidi3(u, v) \
480
({UDItype __w; \
481
__asm__ ("emul %2,%1,%0" \
482
: "=d" (__w) \
483
: "%dI" ((USItype)(u)), \
484
"dI" ((USItype)(v))); \
485
__w; })
486
/* udiv_qrnnd for i960: divide the double word (NH:NL) by D, quotient in
 * Q, remainder in R.
 * Fixes over the original:
 *  - the result union __rq was used but never declared (only __nn was),
 *    which cannot compile; declare it alongside __nn.
 *  - the template "ediv %d,%n,%0" contained no valid operand references
 *    (%d and %n are not operand numbers); use "%2,%1,%0" to mirror the
 *    emul macros above (divisor, dividend pair, destination pair).
 * NOTE(review): i960 is a dead target and this cannot be build-tested
 * here — verify operand order against the i960 ISA manual. */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __nn, __rq; \
	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
	__asm__ ("ediv %2,%1,%0" \
		: "=d" (__rq.__ll) \
		: "dI" (__nn.__ll), \
		"dI" ((USItype)(d))); \
	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
498
#if defined(__i960mx) /* what is the proper symbol to test??? */
499
#define rshift_rhlc(r, h, l, c) \
500
do { \
501
union {UDItype __ll; \
502
struct {USItype __l, __h; } __i; \
503
} __nn; \
504
__nn.__i.__h = (h); __nn.__i.__l = (l); \
505
__asm__ ("shre %2,%1,%0" \
506
: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
507
}
508
#endif /* i960mx */
509
#endif /* i960 */
510
511
/***************************************
512
************** 68000 ****************
513
***************************************/
514
#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
515
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
516
__asm__ ("add%.l %5,%1\n" \
517
"addx%.l %3,%0" \
518
: "=d" ((USItype)(sh)), \
519
"=&d" ((USItype)(sl)) \
520
: "%0" ((USItype)(ah)), \
521
"d" ((USItype)(bh)), \
522
"%1" ((USItype)(al)), \
523
"g" ((USItype)(bl)))
524
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
525
__asm__ ("sub%.l %5,%1\n" \
526
"subx%.l %3,%0" \
527
: "=d" ((USItype)(sh)), \
528
"=&d" ((USItype)(sl)) \
529
: "0" ((USItype)(ah)), \
530
"d" ((USItype)(bh)), \
531
"1" ((USItype)(al)), \
532
"g" ((USItype)(bl)))
533
#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
534
#define umul_ppmm(w1, w0, u, v) \
535
__asm__ ("mulu%.l %3,%1:%0" \
536
: "=d" ((USItype)(w0)), \
537
"=d" ((USItype)(w1)) \
538
: "%0" ((USItype)(u)), \
539
"dmi" ((USItype)(v)))
540
#define UMUL_TIME 45
541
#define udiv_qrnnd(q, r, n1, n0, d) \
542
__asm__ ("divu%.l %4,%1:%0" \
543
: "=d" ((USItype)(q)), \
544
"=d" ((USItype)(r)) \
545
: "0" ((USItype)(n0)), \
546
"1" ((USItype)(n1)), \
547
"dmi" ((USItype)(d)))
548
#define UDIV_TIME 90
549
#define sdiv_qrnnd(q, r, n1, n0, d) \
550
__asm__ ("divs%.l %4,%1:%0" \
551
: "=d" ((USItype)(q)), \
552
"=d" ((USItype)(r)) \
553
: "0" ((USItype)(n0)), \
554
"1" ((USItype)(n1)), \
555
"dmi" ((USItype)(d)))
556
#else /* not mc68020 */
557
#define umul_ppmm(xh, xl, a, b) \
558
do { USItype __umul_tmp1, __umul_tmp2; \
559
__asm__ ("| Inlined umul_ppmm\n" \
560
"move%.l %5,%3\n" \
561
"move%.l %2,%0\n" \
562
"move%.w %3,%1\n" \
563
"swap %3\n" \
564
"swap %0\n" \
565
"mulu %2,%1\n" \
566
"mulu %3,%0\n" \
567
"mulu %2,%3\n" \
568
"swap %2\n" \
569
"mulu %5,%2\n" \
570
"add%.l %3,%2\n" \
571
"jcc 1f\n" \
572
"add%.l %#0x10000,%0\n" \
573
"1: move%.l %2,%3\n" \
574
"clr%.w %2\n" \
575
"swap %2\n" \
576
"swap %3\n" \
577
"clr%.w %3\n" \
578
"add%.l %3,%1\n" \
579
"addx%.l %2,%0\n" \
580
"| End inlined umul_ppmm" \
581
: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
582
"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
583
: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
584
} while (0)
585
#define UMUL_TIME 100
586
#define UDIV_TIME 400
587
#endif /* not mc68020 */
588
#endif /* mc68000 */
589
590
/***************************************
591
************** 88000 ****************
592
***************************************/
593
#if defined(__m88000__) && W_TYPE_SIZE == 32
594
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
595
__asm__ ("addu.co %1,%r4,%r5\n" \
596
"addu.ci %0,%r2,%r3" \
597
: "=r" ((USItype)(sh)), \
598
"=&r" ((USItype)(sl)) \
599
: "%rJ" ((USItype)(ah)), \
600
"rJ" ((USItype)(bh)), \
601
"%rJ" ((USItype)(al)), \
602
"rJ" ((USItype)(bl)))
603
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
604
__asm__ ("subu.co %1,%r4,%r5\n" \
605
"subu.ci %0,%r2,%r3" \
606
: "=r" ((USItype)(sh)), \
607
"=&r" ((USItype)(sl)) \
608
: "rJ" ((USItype)(ah)), \
609
"rJ" ((USItype)(bh)), \
610
"rJ" ((USItype)(al)), \
611
"rJ" ((USItype)(bl)))
612
#if defined(__m88110__)
613
#define umul_ppmm(wh, wl, u, v) \
614
do { \
615
union {UDItype __ll; \
616
struct {USItype __h, __l; } __i; \
617
} __x; \
618
__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
619
(wh) = __x.__i.__h; \
620
(wl) = __x.__i.__l; \
621
} while (0)
622
/* udiv_qrnnd for m88110: hardware double-word divide; quotient in Q,
 * remainder computed as N0 - Q*D.
 * Fix: the original read __q.__l, but the union has no direct member
 * __l — the low word lives in the nested struct, __q.__i.__l. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		struct {USItype __h, __l; } __i; \
	} __x, __q; \
	__x.__i.__h = (n1); __x.__i.__l = (n0); \
	__asm__ ("divu.d %0,%1,%2" \
		: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
	(r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
630
#define UMUL_TIME 5
631
#define UDIV_TIME 25
632
#else
633
#define UMUL_TIME 17
634
#define UDIV_TIME 150
635
#endif /* __m88110__ */
636
#endif /* __m88000__ */
637
638
/***************************************
639
************** MIPS *****************
640
***************************************/
641
#if defined(__mips__) && W_TYPE_SIZE == 32
642
#define umul_ppmm(w1, w0, u, v) \
643
do { \
644
UDItype __ll = (UDItype)(u) * (v); \
645
w1 = __ll >> 32; \
646
w0 = __ll; \
647
} while (0)
648
#define UMUL_TIME 10
649
#define UDIV_TIME 100
650
#endif /* __mips__ */
651
652
/***************************************
653
************** MIPS/64 **************
654
***************************************/
655
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
656
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 && defined(CONFIG_CC_IS_GCC)
657
/*
658
* GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
659
* code below, so we special case MIPS64r6 until the compiler can do better.
660
*/
661
#define umul_ppmm(w1, w0, u, v) \
662
do { \
663
__asm__ ("dmulu %0,%1,%2" \
664
: "=d" ((UDItype)(w0)) \
665
: "d" ((UDItype)(u)), \
666
"d" ((UDItype)(v))); \
667
__asm__ ("dmuhu %0,%1,%2" \
668
: "=d" ((UDItype)(w1)) \
669
: "d" ((UDItype)(u)), \
670
"d" ((UDItype)(v))); \
671
} while (0)
672
#else
673
#define umul_ppmm(w1, w0, u, v) \
674
do { \
675
typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
676
__ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
677
w1 = __ll >> 64; \
678
w0 = __ll; \
679
} while (0)
680
#endif
681
#define UMUL_TIME 20
682
#define UDIV_TIME 140
683
#endif /* __mips__ */
684
685
/***************************************
686
************** 32000 ****************
687
***************************************/
688
#if defined(__ns32000__) && W_TYPE_SIZE == 32
689
#define umul_ppmm(w1, w0, u, v) \
690
({union {UDItype __ll; \
691
struct {USItype __l, __h; } __i; \
692
} __xx; \
693
__asm__ ("meid %2,%0" \
694
: "=g" (__xx.__ll) \
695
: "%0" ((USItype)(u)), \
696
"g" ((USItype)(v))); \
697
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
698
#define __umulsidi3(u, v) \
699
({UDItype __w; \
700
__asm__ ("meid %2,%0" \
701
: "=g" (__w) \
702
: "%0" ((USItype)(u)), \
703
"g" ((USItype)(v))); \
704
__w; })
705
#define udiv_qrnnd(q, r, n1, n0, d) \
706
({union {UDItype __ll; \
707
struct {USItype __l, __h; } __i; \
708
} __xx; \
709
__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
710
__asm__ ("deid %2,%0" \
711
: "=g" (__xx.__ll) \
712
: "0" (__xx.__ll), \
713
"g" ((USItype)(d))); \
714
(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
715
#endif /* __ns32000__ */
716
717
/***************************************
718
************** PPC ******************
719
***************************************/
720
#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
721
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
722
do { \
723
if (__builtin_constant_p(bh) && (bh) == 0) \
724
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
725
: "=r" (sh), \
726
"=&r" (sl) \
727
: "%r" ((USItype)(ah)), \
728
"%r" ((USItype)(al)), \
729
"rI" ((USItype)(bl))); \
730
else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
731
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
732
: "=r" (sh), \
733
"=&r" (sl) \
734
: "%r" ((USItype)(ah)), \
735
"%r" ((USItype)(al)), \
736
"rI" ((USItype)(bl))); \
737
else \
738
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
739
: "=r" (sh), \
740
"=&r" (sl) \
741
: "%r" ((USItype)(ah)), \
742
"r" ((USItype)(bh)), \
743
"%r" ((USItype)(al)), \
744
"rI" ((USItype)(bl))); \
745
} while (0)
746
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
747
do { \
748
if (__builtin_constant_p(ah) && (ah) == 0) \
749
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
750
: "=r" (sh), \
751
"=&r" (sl) \
752
: "r" ((USItype)(bh)), \
753
"rI" ((USItype)(al)), \
754
"r" ((USItype)(bl))); \
755
else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
756
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
757
: "=r" (sh), \
758
"=&r" (sl) \
759
: "r" ((USItype)(bh)), \
760
"rI" ((USItype)(al)), \
761
"r" ((USItype)(bl))); \
762
else if (__builtin_constant_p(bh) && (bh) == 0) \
763
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
764
: "=r" (sh), \
765
"=&r" (sl) \
766
: "r" ((USItype)(ah)), \
767
"rI" ((USItype)(al)), \
768
"r" ((USItype)(bl))); \
769
else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
770
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
771
: "=r" (sh), \
772
"=&r" (sl) \
773
: "r" ((USItype)(ah)), \
774
"rI" ((USItype)(al)), \
775
"r" ((USItype)(bl))); \
776
else \
777
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
778
: "=r" (sh), \
779
"=&r" (sl) \
780
: "r" ((USItype)(ah)), \
781
"r" ((USItype)(bh)), \
782
"rI" ((USItype)(al)), \
783
"r" ((USItype)(bl))); \
784
} while (0)
785
#if defined(_ARCH_PPC)
786
#define umul_ppmm(ph, pl, m0, m1) \
787
do { \
788
USItype __m0 = (m0), __m1 = (m1); \
789
__asm__ ("mulhwu %0,%1,%2" \
790
: "=r" (ph) \
791
: "%r" (__m0), \
792
"r" (__m1)); \
793
(pl) = __m0 * __m1; \
794
} while (0)
795
#define UMUL_TIME 15
796
#define smul_ppmm(ph, pl, m0, m1) \
797
do { \
798
SItype __m0 = (m0), __m1 = (m1); \
799
__asm__ ("mulhw %0,%1,%2" \
800
: "=r" ((SItype) ph) \
801
: "%r" (__m0), \
802
"r" (__m1)); \
803
(pl) = __m0 * __m1; \
804
} while (0)
805
#define SMUL_TIME 14
806
#define UDIV_TIME 120
807
#else
808
#define umul_ppmm(xh, xl, m0, m1) \
809
do { \
810
USItype __m0 = (m0), __m1 = (m1); \
811
__asm__ ("mul %0,%2,%3" \
812
: "=r" ((USItype)(xh)), \
813
"=q" ((USItype)(xl)) \
814
: "r" (__m0), \
815
"r" (__m1)); \
816
(xh) += ((((SItype) __m0 >> 31) & __m1) \
817
+ (((SItype) __m1 >> 31) & __m0)); \
818
} while (0)
819
#define UMUL_TIME 8
820
#define smul_ppmm(xh, xl, m0, m1) \
821
__asm__ ("mul %0,%2,%3" \
822
: "=r" ((SItype)(xh)), \
823
"=q" ((SItype)(xl)) \
824
: "r" (m0), \
825
"r" (m1))
826
#define SMUL_TIME 4
827
#define sdiv_qrnnd(q, r, nh, nl, d) \
828
__asm__ ("div %0,%2,%4" \
829
: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
830
: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
831
#define UDIV_TIME 100
832
#endif
833
#endif /* Power architecture variants. */
834
835
/***************************************
836
************** PYR ******************
837
***************************************/
838
#if defined(__pyr__) && W_TYPE_SIZE == 32
839
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
840
__asm__ ("addw %5,%1\n" \
841
"addwc %3,%0" \
842
: "=r" ((USItype)(sh)), \
843
"=&r" ((USItype)(sl)) \
844
: "%0" ((USItype)(ah)), \
845
"g" ((USItype)(bh)), \
846
"%1" ((USItype)(al)), \
847
"g" ((USItype)(bl)))
848
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
849
__asm__ ("subw %5,%1\n" \
850
"subwb %3,%0" \
851
: "=r" ((USItype)(sh)), \
852
"=&r" ((USItype)(sl)) \
853
: "0" ((USItype)(ah)), \
854
"g" ((USItype)(bh)), \
855
"1" ((USItype)(al)), \
856
"g" ((USItype)(bl)))
857
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
858
#define umul_ppmm(w1, w0, u, v) \
859
({union {UDItype __ll; \
860
struct {USItype __h, __l; } __i; \
861
} __xx; \
862
__asm__ ("movw %1,%R0\n" \
863
"uemul %2,%0" \
864
: "=&r" (__xx.__ll) \
865
: "g" ((USItype) (u)), \
866
"g" ((USItype)(v))); \
867
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
868
#endif /* __pyr__ */
869
870
/***************************************
871
************** RT/ROMP **************
872
***************************************/
873
#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
874
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
875
__asm__ ("a %1,%5\n" \
876
"ae %0,%3" \
877
: "=r" ((USItype)(sh)), \
878
"=&r" ((USItype)(sl)) \
879
: "%0" ((USItype)(ah)), \
880
"r" ((USItype)(bh)), \
881
"%1" ((USItype)(al)), \
882
"r" ((USItype)(bl)))
883
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
884
__asm__ ("s %1,%5\n" \
885
"se %0,%3" \
886
: "=r" ((USItype)(sh)), \
887
"=&r" ((USItype)(sl)) \
888
: "0" ((USItype)(ah)), \
889
"r" ((USItype)(bh)), \
890
"1" ((USItype)(al)), \
891
"r" ((USItype)(bl)))
892
#define umul_ppmm(ph, pl, m0, m1) \
893
do { \
894
USItype __m0 = (m0), __m1 = (m1); \
895
__asm__ ( \
896
"s r2,r2\n" \
897
"mts r10,%2\n" \
898
"m r2,%3\n" \
899
"m r2,%3\n" \
900
"m r2,%3\n" \
901
"m r2,%3\n" \
902
"m r2,%3\n" \
903
"m r2,%3\n" \
904
"m r2,%3\n" \
905
"m r2,%3\n" \
906
"m r2,%3\n" \
907
"m r2,%3\n" \
908
"m r2,%3\n" \
909
"m r2,%3\n" \
910
"m r2,%3\n" \
911
"m r2,%3\n" \
912
"m r2,%3\n" \
913
"m r2,%3\n" \
914
"cas %0,r2,r0\n" \
915
"mfs r10,%1" \
916
: "=r" ((USItype)(ph)), \
917
"=r" ((USItype)(pl)) \
918
: "%r" (__m0), \
919
"r" (__m1) \
920
: "r2"); \
921
(ph) += ((((SItype) __m0 >> 31) & __m1) \
922
+ (((SItype) __m1 >> 31) & __m0)); \
923
} while (0)
924
#define UMUL_TIME 20
925
#define UDIV_TIME 200
926
#endif /* RT/ROMP */
927
928
/***************************************
929
************** SH2 ******************
930
***************************************/
931
#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
932
&& W_TYPE_SIZE == 32
933
#define umul_ppmm(w1, w0, u, v) \
934
__asm__ ( \
935
"dmulu.l %2,%3\n" \
936
"sts macl,%1\n" \
937
"sts mach,%0" \
938
: "=r" ((USItype)(w1)), \
939
"=r" ((USItype)(w0)) \
940
: "r" ((USItype)(u)), \
941
"r" ((USItype)(v)) \
942
: "macl", "mach")
943
#define UMUL_TIME 5
944
#endif
945
946
/***************************************
947
************** SPARC ****************
948
***************************************/
949
#if defined(__sparc__) && W_TYPE_SIZE == 32
950
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
951
__asm__ ("addcc %r4,%5,%1\n" \
952
"addx %r2,%3,%0" \
953
: "=r" ((USItype)(sh)), \
954
"=&r" ((USItype)(sl)) \
955
: "%rJ" ((USItype)(ah)), \
956
"rI" ((USItype)(bh)), \
957
"%rJ" ((USItype)(al)), \
958
"rI" ((USItype)(bl)) \
959
__CLOBBER_CC)
960
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
961
__asm__ ("subcc %r4,%5,%1\n" \
962
"subx %r2,%3,%0" \
963
: "=r" ((USItype)(sh)), \
964
"=&r" ((USItype)(sl)) \
965
: "rJ" ((USItype)(ah)), \
966
"rI" ((USItype)(bh)), \
967
"rJ" ((USItype)(al)), \
968
"rI" ((USItype)(bl)) \
969
__CLOBBER_CC)
970
#if defined(__sparc_v8__)
971
/* Don't match immediate range because, 1) it is not often useful,
972
2) the 'I' flag thinks of the range as a 13 bit signed interval,
973
while we want to match a 13 bit interval, sign extended to 32 bits,
974
but INTERPRETED AS UNSIGNED. */
975
#define umul_ppmm(w1, w0, u, v) \
976
__asm__ ("umul %2,%3,%1;rd %%y,%0" \
977
: "=r" ((USItype)(w1)), \
978
"=r" ((USItype)(w0)) \
979
: "r" ((USItype)(u)), \
980
"r" ((USItype)(v)))
981
#define UMUL_TIME 5
982
#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
983
#define udiv_qrnnd(q, r, n1, n0, d) \
984
do { \
985
USItype __q; \
986
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
987
: "=r" ((USItype)(__q)) \
988
: "r" ((USItype)(n1)), \
989
"r" ((USItype)(n0)), \
990
"r" ((USItype)(d))); \
991
(r) = (n0) - __q * (d); \
992
(q) = __q; \
993
} while (0)
994
#define UDIV_TIME 25
995
#endif /* SUPERSPARC */
996
#else /* ! __sparc_v8__ */
997
#if defined(__sparclite__)
998
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc. */
/*
 * sparclite umul_ppmm: same form as the v8 version -- hardware "umul"
 * puts the low product word in w0 and the high word in %y (read into w1).
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	  "r" ((USItype)(v)))
#define UMUL_TIME 5
1007
/*
 * udiv_qrnnd(q, r, n1, n0, d) for sparclite: bit-serial division using
 * 32 "divscc" (divide step with condition codes) instructions.  %y is
 * loaded with the high word n1, then each divscc step develops one
 * quotient bit into %g1 (the last step into q); "rd %y" recovers the
 * remainder.  The final "bl,a"/"add" pair adds d back when the last
 * partial remainder went negative (non-restoring fixup).  %g1 and the
 * condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
	"wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
	"tst %%g0\n" \
	"divscc %3,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%0\n" \
	"rd %%y,%1\n" \
	"bl,a 1f\n" \
	"add %1,%4,%1\n" \
	"1: ! End of inline udiv_qrnnd" \
	: "=r" ((USItype)(q)), \
	  "=r" ((USItype)(r)) \
	: "r" ((USItype)(n1)), \
	  "r" ((USItype)(n0)), \
	  "rI" ((USItype)(d)) \
	: "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
1054
#endif /* __sparclite__ */
1055
#endif /* __sparc_v8__ */
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
#ifndef umul_ppmm
1058
/*
 * SPARC v7 umul_ppmm: no hardware multiply, so a 32-step shift-and-add
 * using "mulscc" (multiply step with condition codes).  %y is loaded
 * with u; each mulscc conditionally adds v and shifts, accumulating the
 * high partial product in %g1 and shifting the low bits through %y.
 * mulscc performs a signed multiply, so %g2 holds the unsigned-fixup
 * term (v's sign mask AND u), added to the high word at the end.  The
 * "Don't move this insn" comments mark instructions in the %y-write
 * delay slots that must keep their position.  %g1, %g2 and the
 * condition codes are clobbered.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
	"wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
	"sra %3,31,%%g2 ! Don't move this insn\n" \
	"and %2,%%g2,%%g2 ! Don't move this insn\n" \
	"andcc %%g0,0,%%g1 ! Don't move this insn\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,0,%%g1\n" \
	"add %%g1,%%g2,%0\n" \
	"rd %%y,%1" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "%rI" ((USItype)(u)), \
	  "r" ((USItype)(v)) \
	: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
1105
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower! */
/*
 * SPARC v7 udiv_qrnnd: shift-and-subtract division of n1:n0 by d, 32
 * iterations counted down in %g1.  Quotient bits are shifted into %0
 * (initially n0) as the dividend is shifted left; labels 4/5 handle the
 * case where the shift itself produced a carry out of n.  The final
 * "xnor %0,0,%0" inverts the accumulated bits to form the quotient.
 * Operands: "0"/"1" tie n0/n1 to the q/r output registers; %g1 and the
 * condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n\t" \
	"mov 32,%%g1\n\t" \
	"subcc %1,%2,%%g0\n\t" \
	"1: bcs 5f\n\t" \
	"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
	"sub %1,%2,%1 ! this kills msb of n\n\t" \
	"addx %1,%1,%1 ! so this can't give carry\n\t" \
	"subcc %%g1,1,%%g1\n\t" \
	"2: bne 1b\n\t" \
	"subcc %1,%2,%%g0\n\t" \
	"bcs 3f\n\t" \
	"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
	"b 3f\n\t" \
	"sub %1,%2,%1 ! this kills msb of n\n\t" \
	"4: sub %1,%2,%1\n\t" \
	"5: addxcc %1,%1,%1\n\t" \
	"bcc 2b\n\t" \
	"subcc %%g1,1,%%g1\n\t" \
	"! Got carry from n. Subtract next step to cancel this carry.\n\t" \
	"bne 4b\n\t" \
	"addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
	"sub %1,%2,%1\n\t" \
	"3: xnor %0,0,%0\n\t" \
	"! End of inline udiv_qrnnd\n" \
	: "=&r" ((USItype)(q)), \
	  "=&r" ((USItype)(r)) \
	: "r" ((USItype)(d)), \
	  "1" ((USItype)(n1)), \
	  "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
1138
#endif
1139
#endif /* __sparc__ */

/***************************************
 **************  VAX	******************
 ***************************************/
#if defined(__vax__) && W_TYPE_SIZE == 32
1145
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) -- double-word add on VAX:
 * "addl2" adds the low words and sets the carry, "adwc" (add with
 * carry) completes the high word.  "0"/"1" tie ah/al to the output
 * operands so the adds work in place.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
	"adwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
1154
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) -- double-word subtract on VAX:
 * "subl2" subtracts the low words and sets the borrow, "sbwc"
 * (subtract with carry) completes the high word.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
	"sbwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
1163
/*
 * umul_ppmm(xh, xl, m0, m1) on VAX: "emul" performs a SIGNED 32x32->64
 * multiply into the union; the two mask-and-add terms then convert the
 * signed high word into the unsigned one (add m1 if m0's sign bit is
 * set, and m0 if m1's is -- ((SItype)x >> 31) is the all-ones mask for
 * a negative x).  The union splits the 64-bit result into low/high
 * 32-bit halves.
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
		struct {USItype __l, __h; } __i; \
	} __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("emul %1,%2,$0,%0" \
	: "=g" (__xx.__ll) \
	: "g" (__m0), \
	  "g" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		+ (((SItype) __m1 >> 31) & __m0)); \
} while (0)
1177
/*
 * sdiv_qrnnd(q, r, n1, n0, d) -- SIGNED divide of the double word n1:n0
 * by d using VAX "ediv", which yields quotient (q) and remainder (r)
 * directly.  An unsigned udiv_qrnnd is derived from this later via
 * __udiv_w_sdiv (see the generic section).
 */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
		struct {SItype __l, __h; } __i; \
	} __xx; \
	__xx.__i.__h = n1; __xx.__i.__l = n0; \
	__asm__ ("ediv %3,%2,%0,%1" \
	: "=g" (q), "=g" (r) \
	: "g" (__xx.__ll), "g" (d)); \
} while (0)
1187
#endif /* __vax__ */

/***************************************
 **************  Z8000	****************
 ***************************************/
#if defined(__z8000__) && W_TYPE_SIZE == 16
1193
/*
 * add_ssaaaa for Z8000 (16-bit words): "add" sets the carry from the
 * low-word add, "adc" folds it into the high word.  %H selects the
 * 16-bit register form of the operand.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	  "=&r" ((unsigned int)(sl)) \
	: "%0" ((unsigned int)(ah)), \
	  "r" ((unsigned int)(bh)), \
	  "%1" ((unsigned int)(al)), \
	  "rQR" ((unsigned int)(bl)))
1201
/*
 * sub_ddmmss for Z8000: "sub" sets the borrow from the low-word
 * subtract, "sbc" propagates it into the high word.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
	: "=r" ((unsigned int)(sh)), \
	  "=&r" ((unsigned int)(sl)) \
	: "0" ((unsigned int)(ah)), \
	  "r" ((unsigned int)(bh)), \
	  "1" ((unsigned int)(al)), \
	  "rQR" ((unsigned int)(bl)))
1209
/*
 * umul_ppmm(xh, xl, m0, m1) for Z8000: "mult" is a SIGNED 16x16->32
 * multiply into a register pair (the union's __h/__l halves).  As in
 * the VAX version, the two mask-and-add terms afterwards correct the
 * signed high word into the unsigned product ((signed int)x >> 15 is
 * the all-ones mask when x's sign bit is set).
 */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {long int __ll; \
		struct {unsigned int __h, __l; } __i; \
	} __xx; \
	unsigned int __m0 = (m0), __m1 = (m1); \
	__asm__ ("mult %S0,%H3" \
	: "=r" (__xx.__i.__h), \
	  "=r" (__xx.__i.__l) \
	: "%1" (__m0), \
	  "rQR" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((signed int) __m0 >> 15) & __m1) \
		+ (((signed int) __m1 >> 15) & __m0)); \
} while (0)
1224
#endif /* __z8000__ */
1225
1226
#endif /* __GNUC__ */

/***************************************
 ***********  Generic Versions	********
 ***************************************/
#if !defined(umul_ppmm) && defined(__umulsidi3)
/*
 * Generic umul_ppmm built on __umulsidi3: the double-word product is
 * computed in one go and split into its high (ph) and low (pl) words.
 * Wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in un-braced if/else with a trailing semicolon,
 * unlike the previous bare { } block); ph/pl are parenthesized so
 * expression arguments expand correctly.
 */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	UDWtype __ll = __umulsidi3(m0, m1); \
	(ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
	(pl) = (UWtype) __ll; \
} while (0)
#endif
1239
1240
#if !defined(__umulsidi3)
/*
 * Generic __umulsidi3: full-word multiply returning the double-word
 * product, built from umul_ppmm via a GNU statement expression (the
 * final expression is the macro's value).
 */
#define __umulsidi3(u, v) \
	({UWtype __hi, __lo; \
	umul_ppmm(__hi, __lo, u, v); \
	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif
1246
1247
/* If this machine has no inline assembler, use C macros. */
1248
1249
#if !defined(add_ssaaaa)
/*
 * Portable C double-word add: (sh:sl) = (ah:al) + (bh:bl).  The carry
 * out of the low-word add is detected by the unsigned wraparound test
 * __x < (al).  Note al is evaluated twice, so arguments should be free
 * of side effects.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __x; \
	__x = (al) + (bl); \
	(sh) = (ah) + (bh) + (__x < (al)); \
	(sl) = __x; \
} while (0)
#endif
1258
1259
#if !defined(sub_ddmmss)
/*
 * Portable C double-word subtract: (sh:sl) = (ah:al) - (bh:bl).  The
 * borrow is detected by the wraparound test __x > (al).  As with
 * add_ssaaaa, al is evaluated twice.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __x; \
	__x = (al) - (bl); \
	(sh) = (ah) - (bh) - (__x > (al)); \
	(sl) = __x; \
} while (0)
#endif
1268
1269
#if !defined(umul_ppmm)
/*
 * Portable C umul_ppmm: schoolbook multiply on half-words.  u and v are
 * split into high/low halves; the four partial products __x0..__x3 are
 * combined so that (w1:w0) is the full double-word product.  The only
 * possible carry is from the __x1 += __x2 addition, detected by
 * wraparound and added back at position __ll_B.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
	UWtype __x0, __x1, __x2, __x3; \
	UHWtype __ul, __vl, __uh, __vh; \
	UWtype __u = (u), __v = (v); \
	\
	__ul = __ll_lowpart(__u); \
	__uh = __ll_highpart(__u); \
	__vl = __ll_lowpart(__v); \
	__vh = __ll_highpart(__v); \
	\
	__x0 = (UWtype) __ul * __vl; \
	__x1 = (UWtype) __ul * __vh; \
	__x2 = (UWtype) __uh * __vl; \
	__x3 = (UWtype) __uh * __vh; \
	\
	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
	__x1 += __x2; /* but this indeed can */ \
	if (__x1 < __x2) /* did we get it? */ \
		__x3 += __ll_B; /* yes, add it in the proper pos. */ \
	\
	(w1) = __x3 + __ll_highpart(__x1); \
	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
} while (0)
#endif
1295
1296
#if !defined(smul_ppmm)
/*
 * smul_ppmm(w1, w0, u, v) -- signed double-word multiply built on
 * umul_ppmm (which is always defined by this point).  For
 * two's-complement inputs the unsigned product's high word exceeds the
 * signed one by v when u is negative and by u when v is negative;
 * -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask exactly when x's sign
 * bit is set, so the two masked terms subtract those corrections.
 *
 * NOTE(review): the guard here used to be "#if !defined(umul_ppmm)",
 * which is always false at this point because umul_ppmm is defined
 * unconditionally just above -- smul_ppmm was therefore never defined.
 * Guard on smul_ppmm itself so the fallback actually takes effect.
 */
#define smul_ppmm(w1, w0, u, v) \
do { \
	UWtype __w1; \
	UWtype __m0 = (u), __m1 = (v); \
	umul_ppmm(__w1, w0, __m0, __m1); \
	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
		- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
} while (0)
#endif
1306
1307
/* Define this unconditionally, so it can be used for debugging. */
/*
 * __udiv_qrnnd_c(q, r, n1, n0, d) -- portable C double-word division:
 * divides n1:n0 by d, producing quotient q and remainder r.  Works in
 * two half-word steps: first __q1 = high quotient digit from n1 and the
 * high half of n0, then __q0 from the remainder and the low half; each
 * step over-estimates by dividing only by d's high half (__d1) and then
 * corrects __q down by at most 2.  __d1 must be nonzero, i.e. d must be
 * normalized (high bit set) -- this is why UDIV_NEEDS_NORMALIZATION is
 * set to 1 when this fallback is selected below.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
	__d1 = __ll_highpart(d); \
	__d0 = __ll_lowpart(d); \
	\
	__r1 = (n1) % __d1; \
	__q1 = (n1) / __d1; \
	__m = (UWtype) __q1 * __d0; \
	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
	if (__r1 < __m) { \
		__q1--, __r1 += (d); \
		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
			if (__r1 < __m) \
				__q1--, __r1 += (d); \
	} \
	__r1 -= __m; \
	\
	__r0 = __r1 % __d1; \
	__q0 = __r1 / __d1; \
	__m = (UWtype) __q0 * __d0; \
	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
	if (__r0 < __m) { \
		__q0--, __r0 += (d); \
		if (__r0 >= (d)) \
			if (__r0 < __m) \
				__q0--, __r0 += (d); \
	} \
	__r0 -= __m; \
	\
	(q) = (UWtype) __q1 * __ll_B | __q0; \
	(r) = __r0; \
} while (0)
1341
1342
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere). */
/*
 * Unsigned division synthesized from the signed sdiv_qrnnd via the
 * external helper __MPN(udiv_w_sdiv), which returns the quotient and
 * stores the remainder through its first argument.
 */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	UWtype __r; \
	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
	(r) = __r; \
} while (0)
#endif
1352
1353
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
/* The C fallback requires a normalized divisor (see __udiv_qrnnd_c),
 * so callers must pre-shift when UDIV_NEEDS_NORMALIZATION is 1. */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Architectures whose asm udiv_qrnnd handles any divisor leave this 0. */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
1362
1363