1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6
7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
#
30
# freal.s:
31
# This file is appended to the top of the 060FPSP package
32
# and contains the entry points into the package. The user, in
33
# effect, branches to one of the branch table entries located
34
# after _060FPSP_TABLE.
35
# Also, subroutine stubs exist in this file (_fpsp_done for
36
# example) that are referenced by the FPSP package itself in order
37
# to call a given routine. The stub routine actually performs the
38
# callout. The FPSP code does a "bsr" to the stub routine. This
39
# extra layer of hierarchy adds a slight performance penalty but
40
# it makes the FPSP code easier to read and more maintainable.
41
#
42
43
set _off_bsun, 0x00
44
set _off_snan, 0x04
45
set _off_operr, 0x08
46
set _off_ovfl, 0x0c
47
set _off_unfl, 0x10
48
set _off_dz, 0x14
49
set _off_inex, 0x18
50
set _off_fline, 0x1c
51
set _off_fpu_dis, 0x20
52
set _off_trap, 0x24
53
set _off_trace, 0x28
54
set _off_access, 0x2c
55
set _off_done, 0x30
56
57
set _off_imr, 0x40
58
set _off_dmr, 0x44
59
set _off_dmw, 0x48
60
set _off_irw, 0x4c
61
set _off_irl, 0x50
62
set _off_drb, 0x54
63
set _off_drw, 0x58
64
set _off_drl, 0x5c
65
set _off_dwb, 0x60
66
set _off_dww, 0x64
67
set _off_dwl, 0x68
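#
# The call-out stubs below expect a table of long-word entries to sit in
# the 0x80 bytes immediately before _060FPSP_TABLE; each entry holds the
# displacement of the corresponding kernel routine measured from the start
# of that table. A minimal sketch of such a table (the _os_* labels are
# hypothetical, not part of this package):
#
#	_060_callouts:				# == _060FPSP_TABLE-0x80
#	long	_os_bsun - _060_callouts	# _off_bsun (0x00)
#	long	_os_snan - _060_callouts	# _off_snan (0x04)
#	...					# one long per _off_* above
#	long	_os_dwl - _060_callouts		# _off_dwl (0x68)
#	space	20				# pad the table out to 0x80 bytes
#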
68
69
_060FPSP_TABLE:
70
71
###############################################################
72
73
# Here's the table of ENTRY POINTS for those linking the package.
74
bra.l _fpsp_snan
75
short 0x0000
76
bra.l _fpsp_operr
77
short 0x0000
78
bra.l _fpsp_ovfl
79
short 0x0000
80
bra.l _fpsp_unfl
81
short 0x0000
82
bra.l _fpsp_dz
83
short 0x0000
84
bra.l _fpsp_inex
85
short 0x0000
86
bra.l _fpsp_fline
87
short 0x0000
88
bra.l _fpsp_unsupp
89
short 0x0000
90
bra.l _fpsp_effadd
91
short 0x0000
92
93
space 56
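#
# Each entry above occupies 8 bytes (a 6-byte bra.l plus the 2-byte pad),
# so the entry points sit at fixed offsets from _060FPSP_TABLE: +0x00 snan,
# +0x08 operr, +0x10 ovfl, +0x18 unfl, +0x20 dz, +0x28 inex, +0x30 fline,
# +0x38 unsupp, +0x40 effadd; the "space 56" pads the table to 0x80 bytes.
# A rough sketch of how an installing kernel might point its FP exception
# vectors at them (the register choices, and the assumption that %a1
# already holds the vector table base, are illustrative only):
#
#	lea	(_060FPSP_TABLE+0x00,%pc),%a0	# _fpsp_snan
#	mov.l	%a0,0xd8(%a1)			# SNAN_VEC
#	lea	(_060FPSP_TABLE+0x10,%pc),%a0	# _fpsp_ovfl
#	mov.l	%a0,0xd4(%a1)			# OVFL_VEC
#	lea	(_060FPSP_TABLE+0x18,%pc),%a0	# _fpsp_unfl
#	mov.l	%a0,0xcc(%a1)			# UNFL_VEC
#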
94
95
###############################################################
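# Every call-out stub below uses the same five-instruction sequence,
# sketched here with <table> standing for _060FPSP_TABLE-0x80 (the start
# of the call-out table):
#
#	mov.l	%d0,-(%sp)			# preserve d0
#	mov.l	(<table>+_off_x,%pc),%d0	# d0 = routine's displacement
#	pea.l	(<table>,%pc,%d0)		# push routine's absolute address
#	mov.l	0x4(%sp),%d0			# restore d0 from the saved copy
#	rtd	&0x4				# jump there, discarding saved d0
#
# The net effect is an indirect jmp through the call-out table that leaves
# all registers intact; stubs reached with "bsr" (e.g. _imem_read_long)
# still present the package's return address on top of the stack.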
96
global _fpsp_done
97
_fpsp_done:
98
mov.l %d0,-(%sp)
99
mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
100
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
101
mov.l 0x4(%sp),%d0
102
rtd &0x4
103
104
global _real_ovfl
105
_real_ovfl:
106
mov.l %d0,-(%sp)
107
mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
108
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
109
mov.l 0x4(%sp),%d0
110
rtd &0x4
111
112
global _real_unfl
113
_real_unfl:
114
mov.l %d0,-(%sp)
115
mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
116
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
117
mov.l 0x4(%sp),%d0
118
rtd &0x4
119
120
global _real_inex
121
_real_inex:
122
mov.l %d0,-(%sp)
123
mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
124
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
125
mov.l 0x4(%sp),%d0
126
rtd &0x4
127
128
global _real_bsun
129
_real_bsun:
130
mov.l %d0,-(%sp)
131
mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
132
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
133
mov.l 0x4(%sp),%d0
134
rtd &0x4
135
136
global _real_operr
137
_real_operr:
138
mov.l %d0,-(%sp)
139
mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
140
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
141
mov.l 0x4(%sp),%d0
142
rtd &0x4
143
144
global _real_snan
145
_real_snan:
146
mov.l %d0,-(%sp)
147
mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
148
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
149
mov.l 0x4(%sp),%d0
150
rtd &0x4
151
152
global _real_dz
153
_real_dz:
154
mov.l %d0,-(%sp)
155
mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
156
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
157
mov.l 0x4(%sp),%d0
158
rtd &0x4
159
160
global _real_fline
161
_real_fline:
162
mov.l %d0,-(%sp)
163
mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
164
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
165
mov.l 0x4(%sp),%d0
166
rtd &0x4
167
168
global _real_fpu_disabled
169
_real_fpu_disabled:
170
mov.l %d0,-(%sp)
171
mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
172
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
173
mov.l 0x4(%sp),%d0
174
rtd &0x4
175
176
global _real_trap
177
_real_trap:
178
mov.l %d0,-(%sp)
179
mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
180
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
181
mov.l 0x4(%sp),%d0
182
rtd &0x4
183
184
global _real_trace
185
_real_trace:
186
mov.l %d0,-(%sp)
187
mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
188
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
189
mov.l 0x4(%sp),%d0
190
rtd &0x4
191
192
global _real_access
193
_real_access:
194
mov.l %d0,-(%sp)
195
mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
196
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
197
mov.l 0x4(%sp),%d0
198
rtd &0x4
199
200
#######################################
201
202
global _imem_read
203
_imem_read:
204
mov.l %d0,-(%sp)
205
mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
206
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
207
mov.l 0x4(%sp),%d0
208
rtd &0x4
209
210
global _dmem_read
211
_dmem_read:
212
mov.l %d0,-(%sp)
213
mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
214
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
215
mov.l 0x4(%sp),%d0
216
rtd &0x4
217
218
global _dmem_write
219
_dmem_write:
220
mov.l %d0,-(%sp)
221
mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
222
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
223
mov.l 0x4(%sp),%d0
224
rtd &0x4
225
226
global _imem_read_word
227
_imem_read_word:
228
mov.l %d0,-(%sp)
229
mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
230
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
231
mov.l 0x4(%sp),%d0
232
rtd &0x4
233
234
global _imem_read_long
235
_imem_read_long:
236
mov.l %d0,-(%sp)
237
mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
238
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
239
mov.l 0x4(%sp),%d0
240
rtd &0x4
241
242
global _dmem_read_byte
243
_dmem_read_byte:
244
mov.l %d0,-(%sp)
245
mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
246
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
247
mov.l 0x4(%sp),%d0
248
rtd &0x4
249
250
global _dmem_read_word
251
_dmem_read_word:
252
mov.l %d0,-(%sp)
253
mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
254
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
255
mov.l 0x4(%sp),%d0
256
rtd &0x4
257
258
global _dmem_read_long
259
_dmem_read_long:
260
mov.l %d0,-(%sp)
261
mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
262
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
263
mov.l 0x4(%sp),%d0
264
rtd &0x4
265
266
global _dmem_write_byte
267
_dmem_write_byte:
268
mov.l %d0,-(%sp)
269
mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
270
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
271
mov.l 0x4(%sp),%d0
272
rtd &0x4
273
274
global _dmem_write_word
275
_dmem_write_word:
276
mov.l %d0,-(%sp)
277
mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
278
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
279
mov.l 0x4(%sp),%d0
280
rtd &0x4
281
282
global _dmem_write_long
283
_dmem_write_long:
284
mov.l %d0,-(%sp)
285
mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
286
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
287
mov.l 0x4(%sp),%d0
288
rtd &0x4
289
290
#
291
# This file contains a set of define statements for constants
292
# in order to promote readability within the core code itself.
293
#
294
295
set LOCAL_SIZE, 192 # stack frame size(bytes)
296
set LV, -LOCAL_SIZE # stack offset
297
298
set EXC_SR, 0x4 # stack status register
299
set EXC_PC, 0x6 # stack pc
300
set EXC_VOFF, 0xa # stacked vector offset
301
set EXC_EA, 0xc # stacked <ea>
302
303
set EXC_FP, 0x0 # frame pointer
304
305
set EXC_AREGS, -68 # offset of all address regs
306
set EXC_DREGS, -100 # offset of all data regs
307
set EXC_FPREGS, -36 # offset of all fp regs
308
309
set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
310
set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
311
set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
312
set EXC_A5, EXC_AREGS+(5*4)
313
set EXC_A4, EXC_AREGS+(4*4)
314
set EXC_A3, EXC_AREGS+(3*4)
315
set EXC_A2, EXC_AREGS+(2*4)
316
set EXC_A1, EXC_AREGS+(1*4)
317
set EXC_A0, EXC_AREGS+(0*4)
318
set EXC_D7, EXC_DREGS+(7*4)
319
set EXC_D6, EXC_DREGS+(6*4)
320
set EXC_D5, EXC_DREGS+(5*4)
321
set EXC_D4, EXC_DREGS+(4*4)
322
set EXC_D3, EXC_DREGS+(3*4)
323
set EXC_D2, EXC_DREGS+(2*4)
324
set EXC_D1, EXC_DREGS+(1*4)
325
set EXC_D0, EXC_DREGS+(0*4)
326
327
set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
328
set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
329
set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
330
331
set FP_SCR1, LV+80 # fp scratch 1
332
set FP_SCR1_EX, FP_SCR1+0
333
set FP_SCR1_SGN, FP_SCR1+2
334
set FP_SCR1_HI, FP_SCR1+4
335
set FP_SCR1_LO, FP_SCR1+8
336
337
set FP_SCR0, LV+68 # fp scratch 0
338
set FP_SCR0_EX, FP_SCR0+0
339
set FP_SCR0_SGN, FP_SCR0+2
340
set FP_SCR0_HI, FP_SCR0+4
341
set FP_SCR0_LO, FP_SCR0+8
342
343
set FP_DST, LV+56 # fp destination operand
344
set FP_DST_EX, FP_DST+0
345
set FP_DST_SGN, FP_DST+2
346
set FP_DST_HI, FP_DST+4
347
set FP_DST_LO, FP_DST+8
348
349
set FP_SRC, LV+44 # fp source operand
350
set FP_SRC_EX, FP_SRC+0
351
set FP_SRC_SGN, FP_SRC+2
352
set FP_SRC_HI, FP_SRC+4
353
set FP_SRC_LO, FP_SRC+8
354
355
set USER_FPIAR, LV+40 # FP instr address register
356
357
set USER_FPSR, LV+36 # FP status register
358
set FPSR_CC, USER_FPSR+0 # FPSR condition codes
359
set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
360
set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
361
set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
362
363
set USER_FPCR, LV+32 # FP control register
364
set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
365
set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
366
367
set L_SCR3, LV+28 # integer scratch 3
368
set L_SCR2, LV+24 # integer scratch 2
369
set L_SCR1, LV+20 # integer scratch 1
370
371
set STORE_FLG, LV+19 # flag: operand store (i.e. not fcmp/ftst)
372
373
set EXC_TEMP2, LV+24 # temporary space
374
set EXC_TEMP, LV+16 # temporary space
375
376
set DTAG, LV+15 # destination operand type
377
set STAG, LV+14 # source operand type
378
379
set SPCOND_FLG, LV+10 # flag: special case (see below)
380
381
set EXC_CC, LV+8 # saved condition codes
382
set EXC_EXTWPTR, LV+4 # saved current PC (active)
383
set EXC_EXTWORD, LV+2 # saved extension word
384
set EXC_CMDREG, LV+2 # saved extension word
385
set EXC_OPWORD, LV+0 # saved operation word
386
387
################################
388
389
# Helpful macros
390
391
set FTEMP, 0 # offsets within an
392
set FTEMP_EX, 0 # extended precision
393
set FTEMP_SGN, 2 # value saved in memory.
394
set FTEMP_HI, 4
395
set FTEMP_LO, 8
396
set FTEMP_GRS, 12
397
398
set LOCAL, 0 # offsets within an
399
set LOCAL_EX, 0 # extended precision
400
set LOCAL_SGN, 2 # value saved in memory.
401
set LOCAL_HI, 4
402
set LOCAL_LO, 8
403
set LOCAL_GRS, 12
404
405
set DST, 0 # offsets within an
406
set DST_EX, 0 # extended precision
407
set DST_HI, 4 # value saved in memory.
408
set DST_LO, 8
409
410
set SRC, 0 # offsets within an
411
set SRC_EX, 0 # extended precision
412
set SRC_HI, 4 # value saved in memory.
413
set SRC_LO, 8
414
415
set SGL_LO, 0x3f81 # min sgl prec exponent
416
set SGL_HI, 0x407e # max sgl prec exponent
417
set DBL_LO, 0x3c01 # min dbl prec exponent
418
set DBL_HI, 0x43fe # max dbl prec exponent
419
set EXT_LO, 0x0 # min ext prec exponent
420
set EXT_HI, 0x7ffe # max ext prec exponent
421
422
set EXT_BIAS, 0x3fff # extended precision bias
423
set SGL_BIAS, 0x007f # single precision bias
424
set DBL_BIAS, 0x03ff # double precision bias
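# The single/double range limits above are just those formats' exponent
# limits re-expressed with the extended-precision bias:
#	SGL_HI = EXT_BIAS + 127  = 0x3fff + 0x7f  = 0x407e
#	SGL_LO = EXT_BIAS - 126  = 0x3fff - 0x7e  = 0x3f81
#	DBL_HI = EXT_BIAS + 1023 = 0x3fff + 0x3ff = 0x43fe
#	DBL_LO = EXT_BIAS - 1022 = 0x3fff - 0x3fe = 0x3c01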
425
426
set NORM, 0x00 # operand type for STAG/DTAG
427
set ZERO, 0x01 # operand type for STAG/DTAG
428
set INF, 0x02 # operand type for STAG/DTAG
429
set QNAN, 0x03 # operand type for STAG/DTAG
430
set DENORM, 0x04 # operand type for STAG/DTAG
431
set SNAN, 0x05 # operand type for STAG/DTAG
432
set UNNORM, 0x06 # operand type for STAG/DTAG
433
434
##################
435
# FPSR/FPCR bits #
436
##################
437
set neg_bit, 0x3 # negative result
438
set z_bit, 0x2 # zero result
439
set inf_bit, 0x1 # infinite result
440
set nan_bit, 0x0 # NAN result
441
442
set q_sn_bit, 0x7 # sign bit of quotient byte
443
444
set bsun_bit, 7 # branch on unordered
445
set snan_bit, 6 # signalling NAN
446
set operr_bit, 5 # operand error
447
set ovfl_bit, 4 # overflow
448
set unfl_bit, 3 # underflow
449
set dz_bit, 2 # divide by zero
450
set inex2_bit, 1 # inexact result 2
451
set inex1_bit, 0 # inexact result 1
452
453
set aiop_bit, 7 # accrued illegal operation bit
454
set aovfl_bit, 6 # accrued overflow bit
455
set aunfl_bit, 5 # accrued underflow bit
456
set adz_bit, 4 # accrued dz bit
457
set ainex_bit, 3 # accrued inexact bit
458
459
#############################
460
# FPSR individual bit masks #
461
#############################
462
set neg_mask, 0x08000000 # negative bit mask (lw)
463
set inf_mask, 0x02000000 # infinity bit mask (lw)
464
set z_mask, 0x04000000 # zero bit mask (lw)
465
set nan_mask, 0x01000000 # nan bit mask (lw)
466
467
set neg_bmask, 0x08 # negative bit mask (byte)
468
set inf_bmask, 0x02 # infinity bit mask (byte)
469
set z_bmask, 0x04 # zero bit mask (byte)
470
set nan_bmask, 0x01 # nan bit mask (byte)
471
472
set bsun_mask, 0x00008000 # bsun exception mask
473
set snan_mask, 0x00004000 # snan exception mask
474
set operr_mask, 0x00002000 # operr exception mask
475
set ovfl_mask, 0x00001000 # overflow exception mask
476
set unfl_mask, 0x00000800 # underflow exception mask
477
set dz_mask, 0x00000400 # dz exception mask
478
set inex2_mask, 0x00000200 # inex2 exception mask
479
set inex1_mask, 0x00000100 # inex1 exception mask
480
481
set aiop_mask, 0x00000080 # accrued illegal operation
482
set aovfl_mask, 0x00000040 # accrued overflow
483
set aunfl_mask, 0x00000020 # accrued underflow
484
set adz_mask, 0x00000010 # accrued divide by zero
485
set ainex_mask, 0x00000008 # accrued inexact
486
487
######################################
488
# FPSR combinations used in the FPSP #
489
######################################
490
set dzinf_mask, inf_mask+dz_mask+adz_mask
491
set opnan_mask, nan_mask+operr_mask+aiop_mask
492
set nzi_mask, 0x01ffffff # clears N, Z, and I
493
set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
494
set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
495
set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
496
set inx1a_mask, inex1_mask+ainex_mask
497
set inx2a_mask, inex2_mask+ainex_mask
498
set snaniop_mask, nan_mask+snan_mask+aiop_mask
499
set snaniop2_mask, snan_mask+aiop_mask
500
set naniop_mask, nan_mask+aiop_mask
501
set neginf_mask, neg_mask+inf_mask
502
set infaiop_mask, inf_mask+aiop_mask
503
set negz_mask, neg_mask+z_mask
504
set opaop_mask, operr_mask+aiop_mask
505
set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
506
set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
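# A handler can flag one of these composite results with a single
# immediate OR into the saved FPSR image, for example (illustrative only):
#
#	ori.l	&ovfinx_mask,USER_FPSR(%a6)	# OVFL+INEX2 plus accrued bits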
507
508
#########
509
# misc. #
510
#########
511
set rnd_stky_bit, 29 # sticky bit pos in longword
512
513
set sign_bit, 0x7 # sign bit
514
set signan_bit, 0x6 # signalling nan bit
515
516
set sgl_thresh, 0x3f81 # minimum sgl exponent
517
set dbl_thresh, 0x3c01 # minimum dbl exponent
518
519
set x_mode, 0x0 # extended precision
520
set s_mode, 0x4 # single precision
521
set d_mode, 0x8 # double precision
522
523
set rn_mode, 0x0 # round-to-nearest
524
set rz_mode, 0x1 # round-to-zero
525
set rm_mode, 0x2 # round-to-minus-infinity
526
set rp_mode, 0x3 # round-to-plus-infinity
527
528
set mantissalen, 64 # length of mantissa in bits
529
530
set BYTE, 1 # len(byte) == 1 byte
531
set WORD, 2 # len(word) == 2 bytes
532
set LONG, 4 # len(longword) == 4 bytes
533
534
set BSUN_VEC, 0xc0 # bsun vector offset
535
set INEX_VEC, 0xc4 # inexact vector offset
536
set DZ_VEC, 0xc8 # dz vector offset
537
set UNFL_VEC, 0xcc # unfl vector offset
538
set OPERR_VEC, 0xd0 # operr vector offset
539
set OVFL_VEC, 0xd4 # ovfl vector offset
540
set SNAN_VEC, 0xd8 # snan vector offset
541
542
###########################
543
# SPecial CONDition FLaGs #
544
###########################
545
set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
546
set fbsun_flg, 0x02 # flag bit: bsun exception
547
set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
548
set mda7_flg, 0x08 # flag bit: -(a7) <ea>
549
set fmovm_flg, 0x40 # flag bit: fmovm instruction
550
set immed_flg, 0x80 # flag bit: &<data> <ea>
551
552
set ftrapcc_bit, 0x0
553
set fbsun_bit, 0x1
554
set mia7_bit, 0x2
555
set mda7_bit, 0x3
556
set immed_bit, 0x7
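# each *_flg above is a one-bit mask whose bit number is the matching
# *_bit (e.g. mda7_flg = 0x08 = 1<<mda7_bit), so SPCOND_FLG can either be
# compared against a whole flag value or tested one bit at a time.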
557
558
##################################
559
# TRANSCENDENTAL "LAST-OP" FLAGS #
560
##################################
561
set FMUL_OP, 0x0 # fmul instr performed last
562
set FDIV_OP, 0x1 # fdiv performed last
563
set FADD_OP, 0x2 # fadd performed last
564
set FMOV_OP, 0x3 # fmov performed last
565
566
#############
567
# CONSTANTS #
568
#############
569
T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
570
T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
571
572
PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
573
PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
574
575
TWOBYPI:
576
long 0x3FE45F30,0x6DC9C883
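# These constants are raw bit images. PI and PIBY2 are extended-precision
# values padded to four longs (exponent word, 64-bit mantissa with explicit
# integer bit, then a pad long); e.g. PI's 0x4000 exponent and
# 0xC90FDAA22168C235 mantissa give 1.5707963... * 2^1. T1/T2 and TWOBYPI
# appear to be IEEE double-precision bit patterns.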
577
578
#########################################################################
579
# XDEF **************************************************************** #
580
# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
581
# #
582
# This handler should be the first code executed upon taking the #
583
# FP Overflow exception in an operating system. #
584
# #
585
# XREF **************************************************************** #
586
# _imem_read_long() - read instruction longword #
587
# fix_skewed_ops() - adjust src operand in fsave frame #
588
# set_tag_x() - determine optype of src/dst operands #
589
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
590
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
591
# load_fpn2() - load dst operand from FP regfile #
592
# fout() - emulate an opclass 3 instruction #
593
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
594
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
595
# _real_ovfl() - "callout" for Overflow exception enabled code #
596
# _real_inex() - "callout" for Inexact exception enabled code #
597
# _real_trace() - "callout" for Trace exception code #
598
# #
599
# INPUT *************************************************************** #
600
# - The system stack contains the FP Ovfl exception stack frame #
601
# - The fsave frame contains the source operand #
602
# #
603
# OUTPUT ************************************************************** #
604
# Overflow Exception enabled: #
605
# - The system stack is unchanged #
606
# - The fsave frame contains the adjusted src op for opclass 0,2 #
607
# Overflow Exception disabled: #
608
# - The system stack is unchanged #
609
# - The "exception present" flag in the fsave frame is cleared #
610
# #
611
# ALGORITHM *********************************************************** #
612
# On the 060, if an FP overflow is present as the result of any #
613
# instruction, the 060 will take an overflow exception whether the #
614
# exception is enabled or disabled in the FPCR. For the disabled case, #
615
# this handler emulates the instruction to determine what the correct #
616
# default result should be for the operation. This default result is #
617
# then stored in either the FP regfile, data regfile, or memory. #
618
# Finally, the handler exits through the "callout" _fpsp_done() #
619
# denoting that no exceptional conditions exist within the machine. #
620
# If the exception is enabled, then this handler must create the #
621
# exceptional operand and place it in the fsave state frame, and store #
622
# the default result (only if the instruction is opclass 3). For #
623
# exceptions enabled, this handler must exit through the "callout" #
624
# _real_ovfl() so that the operating system's enabled overflow handler #
625
# can handle this case. #
626
# Two other conditions exist. First, if overflow was disabled #
627
# but the inexact exception was enabled, this handler must exit #
628
# through the "callout" _real_inex() regardless of whether the result #
629
# was inexact. #
630
# Also, in the case of an opclass three instruction where #
631
# overflow was disabled and the trace exception was enabled, this #
632
# handler must exit through the "callout" _real_trace(). #
633
# #
634
#########################################################################
635
636
global _fpsp_ovfl
637
_fpsp_ovfl:
638
639
#$# sub.l &24,%sp # make room for src/dst
640
641
link.w %a6,&-LOCAL_SIZE # init stack frame
642
643
fsave FP_SRC(%a6) # grab the "busy" frame
644
645
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
646
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
647
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
648
649
# the FPIAR holds the "current PC" of the faulting instruction
650
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
651
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
652
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
653
bsr.l _imem_read_long # fetch the instruction words
654
mov.l %d0,EXC_OPWORD(%a6)
655
656
##############################################################################
657
658
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
659
bne.w fovfl_out
660
661
662
lea FP_SRC(%a6),%a0 # pass: ptr to src op
663
bsr.l fix_skewed_ops # fix src op
664
665
# since, I believe, only NORMs and DENORMs can come through here,
666
# maybe we can avoid the subroutine call.
667
lea FP_SRC(%a6),%a0 # pass: ptr to src op
668
bsr.l set_tag_x # tag the operand type
669
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
670
671
# bit five of the fp extension word separates the monadic and dyadic operations
672
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
673
# will never take this exception.
674
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
675
beq.b fovfl_extract # monadic
676
677
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
678
bsr.l load_fpn2 # load dst into FP_DST
679
680
lea FP_DST(%a6),%a0 # pass: ptr to dst op
681
bsr.l set_tag_x # tag the operand type
682
cmpi.b %d0,&UNNORM # is operand an UNNORM?
683
bne.b fovfl_op2_done # no
684
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
685
fovfl_op2_done:
686
mov.b %d0,DTAG(%a6) # save dst optype tag
687
688
fovfl_extract:
689
690
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
691
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
692
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
693
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
694
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
695
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
696
697
clr.l %d0
698
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
699
700
mov.b 1+EXC_CMDREG(%a6),%d1
701
andi.w &0x007f,%d1 # extract extension
702
703
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
704
705
fmov.l &0x0,%fpcr # zero current control regs
706
fmov.l &0x0,%fpsr
707
708
lea FP_SRC(%a6),%a0
709
lea FP_DST(%a6),%a1
710
711
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
712
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
713
jsr (tbl_unsupp.l,%pc,%d1.l*1)
714
715
# the operation has been emulated. the result is in fp0.
716
# the EXOP, if an exception occurred, is in fp1.
717
# we must save the default result regardless of whether
718
# traps are enabled or disabled.
719
bfextu EXC_CMDREG(%a6){&6:&3},%d0
720
bsr.l store_fpreg
721
722
# the exceptional possibilities we have left ourselves with are ONLY overflow
723
# and inexact. and, the inexact is such that overflow occurred and was disabled
724
# but inexact was enabled.
725
btst &ovfl_bit,FPCR_ENABLE(%a6)
726
bne.b fovfl_ovfl_on
727
728
btst &inex2_bit,FPCR_ENABLE(%a6)
729
bne.b fovfl_inex_on
730
731
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
732
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
733
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
734
735
unlk %a6
736
#$# add.l &24,%sp
737
bra.l _fpsp_done
738
739
# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
740
# in fp1. now, simply jump to _real_ovfl()!
741
fovfl_ovfl_on:
742
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
743
744
mov.w &0xe005,2+FP_SRC(%a6) # save exc status
745
746
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
747
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
748
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
749
750
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
751
752
unlk %a6
753
754
bra.l _real_ovfl
755
756
# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
757
# we must jump to real_inex().
758
fovfl_inex_on:
759
760
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
761
762
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
763
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
764
765
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
766
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
767
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
768
769
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
770
771
unlk %a6
772
773
bra.l _real_inex
774
775
########################################################################
776
fovfl_out:
777
778
779
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
780
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
781
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
782
783
# the src operand is definitely a NORM(!), so tag it as such
784
mov.b &NORM,STAG(%a6) # set src optype tag
785
786
clr.l %d0
787
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
788
789
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
790
791
fmov.l &0x0,%fpcr # zero current control regs
792
fmov.l &0x0,%fpsr
793
794
lea FP_SRC(%a6),%a0 # pass ptr to src operand
795
796
bsr.l fout
797
798
btst &ovfl_bit,FPCR_ENABLE(%a6)
799
bne.w fovfl_ovfl_on
800
801
btst &inex2_bit,FPCR_ENABLE(%a6)
802
bne.w fovfl_inex_on
803
804
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
805
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
806
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
807
808
unlk %a6
809
#$# add.l &24,%sp
810
811
btst &0x7,(%sp) # is trace on?
812
beq.l _fpsp_done # no
813
814
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
815
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
816
bra.l _real_trace
817
818
#########################################################################
819
# XDEF **************************************************************** #
820
# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
821
# #
822
# This handler should be the first code executed upon taking the #
823
# FP Underflow exception in an operating system. #
824
# #
825
# XREF **************************************************************** #
826
# _imem_read_long() - read instruction longword #
827
# fix_skewed_ops() - adjust src operand in fsave frame #
828
# set_tag_x() - determine optype of src/dst operands #
829
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
830
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
831
# load_fpn2() - load dst operand from FP regfile #
832
# fout() - emulate an opclass 3 instruction #
833
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
834
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
835
# _real_unfl() - "callout" for Underflow exception enabled code #
836
# _real_inex() - "callout" for Inexact exception enabled code #
837
# _real_trace() - "callout" for Trace exception code #
838
# #
839
# INPUT *************************************************************** #
840
# - The system stack contains the FP Unfl exception stack frame #
841
# - The fsave frame contains the source operand #
842
# #
843
# OUTPUT ************************************************************** #
844
# Underflow Exception enabled: #
845
# - The system stack is unchanged #
846
# - The fsave frame contains the adjusted src op for opclass 0,2 #
847
# Underflow Exception disabled: #
848
# - The system stack is unchanged #
849
# - The "exception present" flag in the fsave frame is cleared #
850
# #
851
# ALGORITHM *********************************************************** #
852
# On the 060, if an FP underflow is present as the result of any #
853
# instruction, the 060 will take an underflow exception whether the #
854
# exception is enabled or disabled in the FPCR. For the disabled case, #
855
# this handler emulates the instruction to determine what the correct #
856
# default result should be for the operation. This default result is #
857
# then stored in either the FP regfile, data regfile, or memory. #
858
# Finally, the handler exits through the "callout" _fpsp_done() #
859
# denoting that no exceptional conditions exist within the machine. #
860
# If the exception is enabled, then this handler must create the #
861
# exceptional operand and place it in the fsave state frame, and store #
862
# the default result (only if the instruction is opclass 3). For #
863
# exceptions enabled, this handler must exit through the "callout" #
864
# _real_unfl() so that the operating system's enabled underflow handler #
865
# can handle this case. #
866
# Two other conditions exist. First, if underflow was disabled #
867
# but the inexact exception was enabled and the result was inexact, #
868
# this handler must exit through the "callout" _real_inex(). #
869
870
# Also, in the case of an opclass three instruction where #
871
# underflow was disabled and the trace exception was enabled, this #
872
# handler must exit through the "callout" _real_trace(). #
873
# #
874
#########################################################################
875
876
global _fpsp_unfl
877
_fpsp_unfl:
878
879
#$# sub.l &24,%sp # make room for src/dst
880
881
link.w %a6,&-LOCAL_SIZE # init stack frame
882
883
fsave FP_SRC(%a6) # grab the "busy" frame
884
885
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
886
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
887
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
888
889
# the FPIAR holds the "current PC" of the faulting instruction
890
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
891
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
892
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
893
bsr.l _imem_read_long # fetch the instruction words
894
mov.l %d0,EXC_OPWORD(%a6)
895
896
##############################################################################
897
898
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
899
bne.w funfl_out
900
901
902
lea FP_SRC(%a6),%a0 # pass: ptr to src op
903
bsr.l fix_skewed_ops # fix src op
904
905
lea FP_SRC(%a6),%a0 # pass: ptr to src op
906
bsr.l set_tag_x # tag the operand type
907
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
908
909
# bit five of the fp ext word separates the monadic and dyadic operations
910
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
911
# will never take this exception.
912
btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
913
beq.b funfl_extract # monadic
914
915
# now, what's left that's not dyadic is fsincos. we can distinguish it
916
# from all dyadics by the 0110xxx pattern in the extension word
917
btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
918
bne.b funfl_extract # yes
919
920
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
921
bsr.l load_fpn2 # load dst into FP_DST
922
923
lea FP_DST(%a6),%a0 # pass: ptr to dst op
924
bsr.l set_tag_x # tag the operand type
925
cmpi.b %d0,&UNNORM # is operand an UNNORM?
926
bne.b funfl_op2_done # no
927
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
928
funfl_op2_done:
929
mov.b %d0,DTAG(%a6) # save dst optype tag
930
931
funfl_extract:
932
933
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
934
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
935
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
936
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
937
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
938
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
939
940
clr.l %d0
941
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
942
943
mov.b 1+EXC_CMDREG(%a6),%d1
944
andi.w &0x007f,%d1 # extract extension
945
946
andi.l &0x00ff01ff,USER_FPSR(%a6)
947
948
fmov.l &0x0,%fpcr # zero current control regs
949
fmov.l &0x0,%fpsr
950
951
lea FP_SRC(%a6),%a0
952
lea FP_DST(%a6),%a1
953
954
# maybe we can make these entry points ONLY the UNFL entry points of each routine.
955
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
956
jsr (tbl_unsupp.l,%pc,%d1.l*1)
957
958
bfextu EXC_CMDREG(%a6){&6:&3},%d0
959
bsr.l store_fpreg
960
961
# The `060 FPU multiplier hardware is such that if the result of a
962
# multiply operation is the smallest possible normalized number
963
# (0x00000000_80000000_00000000), then the machine will take an
964
# underflow exception. Since this is incorrect, we need to check
965
# if our emulation, after re-doing the operation, decided that
966
# no underflow was called for. We do these checks only in
967
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
968
# special case will simply exit gracefully with the correct result.
969
970
# the exceptional possibilities we have left ourselves with are ONLY underflow
971
# and inexact. and, the inexact is such that underflow occurred and was disabled
972
# but inexact was enabled.
973
btst &unfl_bit,FPCR_ENABLE(%a6)
974
bne.b funfl_unfl_on
975
976
funfl_chkinex:
977
btst &inex2_bit,FPCR_ENABLE(%a6)
978
bne.b funfl_inex_on
979
980
funfl_exit:
981
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
982
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
983
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
984
985
unlk %a6
986
#$# add.l &24,%sp
987
bra.l _fpsp_done
988
989
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
990
# in fp1 (don't forget to save fp0). what to do now?
991
# well, we simply have to go to _real_unfl()!
992
funfl_unfl_on:
993
994
# The `060 FPU multiplier hardware is such that if the result of a
995
# multiply operation is the smallest possible normalized number
996
# (0x00000000_80000000_00000000), then the machine will take an
997
# underflow exception. Since this is incorrect, we check here to see
998
# if our emulation, after re-doing the operation, decided that
999
# no underflow was called for.
1000
btst &unfl_bit,FPSR_EXCEPT(%a6)
1001
beq.w funfl_chkinex
1002
1003
funfl_unfl_on2:
1004
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1005
1006
mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1007
1008
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1009
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1011
1012
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1013
1014
unlk %a6
1015
1016
bra.l _real_unfl
1017
1018
# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019
# we must jump to real_inex().
1020
funfl_inex_on:
1021
1022
# The `060 FPU multiplier hardware is such that if the result of a
1023
# multiply operation is the smallest possible normalized number
1024
# (0x00000000_80000000_00000000), then the machine will take an
1025
# underflow exception.
1026
# But, whether bogus or not, if inexact is enabled AND it occurred,
1027
# then we have to branch to real_inex.
1028
1029
btst &inex2_bit,FPSR_EXCEPT(%a6)
1030
beq.w funfl_exit
1031
1032
funfl_inex_on2:
1033
1034
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1035
1036
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
1037
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1038
1039
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1040
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1042
1043
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1044
1045
unlk %a6
1046
1047
bra.l _real_inex
1048
1049
#######################################################################
1050
funfl_out:
1051
1052
1053
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056
1057
# the src operand is definitely a NORM(!), so tag it as such
1058
mov.b &NORM,STAG(%a6) # set src optype tag
1059
1060
clr.l %d0
1061
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1062
1063
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
1064
1065
fmov.l &0x0,%fpcr # zero current control regs
1066
fmov.l &0x0,%fpsr
1067
1068
lea FP_SRC(%a6),%a0 # pass ptr to src operand
1069
1070
bsr.l fout
1071
1072
btst &unfl_bit,FPCR_ENABLE(%a6)
1073
bne.w funfl_unfl_on2
1074
1075
btst &inex2_bit,FPCR_ENABLE(%a6)
1076
bne.w funfl_inex_on2
1077
1078
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1079
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1081
1082
unlk %a6
1083
#$# add.l &24,%sp
1084
1085
btst &0x7,(%sp) # is trace on?
1086
beq.l _fpsp_done # no
1087
1088
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1089
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1090
bra.l _real_trace
1091
1092
#########################################################################
1093
# XDEF **************************************************************** #
1094
# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1095
# Data Type" exception. #
1096
# #
1097
# This handler should be the first code executed upon taking the #
1098
# FP Unimplemented Data Type exception in an operating system. #
1099
# #
1100
# XREF **************************************************************** #
1101
# _imem_read_{word,long}() - read instruction word/longword #
1102
# fix_skewed_ops() - adjust src operand in fsave frame #
1103
# set_tag_x() - determine optype of src/dst operands #
1104
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
1105
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
1106
# load_fpn2() - load dst operand from FP regfile #
1107
# load_fpn1() - load src operand from FP regfile #
1108
# fout() - emulate an opclass 3 instruction #
1109
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
1110
# _real_inex() - "callout" to operating system inexact handler #
1111
# _fpsp_done() - "callout" for exit; work all done #
1112
# _real_trace() - "callout" for Trace enabled exception #
1113
# funimp_skew() - adjust fsave src ops to "incorrect" value #
1114
# _real_snan() - "callout" for SNAN exception #
1115
# _real_operr() - "callout" for OPERR exception #
1116
# _real_ovfl() - "callout" for OVFL exception #
1117
# _real_unfl() - "callout" for UNFL exception #
1118
# get_packed() - fetch packed operand from memory #
1119
# #
1120
# INPUT *************************************************************** #
1121
# - The system stack contains the "Unimp Data Type" stk frame #
1122
# - The fsave frame contains the src op (for UNNORM/DENORM) #
1123
# #
1124
# OUTPUT ************************************************************** #
1125
# If Inexact exception (opclass 3): #
1126
# - The system stack is changed to an Inexact exception stk frame #
1127
# If SNAN exception (opclass 3): #
1128
# - The system stack is changed to an SNAN exception stk frame #
1129
# If OPERR exception (opclass 3): #
1130
# - The system stack is changed to an OPERR exception stk frame #
1131
# If OVFL exception (opclass 3): #
1132
# - The system stack is changed to an OVFL exception stk frame #
1133
# If UNFL exception (opclass 3): #
1134
# - The system stack is changed to an UNFL exception stack frame #
1135
# If Trace exception enabled: #
1136
# - The system stack is changed to a Trace exception stack frame #
1137
# Else: (normal case) #
1138
# - Correct result has been stored as appropriate #
1139
# #
1140
# ALGORITHM *********************************************************** #
1141
# Two main instruction types can enter here: (1) DENORM or UNNORM #
1142
# unimplemented data types. These can be either opclass 0,2 or 3 #
1143
# instructions, and (2) PACKED unimplemented data format instructions #
1144
# also of opclasses 0,2, or 3. #
1145
# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1146
# operand from the fsave state frame and the dst operand (if dyadic) #
1147
# from the FP register file. The instruction is then emulated by #
1148
# choosing an emulation routine from a table of routines indexed by #
1149
# instruction type. Once the instruction has been emulated and result #
1150
# saved, then we check to see if any enabled exceptions resulted from #
1151
# instruction emulation. If none, then we exit through the "callout" #
1152
# _fpsp_done(). If there is an enabled FP exception, then we insert #
1153
# this exception into the FPU in the fsave state frame and then exit #
1154
# through _fpsp_done(). #
1155
# PACKED opclass 0 and 2 is similar in how the instruction is #
1156
# emulated and exceptions handled. The differences occur in how the #
1157
# handler loads the packed op (by calling get_packed() routine) and #
1158
# by the fact that a Trace exception could be pending for PACKED ops. #
1159
# If a Trace exception is pending, then the current exception stack #
1160
# frame is changed to a Trace exception stack frame and an exit is #
1161
# made through _real_trace(). #
1162
# For UNNORM/DENORM opclass 3, the actual move out to memory is #
1163
# performed by calling the routine fout(). If no exception should occur #
1164
# as the result of emulation, then an exit either occurs through #
1165
# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1166
# (a Trace stack frame must be created here, too). If an FP exception #
1167
# should occur, then we must create an exception stack frame of that #
1168
# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1169
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1170
# emulation is performed in a similar manner. #
1171
# #
1172
#########################################################################
1173
1174
#
1175
# (1) DENORM and UNNORM (unimplemented) data types:
1176
#
1177
# post-instruction
1178
# *****************
1179
# * EA *
1180
# pre-instruction * *
1181
# ***************** *****************
1182
# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1183
# ***************** *****************
1184
# * Next * * Next *
1185
# * PC * * PC *
1186
# ***************** *****************
1187
# * SR * * SR *
1188
# ***************** *****************
1189
#
1190
# (2) PACKED format (unsupported) opclasses two and three:
1191
# *****************
1192
# * EA *
1193
# * *
1194
# *****************
1195
# * 0x2 * 0x0dc *
1196
# *****************
1197
# * Next *
1198
# * PC *
1199
# *****************
1200
# * SR *
1201
# *****************
1202
#
1203
global _fpsp_unsupp
1204
_fpsp_unsupp:
1205
1206
link.w %a6,&-LOCAL_SIZE # init stack frame
1207
1208
fsave FP_SRC(%a6) # save fp state
1209
1210
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1211
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1213
1214
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1215
bne.b fu_s
1216
fu_u:
1217
mov.l %usp,%a0 # fetch user stack pointer
1218
mov.l %a0,EXC_A7(%a6) # save on stack
1219
bra.b fu_cont
1220
# if the exception is an opclass zero or two unimplemented data type
1221
# exception, then the a7' calculated here is wrong since it doesn't
1222
# stack an ea. however, we don't need an a7' for this case anyways.
1223
fu_s:
1224
lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1225
mov.l %a0,EXC_A7(%a6) # save on stack
1226
1227
fu_cont:
1228
1229
# the FPIAR holds the "current PC" of the faulting instruction
1230
# the FPIAR should be set correctly for ALL exceptions passing through
1231
# this point.
1232
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1233
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1234
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1235
bsr.l _imem_read_long # fetch the instruction words
1236
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1237
1238
############################
1239
1240
clr.b SPCOND_FLG(%a6) # clear special condition flag
1241
1242
# Separate opclass three (fpn-to-mem) ops since they have a different
1243
# stack frame and protocol.
1244
btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1245
bne.w fu_out # yes
1246
1247
# Separate packed opclass two instructions.
1248
bfextu EXC_CMDREG(%a6){&0:&6},%d0
1249
cmpi.b %d0,&0x13
1250
beq.w fu_in_pack
1251
1252
1253
# I'm not sure at this point what FPSR bits are valid for this instruction.
1254
# so, since the emulation routines re-create them anyways, zero exception field
1255
andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1256
1257
fmov.l &0x0,%fpcr # zero current control regs
1258
fmov.l &0x0,%fpsr
1259
1260
# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261
# precision format if the src format was single or double and the
1262
# source data type was an INF, NAN, DENORM, or UNNORM
1263
lea FP_SRC(%a6),%a0 # pass ptr to input
1264
bsr.l fix_skewed_ops
1265
1266
# we don't know whether the src operand or the dst operand (or both) is the
1267
# UNNORM or DENORM. call the function that tags the operand type. if the
1268
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269
lea FP_SRC(%a6),%a0 # pass: ptr to src op
1270
bsr.l set_tag_x # tag the operand type
1271
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1272
bne.b fu_op2 # no
1273
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1274
1275
fu_op2:
1276
mov.b %d0,STAG(%a6) # save src optype tag
1277
1278
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1279
1280
# bit five of the fp extension word separates the monadic and dyadic operations
1281
# at this point
1282
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1283
beq.b fu_extract # monadic
1284
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1285
beq.b fu_extract # yes, so it's monadic, too
1286
1287
bsr.l load_fpn2 # load dst into FP_DST
1288
1289
lea FP_DST(%a6),%a0 # pass: ptr to dst op
1290
bsr.l set_tag_x # tag the operand type
1291
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1292
bne.b fu_op2_done # no
1293
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1294
fu_op2_done:
1295
mov.b %d0,DTAG(%a6) # save dst optype tag
1296
1297
fu_extract:
1298
clr.l %d0
1299
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1300
1301
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1302
1303
lea FP_SRC(%a6),%a0
1304
lea FP_DST(%a6),%a1
1305
1306
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1307
jsr (tbl_unsupp.l,%pc,%d1.l*1)
1308
1309
#
1310
# Exceptions in order of precedence:
1311
# BSUN : none
1312
# SNAN : all dyadic ops
1313
# OPERR : fsqrt(-NORM)
1314
# OVFL : all except ftst,fcmp
1315
# UNFL : all except ftst,fcmp
1316
# DZ : fdiv
1317
# INEX2 : all except ftst,fcmp
1318
# INEX1 : none (packed doesn't go through here)
1319
#
1320
1321
# we determine the highest priority exception(if any) set by the
1322
# emulation routine that has also been enabled by the user.
1323
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
1324
bne.b fu_in_ena # some are enabled
1325
1326
fu_in_cont:
1327
# fcmp and ftst do not store any result.
1328
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1329
andi.b &0x38,%d0 # extract bits 3-5
1330
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1331
beq.b fu_in_exit # yes
1332
1333
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1334
bsr.l store_fpreg # store the result
1335
1336
fu_in_exit:
1337
1338
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1339
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1341
1342
unlk %a6
1343
1344
bra.l _fpsp_done
1345
1346
fu_in_ena:
1347
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1348
bfffo %d0{&24:&8},%d0 # find highest priority exception
1349
bne.b fu_in_exc # there is at least one set
1350
1351
#
1352
# No exceptions occurred that were also enabled. Now:
1353
#
1354
# if (OVFL && ovfl_disabled && inexact_enabled) {
1355
# branch to _real_inex() (even if the result was exact!);
1356
# } else {
1357
# save the result in the proper fp reg (unless the op is fcmp or ftst);
1358
# return;
1359
# }
1360
#
1361
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362
beq.b fu_in_cont # no
1363
1364
fu_in_ovflchk:
1365
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366
beq.b fu_in_cont # no
1367
bra.w fu_in_exc_ovfl # go insert overflow frame
1368
1369
#
1370
# An exception occurred and that exception was enabled:
1371
#
1372
# shift enabled exception field into lo byte of d0;
1373
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375
# /*
1376
# * this is the case where we must call _real_inex() now or else
1377
# * there will be no other way to pass it the exceptional operand
1378
# */
1379
# call _real_inex();
1380
# } else {
1381
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1382
# }
1383
#
1384
fu_in_exc:
1385
subi.l &24,%d0 # fix offset to be 0-8
1386
cmpi.b %d0,&0x6 # is exception INEX? (6)
1387
bne.b fu_in_exc_exit # no
1388
1389
# the enabled exception was inexact
1390
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391
bne.w fu_in_exc_unfl # yes
1392
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393
bne.w fu_in_exc_ovfl # yes
1394
1395
# here, we insert the correct fsave status value into the fsave frame for the
1396
# corresponding exception. the operand in the fsave frame should be the original
1397
# src operand.
1398
fu_in_exc_exit:
1399
mov.l %d0,-(%sp) # save d0
1400
bsr.l funimp_skew # skew sgl or dbl inputs
1401
mov.l (%sp)+,%d0 # restore d0
1402
1403
mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404
1405
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1406
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1408
1409
frestore FP_SRC(%a6) # restore src op
1410
1411
unlk %a6
1412
1413
bra.l _fpsp_done
1414
1415
tbl_except:
1416
short 0xe000,0xe006,0xe004,0xe005
1417
short 0xe003,0xe002,0xe001,0xe001
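# tbl_except is indexed by the priority number computed above (0=bsun,
# 1=snan, 2=operr, 3=ovfl, 4=unfl, 5=dz, 6=inex2, 7=inex1); each word is
# the fsave exception status written at 2+FP_SRC, matching the values this
# file stores explicitly elsewhere (0xe005 overflow, 0xe003 underflow,
# 0xe001 inexact).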
1418
1419
fu_in_exc_unfl:
1420
mov.w &0x4,%d0
1421
bra.b fu_in_exc_exit
1422
fu_in_exc_ovfl:
1423
mov.w &0x03,%d0
1424
bra.b fu_in_exc_exit
1425
1426
# If the input operand to this operation was opclass two and a single
1427
# or double precision denorm, inf, or nan, the operand needs to be
1428
# "corrected" in order to have the proper equivalent extended precision
1429
# number.
1430
global fix_skewed_ops
1431
fix_skewed_ops:
1432
bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433
cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1434
beq.b fso_sgl # yes
1435
cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
1436
beq.b fso_dbl # yes
1437
rts # no
1438
1439
fso_sgl:
1440
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1441
andi.w &0x7fff,%d0 # strip sign
1442
cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
1443
beq.b fso_sgl_dnrm_zero # yes
1444
cmpi.w %d0,&0x407f # no; is |exp| == $407f?
1445
beq.b fso_infnan # yes
1446
rts # no
1447
1448
fso_sgl_dnrm_zero:
1449
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450
beq.b fso_zero # it's a skewed zero
1451
fso_sgl_dnrm:
1452
# here, we count on norm not to alter a0...
1453
bsr.l norm # normalize mantissa
1454
neg.w %d0 # -shft amt
1455
addi.w &0x3f81,%d0 # adjust new exponent
1456
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1457
or.w %d0,LOCAL_EX(%a0) # insert new exponent
1458
rts
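# worked example: a single-precision denorm whose fraction MSB is set
# represents 0.1xxx... x 2^-126 = 1.xxx... x 2^-127; norm shifts the
# mantissa up by one bit, so the new extended exponent becomes
# 0x3f81 - 1 = 0x3f80 (i.e. 16383 - 127, as expected).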
1459
1460
fso_zero:
1461
andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
1462
rts
1463
1464
fso_infnan:
1465
andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1466
ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
1467
rts
1468
1469
fso_dbl:
1470
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1471
andi.w &0x7fff,%d0 # strip sign
1472
cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
1473
beq.b fso_dbl_dnrm_zero # yes
1474
cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
1475
beq.b fso_infnan # yes
1476
rts # no
1477
1478
fso_dbl_dnrm_zero:
1479
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480
bne.b fso_dbl_dnrm # it's a skewed denorm
1481
tst.l LOCAL_LO(%a0) # is it a zero?
1482
beq.b fso_zero # yes
1483
fso_dbl_dnrm:
1484
# here, we count on norm not to alter a0...
1485
bsr.l norm # normalize mantissa
1486
neg.w %d0 # -shft amt
1487
addi.w &0x3c01,%d0 # adjust new exponent
1488
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1489
or.w %d0,LOCAL_EX(%a0) # insert new exponent
1490
rts
1491
1492
#################################################################
1493
1494
# fmove out took an unimplemented data type exception.
1495
# the src operand is in FP_SRC. Call _fout() to write out the result and
1496
# to determine which exceptions, if any, to take.
1497
fu_out:
1498
1499
# Separate packed move outs from the UNNORM and DENORM move outs.
1500
bfextu EXC_CMDREG(%a6){&3:&3},%d0
1501
cmpi.b %d0,&0x3
1502
beq.w fu_out_pack
1503
cmpi.b %d0,&0x7
1504
beq.w fu_out_pack
1505
1506
1507
# I'm not sure at this point what FPSR bits are valid for this instruction.
1508
# so, since the emulation routines re-create them anyways, zero exception field.
1509
# fmove out doesn't affect ccodes.
1510
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1511
1512
fmov.l &0x0,%fpcr # zero current control regs
1513
fmov.l &0x0,%fpsr
1514
1515
# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516
# call here. just figure out what it is...
1517
mov.w FP_SRC_EX(%a6),%d0 # get exponent
1518
andi.w &0x7fff,%d0 # strip sign
1519
beq.b fu_out_denorm # it's a DENORM
1520
1521
lea FP_SRC(%a6),%a0
1522
bsr.l unnorm_fix # it's an UNNORM; fix it
1523
1524
mov.b %d0,STAG(%a6)
1525
1526
bra.b fu_out_cont
1527
fu_out_denorm:
1528
mov.b &DENORM,STAG(%a6)
1529
fu_out_cont:
1530
1531
clr.l %d0
1532
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1533
1534
lea FP_SRC(%a6),%a0 # pass ptr to src operand
1535
1536
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1537
bsr.l fout # call fmove out routine
1538
1539
# Exceptions in order of precedence:
1540
# BSUN : none
1541
# SNAN : none
1542
# OPERR : fmove.{b,w,l} out of large UNNORM
1543
# OVFL : fmove.{s,d}
1544
# UNFL : fmove.{s,d,x}
1545
# DZ : none
1546
# INEX2 : all
1547
# INEX1 : none (packed doesn't travel through here)
1548
1549
# determine the highest priority exception(if any) set by the
1550
# emulation routine that has also been enabled by the user.
1551
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1552
bne.w fu_out_ena # some are enabled
1553
1554
fu_out_done:
1555
1556
mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1557
1558
# on extended precision opclass three instructions using pre-decrement or
1559
# post-increment addressing mode, the address register is not updated. if the
1560
# address register was the stack pointer used from user mode, then let's update
1561
# it here. if it was used from supervisor mode, then we have to handle this
1562
# as a special case.
1563
btst &0x5,EXC_SR(%a6)
1564
bne.b fu_out_done_s
1565
1566
mov.l EXC_A7(%a6),%a0 # restore a7
1567
mov.l %a0,%usp
1568
1569
fu_out_done_cont:
1570
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1571
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1573
1574
unlk %a6
1575
1576
btst &0x7,(%sp) # is trace on?
1577
bne.b fu_out_trace # yes
1578
1579
bra.l _fpsp_done
1580
1581
# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582
# ("fmov.x fpm,-(a7)") if so,
1583
fu_out_done_s:
1584
cmpi.b SPCOND_FLG(%a6),&mda7_flg
1585
bne.b fu_out_done_cont
1586
1587
# the extended precision result is still in fp0. but, we need to save it
1588
# somewhere on the stack until we can copy it to its final resting place.
1589
# here, we're counting on the top of the stack to be the old place-holders
1590
# for fp0/fp1 which have already been restored. that way, we can write
1591
# over those destinations with the shifted stack frame.
1592
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1593
1594
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1595
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1597
1598
mov.l (%a6),%a6 # restore frame pointer
1599
1600
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1601
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1602
1603
# now, copy the result to the proper place on the stack
1604
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607
1608
add.l &LOCAL_SIZE-0x8,%sp
1609
1610
btst &0x7,(%sp)
1611
bne.b fu_out_trace
1612
1613
bra.l _fpsp_done
1614
1615
fu_out_ena:
1616
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1617
bfffo %d0{&24:&8},%d0 # find highest priority exception
1618
bne.b fu_out_exc # there is at least one set
1619
1620
# no enabled exceptions were set.
# if a disabled overflow occurred and inexact was enabled, then a branch
# to _real_inex() is made (even if the result was exact).
1623
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624
beq.w fu_out_done # no
1625
1626
fu_out_ovflchk:
1627
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628
beq.w fu_out_done # no
1629
bra.w fu_inex # yes
1630
1631
#
1632
# The fp move out that took the "Unimplemented Data Type" exception was
1633
# being traced. Since the stack frames are similar, get the "current" PC
1634
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635
#
1636
# UNSUPP FRAME TRACE FRAME
1637
# ***************** *****************
1638
# * EA * * Current *
1639
# * * * PC *
1640
# ***************** *****************
1641
# * 0x3 * 0x0dc * * 0x2 * 0x024 *
1642
# ***************** *****************
1643
# * Next * * Next *
1644
# * PC * * PC *
1645
# ***************** *****************
1646
# * SR * * SR *
1647
# ***************** *****************
1648
#
1649
fu_out_trace:
1650
mov.w &0x2024,0x6(%sp)
1651
fmov.l %fpiar,0x8(%sp)
1652
bra.l _real_trace
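#
# (note: the word written to 0x6(%sp) above is the frame format/vector
# offset word: upper nybble = stack frame format, low 12 bits = vector
# offset. 0x2024 = format $2, vector offset 0x024 = vector 9, the Trace
# exception.)
#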
# an exception occurred and that exception was enabled.
1655
fu_out_exc:
1656
subi.l &24,%d0 # fix offset to be 0-8
1657
1658
# we don't mess with the existing fsave frame. just re-insert it and
1659
# jump to the "_real_{}()" handler...
1660
mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1661
jmp (tbl_fu_out.b,%pc,%d0.w*1)
1662
1663
swbeg &0x8
1664
tbl_fu_out:
1665
short tbl_fu_out - tbl_fu_out # BSUN can't happen
1666
short tbl_fu_out - tbl_fu_out # SNAN can't happen
1667
short fu_operr - tbl_fu_out # OPERR
1668
short fu_ovfl - tbl_fu_out # OVFL
1669
short fu_unfl - tbl_fu_out # UNFL
1670
short tbl_fu_out - tbl_fu_out # DZ can't happen
1671
short fu_inex - tbl_fu_out # INEX2
1672
short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
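#
# (note on the dispatch above: FPCR_ENABLE and'ed with FPSR_EXCEPT leaves
# only the enabled & set exception bits; bfffo returns the bit number
# (24-31) of the most significant one in the low byte, so subtracting 24
# yields 0 (BSUN) through 7 (INEX1) in priority order. e.g., if OVFL and
# INEX2 are both set and enabled, d0 ends up as 3 and the fu_ovfl entry is
# taken. the table holds 16-bit offsets relative to tbl_fu_out, hence the
# "*2" fetch followed by the "*1" jmp.)
#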
# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1675
# frestore it.
1676
fu_snan:
1677
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1678
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1680
1681
mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1682
mov.w &0xe006,2+FP_SRC(%a6)
1683
1684
frestore FP_SRC(%a6)
1685
1686
unlk %a6
1687
1688
1689
bra.l _real_snan
1690
1691
fu_operr:
1692
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1693
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1695
1696
mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1697
mov.w &0xe004,2+FP_SRC(%a6)
1698
1699
frestore FP_SRC(%a6)
1700
1701
unlk %a6
1702
1703
1704
bra.l _real_operr
1705
1706
fu_ovfl:
1707
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1708
1709
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1710
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1712
1713
mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1714
mov.w &0xe005,2+FP_SRC(%a6)
1715
1716
frestore FP_SRC(%a6) # restore EXOP
1717
1718
unlk %a6
1719
1720
bra.l _real_ovfl
1721
1722
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); if so, the exception stack frame must be shifted "down".
1726
fu_unfl:
1727
mov.l EXC_A6(%a6),(%a6) # restore a6
1728
1729
btst &0x5,EXC_SR(%a6)
1730
bne.w fu_unfl_s
1731
1732
mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1733
mov.l %a0,%usp # to or not...
1734
1735
fu_unfl_cont:
1736
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1737
1738
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1739
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1741
1742
mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1743
mov.w &0xe003,2+FP_SRC(%a6)
1744
1745
frestore FP_SRC(%a6) # restore EXOP
1746
1747
unlk %a6
1748
1749
bra.l _real_unfl
1750
1751
fu_unfl_s:
1752
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1753
bne.b fu_unfl_cont
1754
1755
# the extended precision result is still in fp0. but, we need to save it
1756
# somewhere on the stack until we can copy it to its final resting place
1757
# (where the exc frame is currently). make sure it's not at the top of the
1758
# frame or it will get overwritten when the exc stack frame is shifted "down".
1759
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1760
fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1761
1762
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1763
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1765
1766
mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1767
mov.w &0xe003,2+FP_DST(%a6)
1768
1769
frestore FP_DST(%a6) # restore EXOP
1770
1771
mov.l (%a6),%a6 # restore frame pointer
1772
1773
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1774
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1775
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776
1777
# now, copy the result to the proper place on the stack
1778
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781
1782
add.l &LOCAL_SIZE-0x8,%sp
1783
1784
bra.l _real_unfl
1785
1786
# fmove in and out enter here.
1787
fu_inex:
1788
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1789
1790
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1791
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1792
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1793
1794
mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1795
mov.w &0xe001,2+FP_SRC(%a6)
1796
1797
frestore FP_SRC(%a6) # restore EXOP
1798
1799
unlk %a6
1800
1801
1802
bra.l _real_inex
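#
# (note: the 0x30xx values written to EXC_VOFF in these routines are
# format $3 frame format/vector offset words; the offsets are the 68k
# exception vector numbers times four: 0xc4 = FP inexact, 0xcc = FP
# underflow, 0xd0 = FP operand error, 0xd4 = FP overflow, 0xd8 = FP SNAN.)
#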
#########################################################################
1805
#########################################################################
1806
fu_in_pack:
1807
1808
1809
# I'm not sure at this point what FPSR bits are valid for this instruction.
1810
# so, since the emulation routines re-create them anyways, zero exception field
1811
andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1812
1813
fmov.l &0x0,%fpcr # zero current control regs
1814
fmov.l &0x0,%fpsr
1815
1816
bsr.l get_packed # fetch packed src operand
1817
1818
lea FP_SRC(%a6),%a0 # pass ptr to src
1819
bsr.l set_tag_x # set src optype tag
1820
1821
mov.b %d0,STAG(%a6) # save src optype tag
1822
1823
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824
1825
# bit five of the fp extension word separates the monadic and dyadic operations
1826
# at this point
1827
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1828
beq.b fu_extract_p # monadic
1829
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1830
beq.b fu_extract_p # yes, so it's monadic, too
1831
1832
bsr.l load_fpn2 # load dst into FP_DST
1833
1834
lea FP_DST(%a6),%a0 # pass: ptr to dst op
1835
bsr.l set_tag_x # tag the operand type
1836
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1837
bne.b fu_op2_done_p # no
1838
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1839
fu_op2_done_p:
1840
mov.b %d0,DTAG(%a6) # save dst optype tag
1841
1842
fu_extract_p:
1843
clr.l %d0
1844
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1845
1846
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1847
1848
lea FP_SRC(%a6),%a0
1849
lea FP_DST(%a6),%a1
1850
1851
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1852
jsr (tbl_unsupp.l,%pc,%d1.l*1)
1853
1854
#
1855
# Exceptions in order of precedence:
1856
# BSUN : none
1857
# SNAN : all dyadic ops
1858
# OPERR : fsqrt(-NORM)
1859
# OVFL : all except ftst,fcmp
1860
# UNFL : all except ftst,fcmp
1861
# DZ : fdiv
1862
# INEX2 : all except ftst,fcmp
1863
# INEX1 : all
1864
#
1865
1866
# we determine the highest priority exception(if any) set by the
1867
# emulation routine that has also been enabled by the user.
1868
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1869
bne.w fu_in_ena_p # some are enabled
1870
1871
fu_in_cont_p:
1872
# fcmp and ftst do not store any result.
1873
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1874
andi.b &0x38,%d0 # extract bits 3-5
1875
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1876
beq.b fu_in_exit_p # yes
1877
1878
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1879
bsr.l store_fpreg # store the result
1880
1881
fu_in_exit_p:
1882
1883
btst &0x5,EXC_SR(%a6) # user or supervisor?
1884
bne.w fu_in_exit_s_p # supervisor
1885
1886
mov.l EXC_A7(%a6),%a0 # update user a7
1887
mov.l %a0,%usp
1888
1889
fu_in_exit_cont_p:
1890
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1891
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1893
1894
unlk %a6 # unravel stack frame
1895
1896
btst &0x7,(%sp) # is trace on?
1897
bne.w fu_trace_p # yes
1898
1899
bra.l _fpsp_done # exit to os
1900
1901
# the exception occurred in supervisor mode. check to see if the
1902
# addressing mode was (a7)+. if so, we'll need to shift the
1903
# stack frame "up".
1904
fu_in_exit_s_p:
1905
btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906
beq.b fu_in_exit_cont_p # no
1907
1908
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1909
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1911
1912
unlk %a6 # unravel stack frame
1913
1914
# shift the stack frame "up". we don't really care about the <ea> field.
1915
mov.l 0x4(%sp),0x10(%sp)
1916
mov.l 0x0(%sp),0xc(%sp)
1917
add.l &0xc,%sp
1918
1919
btst &0x7,(%sp) # is trace on?
1920
bne.w fu_trace_p # yes
1921
1922
bra.l _fpsp_done # exit to os
1923
1924
fu_in_ena_p:
1925
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1926
bfffo %d0{&24:&8},%d0 # find highest priority exception
1927
bne.b fu_in_exc_p # at least one was set
1928
1929
#
1930
# No exceptions occurred that were also enabled. Now:
1931
#
1932
# if (OVFL && ovfl_disabled && inexact_enabled) {
1933
# branch to _real_inex() (even if the result was exact!);
1934
# } else {
1935
# save the result in the proper fp reg (unless the op is fcmp or ftst);
1936
# return;
1937
# }
1938
#
1939
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940
beq.w fu_in_cont_p # no
1941
1942
fu_in_ovflchk_p:
1943
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944
beq.w fu_in_cont_p # no
1945
bra.w fu_in_exc_ovfl_p # do _real_inex() now
1946
1947
#
1948
# An exception occurred and that exception was enabled:
1949
#
1950
# shift enabled exception field into lo byte of d0;
1951
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953
# /*
1954
# * this is the case where we must call _real_inex() now or else
1955
# * there will be no other way to pass it the exceptional operand
1956
# */
1957
# call _real_inex();
1958
# } else {
1959
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1960
# }
1961
#
1962
fu_in_exc_p:
1963
subi.l &24,%d0 # fix offset to be 0-8
1964
cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1965
blt.b fu_in_exc_exit_p # no
1966
1967
# the enabled exception was inexact
1968
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969
bne.w fu_in_exc_unfl_p # yes
1970
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971
bne.w fu_in_exc_ovfl_p # yes
1972
1973
# here, we insert the correct fsave status value into the fsave frame for the
1974
# corresponding exception. the operand in the fsave frame should be the original
1975
# src operand.
1976
# as a reminder for future predicted pain and agony, we are passing in fsave the
1977
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1979
fu_in_exc_exit_p:
1980
btst &0x5,EXC_SR(%a6) # user or supervisor?
1981
bne.w fu_in_exc_exit_s_p # supervisor
1982
1983
mov.l EXC_A7(%a6),%a0 # update user a7
1984
mov.l %a0,%usp
1985
1986
fu_in_exc_exit_cont_p:
1987
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1988
1989
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1990
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1992
1993
frestore FP_SRC(%a6) # restore src op
1994
1995
unlk %a6
1996
1997
btst &0x7,(%sp) # is trace enabled?
1998
bne.w fu_trace_p # yes
1999
2000
bra.l _fpsp_done
2001
2002
tbl_except_p:
2003
short 0xe000,0xe006,0xe004,0xe005
2004
short 0xe003,0xe002,0xe001,0xe001
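#
# (note: these are the fsave status words stuffed into the frame, indexed
# in the same priority order as tbl_fu_out: BSUN=0xe000, SNAN=0xe006,
# OPERR=0xe004, OVFL=0xe005, UNFL=0xe003, DZ=0xe002, INEX2/INEX1=0xe001;
# the same values the fu_snan/fu_operr/fu_ovfl/fu_unfl/fu_inex routines
# above write explicitly.)
#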
fu_in_exc_ovfl_p:
2007
mov.w &0x3,%d0
2008
bra.w fu_in_exc_exit_p
2009
2010
fu_in_exc_unfl_p:
2011
mov.w &0x4,%d0
2012
bra.w fu_in_exc_exit_p
2013
2014
fu_in_exc_exit_s_p:
2015
btst &mia7_bit,SPCOND_FLG(%a6)
2016
beq.b fu_in_exc_exit_cont_p
2017
2018
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2019
2020
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2021
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2023
2024
frestore FP_SRC(%a6) # restore src op
2025
2026
unlk %a6 # unravel stack frame
2027
2028
# shift stack frame "up". who cares about <ea> field.
2029
mov.l 0x4(%sp),0x10(%sp)
2030
mov.l 0x0(%sp),0xc(%sp)
2031
add.l &0xc,%sp
2032
2033
btst &0x7,(%sp) # is trace on?
2034
bne.b fu_trace_p # yes
2035
2036
bra.l _fpsp_done # exit to os
2037
2038
#
2039
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2040
# exception was being traced. Make the "current" PC the FPIAR and put it in the
2041
# trace stack frame then jump to _real_trace().
2042
#
2043
# UNSUPP FRAME TRACE FRAME
2044
# ***************** *****************
2045
# * EA * * Current *
2046
# * * * PC *
2047
# ***************** *****************
2048
# * 0x2 * 0x0dc * * 0x2 * 0x024 *
2049
# ***************** *****************
2050
# * Next * * Next *
2051
# * PC * * PC *
2052
# ***************** *****************
2053
# * SR * * SR *
2054
# ***************** *****************
2055
fu_trace_p:
2056
mov.w &0x2024,0x6(%sp)
2057
fmov.l %fpiar,0x8(%sp)
2058
2059
bra.l _real_trace
2060
2061
#########################################################
2062
#########################################################
2063
fu_out_pack:
2064
2065
2066
# I'm not sure at this point what FPSR bits are valid for this instruction.
2067
# so, since the emulation routines re-create them anyways, zero exception field.
2068
# fmove out doesn't affect ccodes.
2069
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2070
2071
fmov.l &0x0,%fpcr # zero current control regs
2072
fmov.l &0x0,%fpsr
2073
2074
bfextu EXC_CMDREG(%a6){&6:&3},%d0
2075
bsr.l load_fpn1
2076
2077
# unlike the other opclass 3 unimplemented data type exceptions, packed must be
# able to detect all operand types.
2079
lea FP_SRC(%a6),%a0
2080
bsr.l set_tag_x # tag the operand type
2081
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2082
bne.b fu_op2_p # no
2083
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2084
2085
fu_op2_p:
2086
mov.b %d0,STAG(%a6) # save src optype tag
2087
2088
clr.l %d0
2089
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2090
2091
lea FP_SRC(%a6),%a0 # pass ptr to src operand
2092
2093
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2094
bsr.l fout # call fmove out routine
2095
2096
# Exceptions in order of precedence:
2097
# BSUN : no
2098
# SNAN : yes
2099
# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2100
# OVFL : no
2101
# UNFL : no
2102
# DZ : no
2103
# INEX2 : yes
2104
# INEX1 : no
2105
2106
# determine the highest priority exception(if any) set by the
2107
# emulation routine that has also been enabled by the user.
2108
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2109
bne.w fu_out_ena_p # some are enabled
2110
2111
fu_out_exit_p:
2112
mov.l EXC_A6(%a6),(%a6) # restore a6
2113
2114
btst &0x5,EXC_SR(%a6) # user or supervisor?
2115
bne.b fu_out_exit_s_p # supervisor
2116
2117
mov.l EXC_A7(%a6),%a0 # update user a7
2118
mov.l %a0,%usp
2119
2120
fu_out_exit_cont_p:
2121
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2122
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2124
2125
unlk %a6 # unravel stack frame
2126
2127
btst &0x7,(%sp) # is trace on?
2128
bne.w fu_trace_p # yes
2129
2130
bra.l _fpsp_done # exit to os
2131
2132
# the exception occurred in supervisor mode. check to see if the
2133
# addressing mode was -(a7). if so, we'll need to shift the
2134
# stack frame "down".
2135
fu_out_exit_s_p:
2136
btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137
beq.b fu_out_exit_cont_p # no
2138
2139
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2140
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2142
2143
mov.l (%a6),%a6 # restore frame pointer
2144
2145
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2146
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147
2148
# now, copy the result to the proper place on the stack
2149
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152
2153
add.l &LOCAL_SIZE-0x8,%sp
2154
2155
btst &0x7,(%sp)
2156
bne.w fu_trace_p
2157
2158
bra.l _fpsp_done
2159
2160
fu_out_ena_p:
2161
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2162
bfffo %d0{&24:&8},%d0 # find highest priority exception
2163
beq.w fu_out_exit_p
2164
2165
mov.l EXC_A6(%a6),(%a6) # restore a6
2166
2167
# an exception occurred and that exception was enabled.
# the only exceptions possible on a packed move out are INEX, OPERR, and SNAN.
2169
fu_out_exc_p:
2170
cmpi.b %d0,&0x1a
2171
bgt.w fu_inex_p2
2172
beq.w fu_operr_p
2173
2174
fu_snan_p:
2175
btst &0x5,EXC_SR(%a6)
2176
bne.b fu_snan_s_p
2177
2178
mov.l EXC_A7(%a6),%a0
2179
mov.l %a0,%usp
2180
bra.w fu_snan
2181
2182
fu_snan_s_p:
2183
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2184
bne.w fu_snan
2185
2186
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187
# the strategy is to move the exception frame "down" 12 bytes. then, we
2188
# can store the default result where the exception frame was.
2189
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2190
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2192
2193
mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2194
mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2195
2196
frestore FP_SRC(%a6) # restore src operand
2197
2198
mov.l (%a6),%a6 # restore frame pointer
2199
2200
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2203
2204
# now, we copy the default result to its proper location
2205
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2208
2209
add.l &LOCAL_SIZE-0x8,%sp
2210
2211
2212
bra.l _real_snan
2213
2214
fu_operr_p:
2215
btst &0x5,EXC_SR(%a6)
2216
bne.w fu_operr_p_s
2217
2218
mov.l EXC_A7(%a6),%a0
2219
mov.l %a0,%usp
2220
bra.w fu_operr
2221
2222
fu_operr_p_s:
2223
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2224
bne.w fu_operr
2225
2226
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227
# the strategy is to move the exception frame "down" 12 bytes. then, we
2228
# can store the default result where the exception frame was.
2229
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2230
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2232
2233
mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2234
mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2235
2236
frestore FP_SRC(%a6) # restore src operand
2237
2238
mov.l (%a6),%a6 # restore frame pointer
2239
2240
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243
2244
# now, we copy the default result to its proper location
2245
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248
2249
add.l &LOCAL_SIZE-0x8,%sp
2250
2251
2252
bra.l _real_operr
2253
2254
fu_inex_p2:
2255
btst &0x5,EXC_SR(%a6)
2256
bne.w fu_inex_s_p2
2257
2258
mov.l EXC_A7(%a6),%a0
2259
mov.l %a0,%usp
2260
bra.w fu_inex
2261
2262
fu_inex_s_p2:
2263
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2264
bne.w fu_inex
2265
2266
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267
# the strategy is to move the exception frame "down" 12 bytes. then, we
2268
# can store the default result where the exception frame was.
2269
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2270
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2272
2273
mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2274
mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2275
2276
frestore FP_SRC(%a6) # restore src operand
2277
2278
mov.l (%a6),%a6 # restore frame pointer
2279
2280
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283
2284
# now, we copy the default result to its proper location
2285
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288
2289
add.l &LOCAL_SIZE-0x8,%sp
2290
2291
2292
bra.l _real_inex
2293
2294
#########################################################################
2295
2296
#
2297
# if we're stuffing a source operand back into an fsave frame then we
2298
# have to make sure that for single or double source operands the
# format stuffed is as weird as the hardware usually makes it.
2300
#
2301
global funimp_skew
2302
funimp_skew:
2303
bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304
cmpi.b %d0,&0x1 # was src sgl?
2305
beq.b funimp_skew_sgl # yes
2306
cmpi.b %d0,&0x5 # was src dbl?
2307
beq.b funimp_skew_dbl # yes
2308
rts
2309
2310
funimp_skew_sgl:
2311
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2312
andi.w &0x7fff,%d0 # strip sign
2313
beq.b funimp_skew_sgl_not
2314
cmpi.w %d0,&0x3f80
2315
bgt.b funimp_skew_sgl_not
2316
neg.w %d0 # make exponent negative
2317
addi.w &0x3f81,%d0 # find amt to shift
2318
mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2319
lsr.l %d0,%d1 # shift it
2320
bset &31,%d1 # set j-bit
2321
mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2322
andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2323
ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2324
funimp_skew_sgl_not:
2325
rts
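#
# (note: 0x3f81 is 2^-126, the smallest single-precision normal magnitude,
# as a biased extended exponent (0x3fff - 126), so the shift amount above
# is how far the operand lies below the single normal range. e.g., an
# operand with extended exponent 0x3f7e gets shifted right by
# 0x3f81 - 0x3f7e = 3 and restamped with the "skewed" exponent 0x3f80.)
#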
funimp_skew_dbl:
2328
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2329
andi.w &0x7fff,%d0 # strip sign
2330
beq.b funimp_skew_dbl_not
2331
cmpi.w %d0,&0x3c00
2332
bgt.b funimp_skew_dbl_not
2333
2334
tst.b FP_SRC_EX(%a6) # make "internal format"
2335
smi.b 0x2+FP_SRC(%a6)
2336
mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2337
clr.l %d0 # clear g,r,s
2338
lea FP_SRC(%a6),%a0 # pass ptr to src op
2339
mov.w &0x3c01,%d1 # pass denorm threshold
2340
bsr.l dnrm_lp # denorm it
2341
mov.w &0x3c00,%d0 # new exponent
2342
tst.b 0x2+FP_SRC(%a6) # is sign set?
2343
beq.b fss_dbl_denorm_done # no
2344
bset &15,%d0 # set sign
2345
fss_dbl_denorm_done:
2346
bset &0x7,FP_SRC_HI(%a6) # set j-bit
2347
mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2348
funimp_skew_dbl_not:
2349
rts
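#
# (note: the dbl case works the same way but denormalizes with dnrm_lp
# against the threshold 0x3c01 (2^-1022 as a biased extended exponent,
# 0x3fff - 1022) and then restamps the operand with the "skewed" exponent
# 0x3c00 and the j-bit set; this is essentially the inverse of the
# fso_dbl_dnrm fixup earlier in this file.)
#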
#########################################################################
2352
global _mem_write2
2353
_mem_write2:
2354
btst &0x5,EXC_SR(%a6)
2355
beq.l _dmem_write
2356
mov.l 0x0(%a0),FP_DST_EX(%a6)
2357
mov.l 0x4(%a0),FP_DST_HI(%a6)
2358
mov.l 0x8(%a0),FP_DST_LO(%a6)
2359
clr.l %d1
2360
rts
2361
2362
#########################################################################
2363
# XDEF **************************************************************** #
2364
# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2365
# effective address" exception. #
2366
# #
2367
# This handler should be the first code executed upon taking the #
2368
# FP Unimplemented Effective Address exception in an operating #
2369
# system. #
2370
# #
2371
# XREF **************************************************************** #
2372
# _imem_read_long() - read instruction longword #
2373
# fix_skewed_ops() - adjust src operand in fsave frame #
2374
# set_tag_x() - determine optype of src/dst operands #
2375
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
2376
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
2377
# load_fpn2() - load dst operand from FP regfile #
2378
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2379
# decbin() - convert packed data to FP binary data #
2380
# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2381
# _real_access() - "callout" for access error exception #
2382
# _mem_read() - read extended immediate operand from memory #
2383
# _fpsp_done() - "callout" for exit; work all done #
2384
# _real_trace() - "callout" for Trace enabled exception #
2385
# fmovm_dynamic() - emulate dynamic fmovm instruction #
2386
# fmovm_ctrl() - emulate fmovm control instruction #
2387
# #
2388
# INPUT *************************************************************** #
2389
# - The system stack contains the "Unimplemented <ea>" stk frame #
2390
# #
2391
# OUTPUT ************************************************************** #
2392
# If access error: #
2393
# - The system stack is changed to an access error stack frame #
2394
# If FPU disabled: #
2395
# - The system stack is changed to an FPU disabled stack frame #
2396
# If Trace exception enabled: #
2397
# - The system stack is changed to a Trace exception stack frame #
2398
# Else: (normal case) #
2399
# - None (correct result has been stored as appropriate) #
2400
# #
2401
# ALGORITHM *********************************************************** #
2402
# This exception handles 3 types of operations: #
2403
# (1) FP Instructions using extended precision or packed immediate #
2404
# addressing mode. #
2405
# (2) The "fmovm.x" instruction w/ dynamic register specification. #
2406
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2407
# #
2408
# For immediate data operations, the data is read in w/ a #
2409
# _mem_read() "callout", converted to FP binary (if packed), and used #
2410
# as the source operand to the instruction specified by the instruction #
2411
# word. If no FP exception should be reported as a result of the #
2412
# emulation, then the result is stored to the destination register and #
2413
# the handler exits through _fpsp_done(). If an enabled exc has been #
2414
# signalled as a result of emulation, then an fsave state frame #
2415
# corresponding to the FP exception type must be entered into the 060 #
2416
# FPU before exiting. In either the enabled or disabled cases, we #
2417
# must also check if a Trace exception is pending, in which case, we #
2418
# must create a Trace exception stack frame from the current exception #
2419
# stack frame. If no Trace is pending, we simply exit through #
2420
# _fpsp_done(). #
2421
# For "fmovm.x", call the routine fmovm_dynamic() which will #
2422
# decode and emulate the instruction. No FP exceptions can be pending #
2423
# as a result of this operation emulation. A Trace exception can be #
2424
# pending, though, which means the current stack frame must be changed #
2425
# to a Trace stack frame and an exit made through _real_trace(). #
2426
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2427
# was executed from supervisor mode, this handler must store the FP #
2428
# register file values to the system stack by itself since #
2429
# fmovm_dynamic() can't handle this. A normal exit is made through #
2430
# _fpsp_done(). #
2431
# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2432
# Again, a Trace exception may be pending and an exit made through #
2433
# _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2434
# #
2435
# Before any of the above is attempted, it must be checked to #
2436
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2437
# before the "FPU disabled" exception, but the "FPU disabled" exception #
2438
# has higher priority, we check the disabled bit in the PCR. If set, #
2439
# then we must create an 8 word "FPU disabled" exception stack frame #
2440
# from the current 4 word exception stack frame. This includes #
2441
# reproducing the effective address of the instruction to put on the #
2442
# new stack frame. #
2443
# #
2444
# In the process of all emulation work, if a _mem_read() #
2445
# "callout" returns a failing result indicating an access error, then #
2446
# we must create an access error stack frame from the current stack #
2447
# frame. This information includes a faulting address and a fault- #
2448
# status-longword. These are created within this handler. #
2449
# #
2450
#########################################################################
2451
2452
global _fpsp_effadd
2453
_fpsp_effadd:
2454
2455
# This exception type takes priority over the "Line F Emulator"
2456
# exception. Therefore, the FPU could be disabled when entering here.
2457
# So, we must check to see if it's disabled and handle that case separately.
2458
mov.l %d0,-(%sp) # save d0
2459
movc %pcr,%d0 # load proc cr
2460
btst &0x1,%d0 # is FPU disabled?
2461
bne.w iea_disabled # yes
2462
mov.l (%sp)+,%d0 # restore d0
2463
2464
link %a6,&-LOCAL_SIZE # init stack frame
2465
2466
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2467
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2469
2470
# PC of instruction that took the exception is the PC in the frame
2471
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472
2473
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2474
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2475
bsr.l _imem_read_long # fetch the instruction words
2476
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2477
2478
#########################################################################
2479
2480
tst.w %d0 # is operation fmovem?
2481
bmi.w iea_fmovm # yes
2482
2483
#
2484
# here, we will have:
2485
# fabs fdabs fsabs facos fmod
2486
# fadd fdadd fsadd fasin frem
2487
# fcmp fatan fscale
2488
# fdiv fddiv fsdiv fatanh fsin
2489
# fint fcos fsincos
2490
# fintrz fcosh fsinh
2491
# fmove fdmove fsmove fetox ftan
2492
# fmul fdmul fsmul fetoxm1 ftanh
2493
# fneg fdneg fsneg fgetexp ftentox
2494
# fsgldiv fgetman ftwotox
2495
# fsglmul flog10
2496
# fsqrt flog2
2497
# fsub fdsub fssub flogn
2498
# ftst flognp1
2499
# which can all use f<op>.{x,p}
2500
# so, now it's immediate data extended precision AND PACKED FORMAT!
2501
#
2502
iea_op:
2503
andi.l &0x00ff00ff,USER_FPSR(%a6)
2504
2505
btst &0xa,%d0 # is src fmt x or p?
2506
bne.b iea_op_pack # packed
2507
2508
2509
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2510
lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2511
mov.l &0xc,%d0 # pass: 12 bytes
2512
bsr.l _imem_read # read extended immediate
2513
2514
tst.l %d1 # did ifetch fail?
2515
bne.w iea_iacc # yes
2516
2517
bra.b iea_op_setsrc
2518
2519
iea_op_pack:
2520
2521
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2522
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2523
mov.l &0xc,%d0 # pass: 12 bytes
2524
bsr.l _imem_read # read packed operand
2525
2526
tst.l %d1 # did ifetch fail?
2527
bne.w iea_iacc # yes
2528
2529
# The packed operand is an INF or a NAN if the exponent field is all ones.
2530
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2531
cmpi.w %d0,&0x7fff # INF or NAN?
2532
beq.b iea_op_setsrc # operand is an INF or NAN
2533
2534
# The packed operand is a zero if the mantissa is all zero, else it's
2535
# a normal packed op.
2536
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2537
andi.b &0x0f,%d0 # clear all but last nybble
2538
bne.b iea_op_gp_not_spec # not a zero
2539
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2540
bne.b iea_op_gp_not_spec # not a zero
2541
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2542
beq.b iea_op_setsrc # operand is a ZERO
2543
iea_op_gp_not_spec:
2544
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2545
bsr.l decbin # convert to extended
2546
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2547
2548
iea_op_setsrc:
2549
addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2550
2551
# FP_SRC now holds the src operand.
2552
lea FP_SRC(%a6),%a0 # pass: ptr to src op
2553
bsr.l set_tag_x # tag the operand type
2554
mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2555
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2556
bne.b iea_op_getdst # no
2557
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2558
mov.b %d0,STAG(%a6) # set new optype tag
2559
iea_op_getdst:
2560
clr.b STORE_FLG(%a6) # clear "store result" boolean
2561
2562
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2563
beq.b iea_op_extract # monadic
2564
btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2565
bne.b iea_op_spec # yes
2566
2567
iea_op_loaddst:
2568
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569
bsr.l load_fpn2 # load dst operand
2570
2571
lea FP_DST(%a6),%a0 # pass: ptr to dst op
2572
bsr.l set_tag_x # tag the operand type
2573
mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2574
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2575
bne.b iea_op_extract # no
2576
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2577
mov.b %d0,DTAG(%a6) # set new optype tag
2578
bra.b iea_op_extract
2579
2580
# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581
iea_op_spec:
2582
btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2583
beq.b iea_op_extract # yes
2584
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585
# store a result. then, only fcmp will branch back and pick up a dst operand.
2586
st STORE_FLG(%a6) # don't store a final result
2587
btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2588
beq.b iea_op_loaddst # yes
2589
2590
iea_op_extract:
2591
clr.l %d0
2592
mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2593
2594
mov.b 1+EXC_CMDREG(%a6),%d1
2595
andi.w &0x007f,%d1 # extract extension
2596
2597
fmov.l &0x0,%fpcr
2598
fmov.l &0x0,%fpsr
2599
2600
lea FP_SRC(%a6),%a0
2601
lea FP_DST(%a6),%a1
2602
2603
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604
jsr (tbl_unsupp.l,%pc,%d1.l*1)
2605
2606
#
2607
# Exceptions in order of precedence:
2608
# BSUN : none
2609
# SNAN : all operations
2610
# OPERR : all reg-reg or mem-reg operations that can normally operr
2611
# OVFL : same as OPERR
2612
# UNFL : same as OPERR
2613
# DZ : same as OPERR
2614
# INEX2 : same as OPERR
2615
# INEX1 : all packed immediate operations
2616
#
2617
2618
# we determine the highest priority exception(if any) set by the
2619
# emulation routine that has also been enabled by the user.
2620
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2621
bne.b iea_op_ena # some are enabled
2622
2623
# now, we save the result, unless, of course, the operation was ftst or fcmp.
2624
# these don't save results.
2625
iea_op_save:
2626
tst.b STORE_FLG(%a6) # does this op store a result?
2627
bne.b iea_op_exit1 # exit with no frestore
2628
2629
iea_op_store:
2630
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631
bsr.l store_fpreg # store the result
2632
2633
iea_op_exit1:
2634
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636
2637
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2638
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2640
2641
unlk %a6 # unravel the frame
2642
2643
btst &0x7,(%sp) # is trace on?
2644
bne.w iea_op_trace # yes
2645
2646
bra.l _fpsp_done # exit to os
2647
2648
iea_op_ena:
2649
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2650
bfffo %d0{&24:&8},%d0 # find highest priority exception
2651
bne.b iea_op_exc # at least one was set
2652
2653
# no exception occurred. now, did a disabled, exact overflow occur with inexact
2654
# enabled? if so, then we have to stuff an overflow frame into the FPU.
2655
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2656
beq.b iea_op_save
2657
2658
iea_op_ovfl:
2659
btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660
beq.b iea_op_store # no
2661
bra.b iea_op_exc_ovfl # yes
2662
2663
# an enabled exception occurred. we have to insert the exception type back into
2664
# the machine.
2665
iea_op_exc:
2666
subi.l &24,%d0 # fix offset to be 0-8
2667
cmpi.b %d0,&0x6 # is exception INEX?
2668
bne.b iea_op_exc_force # no
2669
2670
# the enabled exception was inexact. so, if it occurs with an overflow
2671
# or underflow that was disabled, then we have to force an overflow or
2672
# underflow frame.
2673
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674
bne.b iea_op_exc_ovfl # yes
2675
btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676
bne.b iea_op_exc_unfl # yes
2677
2678
iea_op_exc_force:
2679
mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2680
bra.b iea_op_exit2 # exit with frestore
2681
2682
tbl_iea_except:
2683
short 0xe002, 0xe006, 0xe004, 0xe005
2684
short 0xe003, 0xe002, 0xe001, 0xe001
2685
2686
iea_op_exc_ovfl:
2687
mov.w &0xe005,2+FP_SRC(%a6)
2688
bra.b iea_op_exit2
2689
2690
iea_op_exc_unfl:
2691
mov.w &0xe003,2+FP_SRC(%a6)
2692
2693
iea_op_exit2:
2694
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696
2697
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2698
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2700
2701
frestore FP_SRC(%a6) # restore exceptional state
2702
2703
unlk %a6 # unravel the frame
2704
2705
btst &0x7,(%sp) # is trace on?
2706
bne.b iea_op_trace # yes
2707
2708
bra.l _fpsp_done # exit to os
2709
2710
#
2711
# The opclass two instruction that took an "Unimplemented Effective Address"
2712
# exception was being traced. Make the "current" PC the FPIAR and put it in
2713
# the trace stack frame then jump to _real_trace().
2714
#
2715
# UNIMP EA FRAME TRACE FRAME
2716
# ***************** *****************
2717
# * 0x0 * 0x0f0 * * Current *
2718
# ***************** * PC *
2719
# * Current * *****************
2720
# * PC * * 0x2 * 0x024 *
2721
# ***************** *****************
2722
# * SR * * Next *
2723
# ***************** * PC *
2724
# *****************
2725
# * SR *
2726
# *****************
2727
iea_op_trace:
2728
mov.l (%sp),-(%sp) # shift stack frame "down"
2729
mov.w 0x8(%sp),0x4(%sp)
2730
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2731
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2732
2733
bra.l _real_trace
2734
2735
#########################################################################
2736
iea_fmovm:
2737
btst &14,%d0 # ctrl or data reg
2738
beq.w iea_fmovm_ctrl
2739
2740
iea_fmovm_data:
2741
2742
btst &0x5,EXC_SR(%a6) # user or supervisor mode
2743
bne.b iea_fmovm_data_s
2744
2745
iea_fmovm_data_u:
2746
mov.l %usp,%a0
2747
mov.l %a0,EXC_A7(%a6) # store current a7
2748
bsr.l fmovm_dynamic # do dynamic fmovm
2749
mov.l EXC_A7(%a6),%a0 # load possibly new a7
2750
mov.l %a0,%usp # update usp
2751
bra.w iea_fmovm_exit
2752
2753
iea_fmovm_data_s:
2754
clr.b SPCOND_FLG(%a6)
2755
lea 0x2+EXC_VOFF(%a6),%a0
2756
mov.l %a0,EXC_A7(%a6)
2757
bsr.l fmovm_dynamic # do dynamic fmovm
2758
2759
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2760
beq.w iea_fmovm_data_predec
2761
cmpi.b SPCOND_FLG(%a6),&mia7_flg
2762
bne.w iea_fmovm_exit
2763
2764
# right now, d0 = the size.
2765
# the data has been fetched from the supervisor stack, but we have not
2766
# incremented the stack pointer by the appropriate number of bytes.
2767
# do it here.
2768
iea_fmovm_data_postinc:
2769
btst &0x7,EXC_SR(%a6)
2770
bne.b iea_fmovm_data_pi_trace
2771
2772
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2773
mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2774
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2775
2776
lea (EXC_SR,%a6,%d0),%a0
2777
mov.l %a0,EXC_SR(%a6)
2778
2779
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2780
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2782
2783
unlk %a6
2784
mov.l (%sp)+,%sp
2785
bra.l _fpsp_done
2786
2787
iea_fmovm_data_pi_trace:
2788
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2789
mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2790
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2791
mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2792
2793
lea (EXC_SR-0x4,%a6,%d0),%a0
2794
mov.l %a0,EXC_SR(%a6)
2795
2796
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2797
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2799
2800
unlk %a6
2801
mov.l (%sp)+,%sp
2802
bra.l _real_trace
2803
2804
# right now, d1 = size and d0 = the strg.
2805
iea_fmovm_data_predec:
2806
mov.b %d1,EXC_VOFF(%a6) # store strg
2807
mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2808
2809
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2810
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2812
2813
mov.l (%a6),-(%sp) # make a copy of a6
2814
mov.l %d0,-(%sp) # save d0
2815
mov.l %d1,-(%sp) # save d1
2816
mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2817
2818
clr.l %d0
2819
mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2820
neg.l %d0 # get negative of size
2821
2822
btst &0x7,EXC_SR(%a6) # is trace enabled?
2823
beq.b iea_fmovm_data_p2
2824
2825
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2826
mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2827
mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2828
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2829
2830
pea (%a6,%d0) # create final sp
2831
bra.b iea_fmovm_data_p3
2832
2833
iea_fmovm_data_p2:
2834
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2835
mov.l (%sp)+,(EXC_PC,%a6,%d0)
2836
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2837
2838
pea (0x4,%a6,%d0) # create final sp
2839
2840
iea_fmovm_data_p3:
2841
clr.l %d1
2842
mov.b EXC_VOFF(%a6),%d1 # fetch strg
2843
2844
tst.b %d1
2845
bpl.b fm_1
2846
fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2847
addi.l &0xc,%d0
2848
fm_1:
2849
lsl.b &0x1,%d1
2850
bpl.b fm_2
2851
fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2852
addi.l &0xc,%d0
2853
fm_2:
2854
lsl.b &0x1,%d1
2855
bpl.b fm_3
2856
fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2857
addi.l &0xc,%d0
2858
fm_3:
2859
lsl.b &0x1,%d1
2860
bpl.b fm_4
2861
fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2862
addi.l &0xc,%d0
2863
fm_4:
2864
lsl.b &0x1,%d1
2865
bpl.b fm_5
2866
fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2867
addi.l &0xc,%d0
2868
fm_5:
2869
lsl.b &0x1,%d1
2870
bpl.b fm_6
2871
fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2872
addi.l &0xc,%d0
2873
fm_6:
2874
lsl.b &0x1,%d1
2875
bpl.b fm_7
2876
fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2877
addi.l &0xc,%d0
2878
fm_7:
2879
lsl.b &0x1,%d1
2880
bpl.b fm_end
2881
fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2882
fm_end:
2883
mov.l 0x4(%sp),%d1
2884
mov.l 0x8(%sp),%d0
2885
mov.l 0xc(%sp),%a6
2886
mov.l (%sp)+,%sp
2887
2888
btst &0x7,(%sp) # is trace enabled?
2889
beq.l _fpsp_done
2890
bra.l _real_trace
2891
2892
#########################################################################
2893
iea_fmovm_ctrl:
2894
2895
bsr.l fmovm_ctrl # load ctrl regs
2896
2897
iea_fmovm_exit:
2898
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2899
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2901
2902
btst &0x7,EXC_SR(%a6) # is trace on?
2903
bne.b iea_fmovm_trace # yes
2904
2905
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2906
2907
unlk %a6 # unravel the frame
2908
2909
bra.l _fpsp_done # exit to os
2910
2911
#
2912
# The control reg instruction that took an "Unimplemented Effective Address"
2913
# exception was being traced. The "Current PC" for the trace frame is the
2914
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915
# After fixing the stack frame, jump to _real_trace().
2916
#
2917
# UNIMP EA FRAME TRACE FRAME
2918
# ***************** *****************
2919
# * 0x0 * 0x0f0 * * Current *
2920
# ***************** * PC *
2921
# * Current * *****************
2922
# * PC * * 0x2 * 0x024 *
2923
# ***************** *****************
2924
# * SR * * Next *
2925
# ***************** * PC *
2926
# *****************
2927
# * SR *
2928
# *****************
2929
# this ain't a pretty solution, but it works:
2930
# -restore a6 (not with unlk)
2931
# -shift stack frame down over where old a6 used to be
2932
# -add LOCAL_SIZE to stack pointer
2933
iea_fmovm_trace:
2934
mov.l (%a6),%a6 # restore frame pointer
2935
mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2936
mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2937
mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2938
mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939
add.l &LOCAL_SIZE,%sp # clear stack frame
2940
2941
bra.l _real_trace
2942
2943
#########################################################################
2944
# The FPU is disabled and so we should really have taken the "Line
2945
# F Emulator" exception. So, here we create an 8-word stack frame
2946
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC". This is trivial for
2948
# immediate operands but requires some extra work for fmovm dynamic
2949
# which can use most addressing modes.
2950
iea_disabled:
2951
mov.l (%sp)+,%d0 # restore d0
2952
2953
link %a6,&-LOCAL_SIZE # init stack frame
2954
2955
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2956
2957
# PC of instruction that took the exception is the PC in the frame
2958
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2960
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2961
bsr.l _imem_read_long # fetch the instruction words
2962
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2963
2964
tst.w %d0 # is instr fmovm?
2965
bmi.b iea_dis_fmovm # yes
2966
# instruction is using an extended precision immediate operand. Therefore,
2967
# the total instruction length is 16 bytes.
2968
iea_dis_immed:
2969
mov.l &0x10,%d0 # 16 bytes of instruction
2970
bra.b iea_dis_cont
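# (the 16 bytes = 2-byte opword + 2-byte extension word + 12-byte extended
# or packed immediate operand.)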
iea_dis_fmovm:
2972
btst &0xe,%d0 # is instr fmovm ctrl
2973
bne.b iea_dis_fmovm_data # no
2974
# the instruction is a fmovm.l with 2 or 3 registers.
2975
bfextu %d0{&19:&3},%d1
2976
mov.l &0xc,%d0
2977
cmpi.b %d1,&0x7 # move all regs?
2978
bne.b iea_dis_cont
2979
addq.l &0x4,%d0
2980
bra.b iea_dis_cont
2981
# the instruction is an fmovm.x dynamic which can use many addressing
2982
# modes and thus can have several different total instruction lengths.
2983
# call fmovm_calc_ea which will go through the ea calc process and,
2984
# as a by-product, will tell us how long the instruction is.
2985
iea_dis_fmovm_data:
2986
clr.l %d0
2987
bsr.l fmovm_calc_ea
2988
mov.l EXC_EXTWPTR(%a6),%d0
2989
sub.l EXC_PC(%a6),%d0
2990
iea_dis_cont:
2991
mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2992
2993
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2994
2995
unlk %a6
2996
2997
# here, we actually create the 8-word frame from the 4-word frame,
2998
# with the "next PC" as additional info.
2999
# the <ea> field is left undefined.
3000
subq.l &0x8,%sp # make room for new stack
3001
mov.l %d0,-(%sp) # save d0
3002
mov.w 0xc(%sp),0x4(%sp) # move SR
3003
mov.l 0xe(%sp),0x6(%sp) # move Current PC
3004
clr.l %d0
3005
mov.w 0x12(%sp),%d0
3006
mov.l 0x6(%sp),0x10(%sp) # move Current PC
3007
add.l %d0,0x6(%sp) # make Next PC
3008
mov.w &0x402c,0xa(%sp) # insert offset,frame format
3009
mov.l (%sp)+,%d0 # restore d0
3010
3011
bra.l _real_fpu_disabled
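#
# (note: 0x402c above is the format/vector offset word of the new frame:
# format $4, the 8-word frame, with vector offset 0x02c = vector 11, the
# Line F emulator vector used for "FPU disabled".)
#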
##########
3014
3015
iea_iacc:
3016
movc %pcr,%d0
3017
btst &0x1,%d0
3018
bne.b iea_iacc_cont
3019
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3021
iea_iacc_cont:
3022
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3023
3024
unlk %a6
3025
3026
subq.w &0x8,%sp # make stack frame bigger
3027
mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3028
mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3029
mov.w &0x4008,0x6(%sp) # store voff
3030
mov.l 0x2(%sp),0x8(%sp) # store ea
3031
mov.l &0x09428001,0xc(%sp) # store fslw
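#
# (note: this builds a format $4 access error frame in place: 0x4008 =
# format $4, vector offset 0x008 = vector 2, access error. the longword at
# 0x8(%sp) is the faulting <ea> and the one at 0xc(%sp) is the fault
# status longword handed to _real_access().)
#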
iea_acc_done:
3034
btst &0x5,(%sp) # user or supervisor mode?
3035
beq.b iea_acc_done2 # user
3036
bset &0x2,0xd(%sp) # set supervisor TM bit
3037
3038
iea_acc_done2:
3039
bra.l _real_access
3040
3041
iea_dacc:
3042
lea -LOCAL_SIZE(%a6),%sp
3043
3044
movc %pcr,%d1
3045
btst &0x1,%d1
3046
bne.b iea_dacc_cont
3047
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3048
fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3049
iea_dacc_cont:
3050
mov.l (%a6),%a6
3051
3052
mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3053
mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3054
mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3055
mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3056
mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3057
mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3058
3059
movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3060
add.w &LOCAL_SIZE-0x4,%sp
3061
3062
bra.b iea_acc_done
3063
3064
#########################################################################
3065
# XDEF **************************************************************** #
3066
# _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3067
# #
3068
# This handler should be the first code executed upon taking the #
3069
# FP Operand Error exception in an operating system. #
3070
# #
3071
# XREF **************************************************************** #
3072
# _imem_read_long() - read instruction longword #
3073
# fix_skewed_ops() - adjust src operand in fsave frame #
3074
# _real_operr() - "callout" to operating system operr handler #
3075
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3076
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3077
# facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3078
# #
3079
# INPUT *************************************************************** #
3080
# - The system stack contains the FP Operr exception frame #
3081
# - The fsave frame contains the source operand #
3082
# #
3083
# OUTPUT ************************************************************** #
3084
# No access error: #
3085
# - The system stack is unchanged #
3086
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3087
# #
3088
# ALGORITHM *********************************************************** #
3089
# In a system where the FP Operr exception is enabled, the goal #
3090
# is to get to the handler specified at _real_operr(). But, on the 060, #
3091
# for opclass zero and two instruction taking this exception, the #
3092
# input operand in the fsave frame may be incorrect for some cases #
3093
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3094
# do just this and then exits through _real_operr(). #
3095
# For opclass 3 instructions, the 060 doesn't store the default #
3096
# operr result out to memory or data register file as it should. #
3097
# This code must emulate the move out before finally exiting through #
3098
# _real_inex(). The move out, if to memory, is performed using #
3099
# _mem_write() "callout" routines that may return a failing result. #
3100
# In this special case, the handler must exit through facc_out() #
3101
# which creates an access error stack frame from the current operr #
3102
# stack frame. #
3103
# #
3104
#########################################################################
3105
3106
global _fpsp_operr
3107
_fpsp_operr:
3108
3109
link.w %a6,&-LOCAL_SIZE # init stack frame
3110
3111
fsave FP_SRC(%a6) # grab the "busy" frame
3112
3113
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3114
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3116
3117
# the FPIAR holds the "current PC" of the faulting instruction
3118
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119
3120
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3121
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3122
bsr.l _imem_read_long # fetch the instruction words
3123
mov.l %d0,EXC_OPWORD(%a6)
3124
3125
##############################################################################
3126
3127
btst &13,%d0 # is instr an fmove out?
3128
bne.b foperr_out # fmove out
3129
3130
3131
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132
# this would be the case for opclass two operations with a source infinity or
3133
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134
# cause an operr so we don't need to check for them here.
3135
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3136
bsr.l fix_skewed_ops # fix src op
3137
3138
foperr_exit:
3139
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3140
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3142
3143
frestore FP_SRC(%a6)
3144
3145
unlk %a6
3146
bra.l _real_operr
3147
3148
########################################################################
3149
3150
#
3151
# the hardware does not save the default result to memory on enabled
3152
# operand error exceptions. we do this here before passing control to
3153
# the user operand error handler.
3154
#
3155
# byte, word, and long destination format operations can pass
3156
# through here. we simply need to test the sign of the src
3157
# operand and save the appropriate minimum or maximum integer value
3158
# to the effective address as pointed to by the stacked effective address.
3159
#
3160
# although packed opclass three operations can take operand error
3161
# exceptions, they won't pass through here since they are caught
3162
# first by the unsupported data format exception handler. that handler
3163
# sends them directly to _real_operr() if necessary.
3164
#
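# as an example of the default result described above: a negative src
# operand produces 0x80000000; since the 68k is big-endian, the byte and
# word cases below then pick up 0x80 and 0x8000 respectively from the top
# of that longword in L_SCR1.
#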
foperr_out:
3166
3167
mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3168
andi.w &0x7fff,%d1
3169
cmpi.w %d1,&0x7fff
3170
bne.b foperr_out_not_qnan
3171
# the operand is either an infinity or a QNAN.
3172
tst.l FP_SRC_LO(%a6)
3173
bne.b foperr_out_qnan
3174
mov.l FP_SRC_HI(%a6),%d1
3175
andi.l &0x7fffffff,%d1
3176
beq.b foperr_out_not_qnan
3177
foperr_out_qnan:
3178
mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3179
bra.b foperr_out_jmp
3180
3181
foperr_out_not_qnan:
3182
mov.l &0x7fffffff,%d1
3183
tst.b FP_SRC_EX(%a6)
3184
bpl.b foperr_out_not_qnan2
3185
addq.l &0x1,%d1
3186
foperr_out_not_qnan2:
3187
mov.l %d1,L_SCR1(%a6)
3188
3189
foperr_out_jmp:
3190
bfextu %d0{&19:&3},%d0 # extract dst format field
3191
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3192
mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3193
jmp (tbl_operr.b,%pc,%a0)
3194
3195
tbl_operr:
3196
short foperr_out_l - tbl_operr # long word integer
3197
short tbl_operr - tbl_operr # sgl prec shouldn't happen
3198
short tbl_operr - tbl_operr # ext prec shouldn't happen
3199
short foperr_exit - tbl_operr # packed won't enter here
3200
short foperr_out_w - tbl_operr # word integer
3201
short tbl_operr - tbl_operr # dbl prec shouldn't happen
3202
short foperr_out_b - tbl_operr # byte integer
3203
short tbl_operr - tbl_operr # packed won't enter here
3204
3205
foperr_out_b:
3206
mov.b L_SCR1(%a6),%d0 # load default result hi byte
3207
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3208
ble.b foperr_out_b_save_dn # yes
3209
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3210
bsr.l _dmem_write_byte # write the default result
3211
3212
tst.l %d1 # did dstore fail?
3213
bne.l facc_out_b # yes
3214
3215
bra.w foperr_exit
3216
foperr_out_b_save_dn:
3217
andi.w &0x0007,%d1
3218
bsr.l store_dreg_b # store result to regfile
3219
bra.w foperr_exit
3220
3221
foperr_out_w:
3222
mov.w L_SCR1(%a6),%d0 # load default result hi word
3223
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3224
ble.b foperr_out_w_save_dn # yes
3225
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3226
bsr.l _dmem_write_word # write the default result
3227
3228
tst.l %d1 # did dstore fail?
3229
bne.l facc_out_w # yes
3230
3231
bra.w foperr_exit
3232
foperr_out_w_save_dn:
3233
andi.w &0x0007,%d1
3234
bsr.l store_dreg_w # store result to regfile
3235
bra.w foperr_exit
3236
3237
foperr_out_l:
3238
mov.l L_SCR1(%a6),%d0 # load default result
3239
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3240
ble.b foperr_out_l_save_dn # yes
3241
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3242
bsr.l _dmem_write_long # write the default result
3243
3244
tst.l %d1 # did dstore fail?
3245
bne.l facc_out_l # yes
3246
3247
bra.w foperr_exit
3248
foperr_out_l_save_dn:
3249
andi.w &0x0007,%d1
3250
bsr.l store_dreg_l # store result to regfile
3251
bra.w foperr_exit
3252
3253
#########################################################################
3254
# XDEF **************************************************************** #
3255
# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3256
# #
3257
# This handler should be the first code executed upon taking the #
3258
# FP Signalling NAN exception in an operating system. #
3259
# #
3260
# XREF **************************************************************** #
3261
# _imem_read_long() - read instruction longword #
3262
# fix_skewed_ops() - adjust src operand in fsave frame #
3263
# _real_snan() - "callout" to operating system SNAN handler #
3264
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3265
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3266
# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3267
# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3268
# #
3269
# INPUT *************************************************************** #
3270
# - The system stack contains the FP SNAN exception frame #
3271
# - The fsave frame contains the source operand #
3272
# #
3273
# OUTPUT ************************************************************** #
3274
# No access error: #
3275
# - The system stack is unchanged #
3276
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3277
# #
3278
# ALGORITHM *********************************************************** #
3279
# In a system where the FP SNAN exception is enabled, the goal #
3280
# is to get to the handler specified at _real_snan(). But, on the 060, #
3281
# for opclass zero and two instructions taking this exception, the #
3282
# input operand in the fsave frame may be incorrect for some cases #
3283
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3284
# do just this and then exits through _real_snan(). #
3285
# For opclass 3 instructions, the 060 doesn't store the default #
3286
# SNAN result out to memory or data register file as it should. #
3287
# This code must emulate the move out before finally exiting through #
3288
# _real_snan(). The move out, if to memory, is performed using #
3289
# _mem_write() "callout" routines that may return a failing result. #
3290
# In this special case, the handler must exit through facc_out() #
3291
# which creates an access error stack frame from the current SNAN #
3292
# stack frame. #
3293
# For the case of an extended precision opclass 3 instruction, #
3294
# if the effective addressing mode was -() or ()+, then the address #
3295
# register must get updated by calling _calc_ea_fout(). If the <ea> #
3296
# was -(a7) from supervisor mode, then the exception frame currently #
3297
# on the system stack must be carefully moved "down" to make room #
3298
# for the operand being moved. #
3299
# #
3300
#########################################################################
3301
3302
global _fpsp_snan
3303
_fpsp_snan:
3304
3305
link.w %a6,&-LOCAL_SIZE # init stack frame
3306
3307
fsave FP_SRC(%a6) # grab the "busy" frame
3308
3309
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3310
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3312
3313
# the FPIAR holds the "current PC" of the faulting instruction
3314
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3315
3316
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3317
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3318
bsr.l _imem_read_long # fetch the instruction words
3319
mov.l %d0,EXC_OPWORD(%a6)
3320
3321
##############################################################################
3322
3323
btst &13,%d0 # is instr an fmove out?
3324
bne.w fsnan_out # fmove out
3325
3326
3327
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328
# this would be the case for opclass two operations with a source infinity or
3329
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330
# fixed here.
3331
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3332
bsr.l fix_skewed_ops # fix src op
3333
3334
fsnan_exit:
3335
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3336
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3338
3339
frestore FP_SRC(%a6)
3340
3341
unlk %a6
3342
bra.l _real_snan
3343
3344
########################################################################
3345
3346
#
3347
# the hardware does not save the default result to memory on enabled
3348
# snan exceptions. we do this here before passing control to
3349
# the user snan handler.
3350
#
3351
# byte, word, long, sgl, dbl, ext, and packed destination format operations
3352
# can pass through here. since packed format operations were already handled
3353
# by fpsp_unsupp(), we need to do nothing else for them here.
3354
# for the other formats, we simply need to convert the SNAN operand to the
3355
# destination format, set the SNAN bit so the stored NAN is non-signalling,
3356
# and save it to the data register or to the stacked effective address.
3357
#
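# for example, with a source SNAN whose upper mantissa longword is 0x12345678,
# "fmove.b" stores 0x52 (0x12 w/ bit 6 set), "fmove.w" stores 0x5234, "fmove.l"
# stores 0x52345678, and "fmove.s" stores sign | 0x7fc00000 | (mantissa >> 8).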
3358
fsnan_out:
3359
3360
bfextu %d0{&19:&3},%d0 # extract dst format field
3361
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3362
mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3363
jmp (tbl_snan.b,%pc,%a0)
3364
3365
tbl_snan:
3366
short fsnan_out_l - tbl_snan # long word integer
3367
short fsnan_out_s - tbl_snan # sgl prec
3368
short fsnan_out_x - tbl_snan # ext prec
3369
short tbl_snan - tbl_snan # packed needs no help
3370
short fsnan_out_w - tbl_snan # word integer
3371
short fsnan_out_d - tbl_snan # dbl prec
3372
short fsnan_out_b - tbl_snan # byte integer
3373
short tbl_snan - tbl_snan # packed needs no help
3374
3375
fsnan_out_b:
3376
mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3377
bset &6,%d0 # set SNAN bit
3378
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3379
ble.b fsnan_out_b_dn # yes
3380
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3381
bsr.l _dmem_write_byte # write the default result
3382
3383
tst.l %d1 # did dstore fail?
3384
bne.l facc_out_b # yes
3385
3386
bra.w fsnan_exit
3387
fsnan_out_b_dn:
3388
andi.w &0x0007,%d1
3389
bsr.l store_dreg_b # store result to regfile
3390
bra.w fsnan_exit
3391
3392
fsnan_out_w:
3393
mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3394
bset &14,%d0 # set SNAN bit
3395
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3396
ble.b fsnan_out_w_dn # yes
3397
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3398
bsr.l _dmem_write_word # write the default result
3399
3400
tst.l %d1 # did dstore fail?
3401
bne.l facc_out_w # yes
3402
3403
bra.w fsnan_exit
3404
fsnan_out_w_dn:
3405
andi.w &0x0007,%d1
3406
bsr.l store_dreg_w # store result to regfile
3407
bra.w fsnan_exit
3408
3409
fsnan_out_l:
3410
mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3411
bset &30,%d0 # set SNAN bit
3412
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3413
ble.b fsnan_out_l_dn # yes
3414
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3415
bsr.l _dmem_write_long # write the default result
3416
3417
tst.l %d1 # did dstore fail?
3418
bne.l facc_out_l # yes
3419
3420
bra.w fsnan_exit
3421
fsnan_out_l_dn:
3422
andi.w &0x0007,%d1
3423
bsr.l store_dreg_l # store result to regfile
3424
bra.w fsnan_exit
3425
3426
fsnan_out_s:
3427
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3428
ble.b fsnan_out_d_dn # yes
3429
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3430
andi.l &0x80000000,%d0 # keep sign
3431
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3432
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3433
lsr.l &0x8,%d1 # shift mantissa for sgl
3434
or.l %d1,%d0 # create sgl SNAN
3435
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3436
bsr.l _dmem_write_long # write the default result
3437
3438
tst.l %d1 # did dstore fail?
3439
bne.l facc_out_l # yes
3440
3441
bra.w fsnan_exit
3442
fsnan_out_d_dn:
3443
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3444
andi.l &0x80000000,%d0 # keep sign
3445
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3446
mov.l %d1,-(%sp)
3447
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3448
lsr.l &0x8,%d1 # shift mantissa for sgl
3449
or.l %d1,%d0 # create sgl SNAN
3450
mov.l (%sp)+,%d1
3451
andi.w &0x0007,%d1
3452
bsr.l store_dreg_l # store result to regfile
3453
bra.w fsnan_exit
3454
3455
fsnan_out_d:
3456
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3457
andi.l &0x80000000,%d0 # keep sign
3458
ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3459
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3460
mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3461
mov.l &11,%d0 # load shift amt
3462
lsr.l %d0,%d1
3463
or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3464
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3465
andi.l &0x000007ff,%d1
3466
ror.l %d0,%d1
3467
mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3468
mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3469
lsr.l %d0,%d1
3470
or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3471
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3472
mov.l EXC_EA(%a6),%a1 # pass: dst addr
3473
movq.l &0x8,%d0 # pass: size of 8 bytes
3474
bsr.l _dmem_write # write the default result
3475
3476
tst.l %d1 # did dstore fail?
3477
bne.l facc_out_d # yes
3478
3479
bra.w fsnan_exit
3480
3481
# for extended precision, if the addressing mode is pre-decrement or
3482
# post-increment, then the address register did not get updated.
3483
# in addition, for pre-decrement, the stacked <ea> is incorrect.
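# _calc_ea_fout() recomputes the <ea> and updates An for these modes. if the
# mode turns out to be -(a7) from supervisor mode, SPCOND_FLG is set to
# mda7_flg and the frame-shuffling code below is used instead of a plain
# _dmem_write().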
3484
fsnan_out_x:
3485
clr.b SPCOND_FLG(%a6) # clear special case flag
3486
3487
mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3488
clr.w 2+FP_SCR0(%a6)
3489
mov.l FP_SRC_HI(%a6),%d0
3490
bset &30,%d0
3491
mov.l %d0,FP_SCR0_HI(%a6)
3492
mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3493
3494
btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3495
bne.b fsnan_out_x_s # yes
3496
3497
mov.l %usp,%a0 # fetch user stack pointer
3498
mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3499
mov.l (%a6),EXC_A6(%a6)
3500
3501
bsr.l _calc_ea_fout # find the correct ea,update An
3502
mov.l %a0,%a1
3503
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3504
3505
mov.l EXC_A7(%a6),%a0
3506
mov.l %a0,%usp # restore user stack pointer
3507
mov.l EXC_A6(%a6),(%a6)
3508
3509
fsnan_out_x_save:
3510
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3511
movq.l &0xc,%d0 # pass: size of extended
3512
bsr.l _dmem_write # write the default result
3513
3514
tst.l %d1 # did dstore fail?
3515
bne.l facc_out_x # yes
3516
3517
bra.w fsnan_exit
3518
3519
fsnan_out_x_s:
3520
mov.l (%a6),EXC_A6(%a6)
3521
3522
bsr.l _calc_ea_fout # find the correct ea,update An
3523
mov.l %a0,%a1
3524
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3525
3526
mov.l EXC_A6(%a6),(%a6)
3527
3528
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3529
bne.b fsnan_out_x_save # no
3530
3531
# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
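# the 12-byte operand overlaps the exception frame on the supervisor stack. so,
# the frame words (SR, PC, format/<ea>) are first copied 0xc bytes lower, the
# operand is written where the frame used to be, and the stack pointer is
# adjusted so that _real_snan() sees the relocated frame.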
3532
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3533
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3534
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3535
3536
frestore FP_SRC(%a6)
3537
3538
mov.l EXC_A6(%a6),%a6 # restore frame pointer
3539
3540
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3541
mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3542
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3543
3544
mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3545
mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3546
mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3547
3548
add.l &LOCAL_SIZE-0x8,%sp
3549
3550
bra.l _real_snan
3551
3552
#########################################################################
3553
# XDEF **************************************************************** #
3554
# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3555
# #
3556
# This handler should be the first code executed upon taking the #
3557
# FP Inexact exception in an operating system. #
3558
# #
3559
# XREF **************************************************************** #
3560
# _imem_read_long() - read instruction longword #
3561
# fix_skewed_ops() - adjust src operand in fsave frame #
3562
# set_tag_x() - determine optype of src/dst operands #
3563
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3564
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
3565
# load_fpn2() - load dst operand from FP regfile #
3566
# smovcr() - emulate an "fmovcr" instruction #
3567
# fout() - emulate an opclass 3 instruction #
3568
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3569
# _real_inex() - "callout" to operating system inexact handler #
3570
# #
3571
# INPUT *************************************************************** #
3572
# - The system stack contains the FP Inexact exception frame #
3573
# - The fsave frame contains the source operand #
3574
# #
3575
# OUTPUT ************************************************************** #
3576
# - The system stack is unchanged #
3577
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3578
# #
3579
# ALGORITHM *********************************************************** #
3580
# In a system where the FP Inexact exception is enabled, the goal #
3581
# is to get to the handler specified at _real_inex(). But, on the 060, #
3582
# for opclass zero and two instructions taking this exception, the #
3583
# hardware doesn't store the correct result to the destination FP #
3584
# register as did the '040 and '881/2. This handler must emulate the #
3585
# instruction in order to get this value and then store it to the #
3586
# correct register before calling _real_inex(). #
3587
# For opclass 3 instructions, the 060 doesn't store the default #
3588
# inexact result out to memory or data register file as it should. #
3589
# This code must emulate the move out by calling fout() before finally #
3590
# exiting through _real_inex(). #
3591
# #
3592
#########################################################################
3593
3594
global _fpsp_inex
3595
_fpsp_inex:
3596
3597
link.w %a6,&-LOCAL_SIZE # init stack frame
3598
3599
fsave FP_SRC(%a6) # grab the "busy" frame
3600
3601
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3602
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3603
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3604
3605
# the FPIAR holds the "current PC" of the faulting instruction
3606
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3607
3608
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3609
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3610
bsr.l _imem_read_long # fetch the instruction words
3611
mov.l %d0,EXC_OPWORD(%a6)
3612
3613
##############################################################################
3614
3615
btst &13,%d0 # is instr an fmove out?
3616
bne.w finex_out # fmove out
3617
3618
3619
# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3620
# longword integer directly into the upper longword of the mantissa along
3621
# w/ an exponent value of 0x401e. we convert this to extended precision here.
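# for example, a long source of 0x00000005 arrives here as exponent 0x401e w/
# a mantissa of 0x00000005_00000000, which still equals 5 (5*2^32 scaled by
# 2^(0x401e-0x3fff-63) = 2^-32); the fmov.l/fmov.x pair below rewrites it as a
# normalized extended precision value.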
3622
bfextu %d0{&19:&3},%d0 # fetch instr size
3623
bne.b finex_cont # instr size is not long
3624
cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3625
bne.b finex_cont # no
3626
fmov.l &0x0,%fpcr
3627
fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3628
fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3629
mov.w &0xe001,0x2+FP_SRC(%a6)
3630
3631
finex_cont:
3632
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3633
bsr.l fix_skewed_ops # fix src op
3634
3635
# Here, we zero the ccode and exception byte field since we're going to
3636
# emulate the whole instruction. Notice, though, that we don't kill the
3637
# INEX1 bit. This is because a packed op has long since been converted
3638
# to extended before arriving here. Therefore, we need to retain the
3639
# INEX1 bit from when the operand was first converted.
3640
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3641
3642
fmov.l &0x0,%fpcr # zero current control regs
3643
fmov.l &0x0,%fpsr
3644
3645
bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3646
cmpi.b %d1,&0x17 # is op an fmovecr?
3647
beq.w finex_fmovcr # yes
3648
3649
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3650
bsr.l set_tag_x # tag the operand type
3651
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3652
3653
# bits four and five of the fp extension word separate the monadic and dyadic
3654
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3655
# will never take this exception, but fsincos will.
3656
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3657
beq.b finex_extract # monadic
3658
3659
btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3660
bne.b finex_extract # yes
3661
3662
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3663
bsr.l load_fpn2 # load dst into FP_DST
3664
3665
lea FP_DST(%a6),%a0 # pass: ptr to dst op
3666
bsr.l set_tag_x # tag the operand type
3667
cmpi.b %d0,&UNNORM # is operand an UNNORM?
3668
bne.b finex_op2_done # no
3669
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3670
finex_op2_done:
3671
mov.b %d0,DTAG(%a6) # save dst optype tag
3672
3673
finex_extract:
3674
clr.l %d0
3675
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3676
3677
mov.b 1+EXC_CMDREG(%a6),%d1
3678
andi.w &0x007f,%d1 # extract extension
3679
3680
lea FP_SRC(%a6),%a0
3681
lea FP_DST(%a6),%a1
3682
3683
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3684
jsr (tbl_unsupp.l,%pc,%d1.l*1)
3685
3686
# the operation has been emulated. the result is in fp0.
3687
finex_save:
3688
bfextu EXC_CMDREG(%a6){&6:&3},%d0
3689
bsr.l store_fpreg
3690
3691
finex_exit:
3692
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3693
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3694
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3695
3696
frestore FP_SRC(%a6)
3697
3698
unlk %a6
3699
bra.l _real_inex
3700
3701
finex_fmovcr:
3702
clr.l %d0
3703
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3704
mov.b 1+EXC_CMDREG(%a6),%d1
3705
andi.l &0x0000007f,%d1 # pass rom offset
3706
bsr.l smovcr
3707
bra.b finex_save
3708
3709
########################################################################
3710
3711
#
3712
# the hardware does not save the default result to memory on enabled
3713
# inexact exceptions. we do this here before passing control to
3714
# the user inexact handler.
3715
#
3716
# byte, word, and long destination format operations can pass
3717
# through here. so can double and single precision.
3718
# although packed opclass three operations can take inexact
3719
# exceptions, they won't pass through here since they are caught
3720
# first by the unsupported data format exception handler. that handler
3721
# sends them directly to _real_inex() if necessary.
3722
#
3723
finex_out:
3724
3725
mov.b &NORM,STAG(%a6) # src is a NORM
3726
3727
clr.l %d0
3728
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3729
3730
andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3731
3732
lea FP_SRC(%a6),%a0 # pass ptr to src operand
3733
3734
bsr.l fout # store the default result
3735
3736
bra.b finex_exit
3737
3738
#########################################################################
3739
# XDEF **************************************************************** #
3740
# _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3741
# #
3742
# This handler should be the first code executed upon taking #
3743
# the FP DZ exception in an operating system. #
3744
# #
3745
# XREF **************************************************************** #
3746
# _imem_read_long() - read instruction longword from memory #
3747
# fix_skewed_ops() - adjust fsave operand #
3748
# _real_dz() - "callout" exit point from FP DZ handler #
3749
# #
3750
# INPUT *************************************************************** #
3751
# - The system stack contains the FP DZ exception stack. #
3752
# - The fsave frame contains the source operand. #
3753
# #
3754
# OUTPUT ************************************************************** #
3755
# - The system stack contains the FP DZ exception stack. #
3756
# - The fsave frame contains the adjusted source operand. #
3757
# #
3758
# ALGORITHM *********************************************************** #
3759
# In a system where the DZ exception is enabled, the goal is to #
3760
# get to the handler specified at _real_dz(). But, on the 060, when the #
3761
# exception is taken, the input operand in the fsave state frame may #
3762
# be incorrect for some cases and need to be adjusted. So, this package #
3763
# adjusts the operand using fix_skewed_ops() and then branches to #
3764
# _real_dz(). #
3765
# #
3766
#########################################################################
3767
3768
global _fpsp_dz
3769
_fpsp_dz:
3770
3771
link.w %a6,&-LOCAL_SIZE # init stack frame
3772
3773
fsave FP_SRC(%a6) # grab the "busy" frame
3774
3775
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3776
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3777
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3778
3779
# the FPIAR holds the "current PC" of the faulting instruction
3780
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3781
3782
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3783
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3784
bsr.l _imem_read_long # fetch the instruction words
3785
mov.l %d0,EXC_OPWORD(%a6)
3786
3787
##############################################################################
3788
3789
3790
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3791
# this would be the case for opclass two operations with a source zero
3792
# in the sgl or dbl format.
3793
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3794
bsr.l fix_skewed_ops # fix src op
3795
3796
fdz_exit:
3797
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3798
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3799
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3800
3801
frestore FP_SRC(%a6)
3802
3803
unlk %a6
3804
bra.l _real_dz
3805
3806
#########################################################################
3807
# XDEF **************************************************************** #
3808
# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
3809
# #
3810
# This handler should be the first code executed upon taking the #
3811
# "Line F Emulator" exception in an operating system. #
3812
# #
3813
# XREF **************************************************************** #
3814
# _fpsp_unimp() - handle "FP Unimplemented" exceptions #
3815
# _real_fpu_disabled() - handle "FPU disabled" exceptions #
3816
# _real_fline() - handle "FLINE" exceptions #
3817
# _imem_read_long() - read instruction longword #
3818
# #
3819
# INPUT *************************************************************** #
3820
# - The system stack contains a "Line F Emulator" exception #
3821
# stack frame. #
3822
# #
3823
# OUTPUT ************************************************************** #
3824
# - The system stack is unchanged #
3825
# #
3826
# ALGORITHM *********************************************************** #
3827
# When a "Line F Emulator" exception occurs, there are 3 possible #
3828
# exception types, denoted by the exception stack frame format number: #
3829
# (1) FPU unimplemented instruction (6 word stack frame) #
3830
# (2) FPU disabled (8 word stack frame) #
3831
# (3) Line F (4 word stack frame) #
3832
# #
3833
# This module determines which and forks the flow off to the #
3834
# appropriate "callout" (for "disabled" and "Line F") or to the #
3835
# correct emulation code (for "FPU unimplemented"). #
3836
# This code also must check for "fmovecr" instructions w/ a #
3837
# non-zero <ea> field. These may get flagged as "Line F" but should #
3838
# really be flagged as "FPU Unimplemented". (This is a "feature" on #
3839
# the '060.) #
3840
# #
3841
#########################################################################
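# the checks below key off the format/vector word at 0x6(%sp): 0x202c is a
# format 0x2 frame w/ vector offset 0x02c (FP unimplemented instruction),
# 0x402c is a format 0x4 frame w/ the same vector (FPU disabled), and anything
# else is treated as a plain 4-word Line F frame.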
3842
3843
global _fpsp_fline
3844
_fpsp_fline:
3845
3846
# check to see if this exception is a "FP Unimplemented Instruction"
3847
# exception. if so, branch directly to that handler's entry point.
3848
cmpi.w 0x6(%sp),&0x202c
3849
beq.l _fpsp_unimp
3850
3851
# check to see if the FPU is disabled. if so, jump to the OS entry
3852
# point for that condition.
3853
cmpi.w 0x6(%sp),&0x402c
3854
beq.l _real_fpu_disabled
3855
3856
# the exception was an "F-Line Illegal" exception. we check to see
3857
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3858
# so, convert the F-Line exception stack frame to an FP Unimplemented
3859
# Instruction exception stack frame; otherwise, branch to the OS entry
3860
# point for the F-Line exception handler.
3861
link.w %a6,&-LOCAL_SIZE # init stack frame
3862
3863
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3864
3865
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
3866
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3867
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3868
bsr.l _imem_read_long # fetch instruction words
3869
3870
bfextu %d0{&0:&10},%d1 # is it an fmovecr?
3871
cmpi.w %d1,&0x03c8
3872
bne.b fline_fline # no
3873
3874
bfextu %d0{&16:&6},%d1 # is it an fmovecr?
3875
cmpi.b %d1,&0x17
3876
bne.b fline_fline # no
3877
3878
# it's an fmovecr w/ a non-zero <ea> that has entered through
3879
# the F-Line Illegal exception.
3880
# so, we need to convert the F-Line exception stack frame into an
3881
# FP Unimplemented Instruction stack frame and jump to that entry
3882
# point.
3883
#
3884
# but, if the FPU is disabled, then we need to jump to the FPU disabled
3885
# entry point.
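# (on the '060, bit 1 of the PCR is the FPU disable bit; that's what the
# movc/btst pair below tests.)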
3886
movc %pcr,%d0
3887
btst &0x1,%d0
3888
beq.b fline_fmovcr
3889
3890
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3891
3892
unlk %a6
3893
3894
sub.l &0x8,%sp # make room for "Next PC", <ea>
3895
mov.w 0x8(%sp),(%sp)
3896
mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
3897
mov.w &0x402c,0x6(%sp)
3898
mov.l 0x2(%sp),0xc(%sp)
3899
addq.l &0x4,0x2(%sp) # set "Next PC"
3900
3901
bra.l _real_fpu_disabled
3902
3903
fline_fmovcr:
3904
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3905
3906
unlk %a6
3907
3908
fmov.l 0x2(%sp),%fpiar # set current PC
3909
addq.l &0x4,0x2(%sp) # set Next PC
3910
3911
mov.l (%sp),-(%sp)
3912
mov.l 0x8(%sp),0x4(%sp)
3913
mov.b &0x20,0x6(%sp)
3914
3915
bra.l _fpsp_unimp
3916
3917
fline_fline:
3918
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3919
3920
unlk %a6
3921
3922
bra.l _real_fline
3923
3924
#########################################################################
3925
# XDEF **************************************************************** #
3926
# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
3927
# Instruction" exception. #
3928
# #
3929
# This handler should be the first code executed upon taking the #
3930
# FP Unimplemented Instruction exception in an operating system. #
3931
# #
3932
# XREF **************************************************************** #
3933
# _imem_read_{word,long}() - read instruction word/longword #
3934
# load_fop() - load src/dst ops from memory and/or FP regfile #
3935
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3936
# tbl_trans - addr of table of emulation routines for trnscndls #
3937
# _real_access() - "callout" for access error exception #
3938
# _fpsp_done() - "callout" for exit; work all done #
3939
# _real_trace() - "callout" for Trace enabled exception #
3940
# smovcr() - emulate "fmovecr" instruction #
3941
# funimp_skew() - adjust fsave src ops to "incorrect" value #
3942
# _ftrapcc() - emulate an "ftrapcc" instruction #
3943
# _fdbcc() - emulate an "fdbcc" instruction #
3944
# _fscc() - emulate an "fscc" instruction #
3945
# _real_trap() - "callout" for Trap exception #
3946
# _real_bsun() - "callout" for enabled Bsun exception #
3947
# #
3948
# INPUT *************************************************************** #
3949
# - The system stack contains the "Unimplemented Instr" stk frame #
3950
# #
3951
# OUTPUT ************************************************************** #
3952
# If access error: #
3953
# - The system stack is changed to an access error stack frame #
3954
# If Trace exception enabled: #
3955
# - The system stack is changed to a Trace exception stack frame #
3956
# Else: (normal case) #
3957
# - Correct result has been stored as appropriate #
3958
# #
3959
# ALGORITHM *********************************************************** #
3960
# There are two main cases of instructions that may enter here to #
3961
# be emulated: (1) the FPgen instructions, most of which were also #
3962
# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
3963
# For the first set, this handler calls the routine load_fop() #
3964
# to load the source and destination (for dyadic) operands to be used #
3965
# for instruction emulation. The correct emulation routine is then #
3966
# chosen by decoding the instruction type and indexing into an #
3967
# emulation subroutine index table. After emulation returns, this #
3968
# handler checks to see if an exception should occur as a result of the #
3969
# FP instruction emulation. If so, then an FP exception of the correct #
3970
# type is inserted into the FPU state frame using the "frestore" #
3971
# instruction before exiting through _fpsp_done(). In either the #
3972
# exceptional or non-exceptional cases, we must check to see if the #
3973
# Trace exception is enabled. If so, then we must create a Trace #
3974
# exception frame from the current exception frame and exit through #
3975
# _real_trace(). #
3976
# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
3977
# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
3978
# may flag that a BSUN exception should be taken. If so, then the #
3979
# current exception stack frame is converted into a BSUN exception #
3980
# stack frame and an exit is made through _real_bsun(). If the #
3981
# instruction was "ftrapcc" and a Trap exception should result, a Trap #
3982
# exception stack frame is created from the current frame and an exit #
3983
# is made through _real_trap(). If a Trace exception is pending, then #
3984
# a Trace exception frame is created from the current frame and a jump #
3985
# is made to _real_trace(). Finally, if none of these conditions exist, #
3986
# then the handler exits through the callout _fpsp_done(). #
3987
# #
3988
# In any of the above scenarios, if a _mem_read() or _mem_write() #
3989
# "callout" returns a failing value, then an access error stack frame #
3990
# is created from the current stack frame and an exit is made through #
3991
# _real_access(). #
3992
# #
3993
#########################################################################
3994
3995
#
3996
# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3997
#
3998
# *****************
3999
# * * => <ea> of fp unimp instr.
4000
# - EA -
4001
# * *
4002
# *****************
4003
# * 0x2 * 0x02c * => frame format and vector offset (vector #11)
4004
# *****************
4005
# * *
4006
# - Next PC - => PC of instr to execute after exc handling
4007
# * *
4008
# *****************
4009
# * SR * => SR at the time the exception was taken
4010
# *****************
4011
#
4012
# Note: the !NULL bit does not get set in the fsave frame when the
4013
# machine encounters an fp unimp exception. Therefore, it must be set
4014
# before leaving this handler.
4015
#
4016
global _fpsp_unimp
4017
_fpsp_unimp:
4018
4019
link.w %a6,&-LOCAL_SIZE # init stack frame
4020
4021
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4022
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4023
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
4024
4025
btst &0x5,EXC_SR(%a6) # user mode exception?
4026
bne.b funimp_s # no; supervisor mode
4027
4028
# save the value of the user stack pointer onto the stack frame
4029
funimp_u:
4030
mov.l %usp,%a0 # fetch user stack pointer
4031
mov.l %a0,EXC_A7(%a6) # store in stack frame
4032
bra.b funimp_cont
4033
4034
# store the value of the supervisor stack pointer BEFORE the exc occurred.
4035
# old_sp is address just above stacked effective address.
4036
funimp_s:
4037
lea 4+EXC_EA(%a6),%a0 # load old a7'
4038
mov.l %a0,EXC_A7(%a6) # store a7'
4039
mov.l %a0,OLD_A7(%a6) # make a copy
4040
4041
funimp_cont:
4042
4043
# the FPIAR holds the "current PC" of the faulting instruction.
4044
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4045
4046
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4047
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
4048
bsr.l _imem_read_long # fetch the instruction words
4049
mov.l %d0,EXC_OPWORD(%a6)
4050
4051
############################################################################
4052
4053
fmov.l &0x0,%fpcr # clear FPCR
4054
fmov.l &0x0,%fpsr # clear FPSR
4055
4056
clr.b SPCOND_FLG(%a6) # clear "special case" flag
4057
4058
# Divide the fp instructions into 8 types based on the TYPE field in
4059
# bits 6-8 of the opword (types 6 and 7 are undefined).
4060
# (for the '060, only two types can take this exception)
4061
# bftst %d0{&7:&3} # test TYPE
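# note: the opword sits in the upper word of %d0 here, so opword bit 6 (the
# low bit of the TYPE field) is bit 22 of %d0. type 0 falls through to the
# general instruction path; type 1 branches to funimp_misc.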
4062
btst &22,%d0 # type 0 or 1 ?
4063
bne.w funimp_misc # type 1
4064
4065
#########################################
4066
# TYPE == 0: General instructions #
4067
#########################################
4068
funimp_gen:
4069
4070
clr.b STORE_FLG(%a6) # clear "store result" flag
4071
4072
# clear the ccode byte and exception status byte
4073
andi.l &0x00ff00ff,USER_FPSR(%a6)
4074
4075
bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
4076
cmpi.b %d1,&0x17 # is op an fmovecr?
4077
beq.w funimp_fmovcr # yes
4078
4079
funimp_gen_op:
4080
bsr.l _load_fop # load
4081
4082
clr.l %d0
4083
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
4084
4085
mov.b 1+EXC_CMDREG(%a6),%d1
4086
andi.w &0x003f,%d1 # extract extension bits
4087
lsl.w &0x3,%d1 # shift left 3 bits
4088
or.b STAG(%a6),%d1 # insert src optag bits
4089
4090
lea FP_DST(%a6),%a1 # pass dst ptr in a1
4091
lea FP_SRC(%a6),%a0 # pass src ptr in a0
4092
4093
mov.w (tbl_trans.w,%pc,%d1.w*2),%d1
4094
jsr (tbl_trans.w,%pc,%d1.w*1) # emulate
4095
4096
funimp_fsave:
4097
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
4098
bne.w funimp_ena # some are enabled
4099
4100
funimp_store:
4101
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4102
bsr.l store_fpreg # store result to fp regfile
4103
4104
funimp_gen_exit:
4105
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4106
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4107
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4108
4109
funimp_gen_exit_cmp:
4110
cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4111
beq.b funimp_gen_exit_a7 # yes
4112
4113
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4114
beq.b funimp_gen_exit_a7 # yes
4115
4116
funimp_gen_exit_cont:
4117
unlk %a6
4118
4119
funimp_gen_exit_cont2:
4120
btst &0x7,(%sp) # is trace on?
4121
beq.l _fpsp_done # no
4122
4123
# this catches a problem with the case where an exception will be re-inserted
4124
# into the machine. the frestore has already been executed...so, the fmov.l
4125
# of the control register by itself would trigger an unwanted exception.
4126
# until I feel like fixing this, we'll sidestep the exception.
4127
fsave -(%sp)
4128
fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
4129
frestore (%sp)+
4130
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
4131
bra.l _real_trace
4132
4133
funimp_gen_exit_a7:
4134
btst &0x5,EXC_SR(%a6) # supervisor or user mode?
4135
bne.b funimp_gen_exit_a7_s # supervisor
4136
4137
mov.l %a0,-(%sp)
4138
mov.l EXC_A7(%a6),%a0
4139
mov.l %a0,%usp
4140
mov.l (%sp)+,%a0
4141
bra.b funimp_gen_exit_cont
4142
4143
# if the instruction was executed from supervisor mode and the addressing
4144
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
4145
# "n" bytes where "n" is the size of the src operand type.
4146
# f<op>.{b,w,l,s,d,x,p}
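# for example, an "fsin.l (a7)+,fp0" from supervisor mode means the frame must
# move up 4 bytes: the PC/format words and the SR are copied 4 bytes higher,
# the byte count is parked in the old SR slot, and the "add.w (%sp),%sp" after
# the unlk consumes it so the rte uses the shifted frame.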
4147
funimp_gen_exit_a7_s:
4148
mov.l %d0,-(%sp) # save d0
4149
mov.l EXC_A7(%a6),%d0 # load new a7'
4150
sub.l OLD_A7(%a6),%d0 # subtract old a7'
4151
mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4152
mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4153
mov.w %d0,EXC_SR(%a6) # store incr number
4154
mov.l (%sp)+,%d0 # restore d0
4155
4156
unlk %a6
4157
4158
add.w (%sp),%sp # stack frame shifted
4159
bra.b funimp_gen_exit_cont2
4160
4161
######################
4162
# fmovecr.x #ccc,fpn #
4163
######################
4164
funimp_fmovcr:
4165
clr.l %d0
4166
mov.b FPCR_MODE(%a6),%d0
4167
mov.b 1+EXC_CMDREG(%a6),%d1
4168
andi.l &0x0000007f,%d1 # pass rom offset in d1
4169
bsr.l smovcr
4170
bra.w funimp_fsave
4171
4172
#########################################################################
4173
4174
#
4175
# the user has enabled some exceptions. we figure not to see this too
4176
# often so that's why it gets lower priority.
4177
#
4178
funimp_ena:
4179
4180
# was an exception set that was also enabled?
4181
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
4182
bfffo %d0{&24:&8},%d0 # find highest priority exception
4183
bne.b funimp_exc # at least one was set
4184
4185
# no exception that was enabled was set BUT if we got an exact overflow
4186
# and overflow wasn't enabled but inexact was (yech!) then this is
4187
# an inexact exception; otherwise, return to normal non-exception flow.
4188
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189
beq.w funimp_store # no; return to normal flow
4190
4191
# the overflow w/ exact result happened; but is inexact enabled in the FPCR?
4192
funimp_ovfl:
4193
btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194
beq.w funimp_store # no; return to normal flow
4195
bra.b funimp_exc_ovfl # yes
4196
4197
# some exception happened that was actually enabled.
4198
# we'll insert this new exception into the FPU and then return.
4199
funimp_exc:
4200
subi.l &24,%d0 # fix offset to be 0-7
4201
cmpi.b %d0,&0x6 # is exception INEX?
4202
bne.b funimp_exc_force # no
4203
4204
# the enabled exception was inexact. so, if it occurs with an overflow
4205
# or underflow that was disabled, then we have to force an overflow or
4206
# underflow frame. the eventual overflow or underflow handler will see that
4207
# it's actually an inexact and act appropriately. this is the only easy
4208
# way to have the EXOP available for the enabled inexact handler when
4209
# a disabled overflow or underflow has also happened.
4210
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211
bne.b funimp_exc_ovfl # yes
4212
btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213
bne.b funimp_exc_unfl # yes
4214
4215
# force the fsave exception status bits to signal an exception of the
4216
# appropriate type. don't forget to "skew" the source operand in case we
4217
# "unskewed" the one the hardware initially gave us.
4218
funimp_exc_force:
4219
mov.l %d0,-(%sp) # save d0
4220
bsr.l funimp_skew # check for special case
4221
mov.l (%sp)+,%d0 # restore d0
4222
mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4223
bra.b funimp_gen_exit2 # exit with frestore
4224
4225
tbl_funimp_except:
4226
short 0xe002, 0xe006, 0xe004, 0xe005
4227
short 0xe003, 0xe002, 0xe001, 0xe001
4228
4229
# insert an overflow frame
4230
funimp_exc_ovfl:
4231
bsr.l funimp_skew # check for special case
4232
mov.w &0xe005,2+FP_SRC(%a6)
4233
bra.b funimp_gen_exit2
4234
4235
# insert an underflow frame
4236
funimp_exc_unfl:
4237
bsr.l funimp_skew # check for special case
4238
mov.w &0xe003,2+FP_SRC(%a6)
4239
4240
# this is the general exit point for an enabled exception that will be
4241
# restored into the machine for the instruction just emulated.
4242
funimp_gen_exit2:
4243
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4244
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4246
4247
frestore FP_SRC(%a6) # insert exceptional status
4248
4249
bra.w funimp_gen_exit_cmp
4250
4251
############################################################################
4252
4253
#
4254
# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4255
#
4256
# These instructions were implemented on the '881/2 and '040 in hardware but
4257
# are emulated in software on the '060.
4258
#
4259
funimp_misc:
4260
bfextu %d0{&10:&3},%d1 # extract mode field
4261
cmpi.b %d1,&0x1 # is it an fdb<cc>?
4262
beq.w funimp_fdbcc # yes
4263
cmpi.b %d1,&0x7 # is it an fs<cc>?
4264
bne.w funimp_fscc # yes
4265
bfextu %d0{&13:&3},%d1
4266
cmpi.b %d1,&0x2 # is it an fs<cc>?
4267
blt.w funimp_fscc # yes
4268
4269
#########################
4270
# ftrap<cc> #
4271
# ftrap<cc>.w #<data> #
4272
# ftrap<cc>.l #<data> #
4273
#########################
4274
funimp_ftrapcc:
4275
4276
bsr.l _ftrapcc # FTRAP<cc>()
4277
4278
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279
beq.w funimp_bsun # yes
4280
4281
cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282
bne.w funimp_done # no
4283
4284
# FP UNIMP FRAME TRAP FRAME
4285
# ***************** *****************
4286
# ** <EA> ** ** Current PC **
4287
# ***************** *****************
4288
# * 0x2 * 0x02c * * 0x2 * 0x01c *
4289
# ***************** *****************
4290
# ** Next PC ** ** Next PC **
4291
# ***************** *****************
4292
# * SR * * SR *
4293
# ***************** *****************
4294
# (6 words) (6 words)
4295
#
4296
# the ftrapcc instruction should take a trap. so, here we must create a
4297
# trap stack frame from an unimplemented fp instruction stack frame and
4298
# jump to the user supplied entry point for the trap exception
4299
funimp_ftrapcc_tp:
4300
mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301
mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c
4302
4303
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4304
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4306
4307
unlk %a6
4308
bra.l _real_trap
4309
4310
#########################
4311
# fdb<cc> Dn,<label> #
4312
#########################
4313
funimp_fdbcc:
4314
4315
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4316
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4317
bsr.l _imem_read_word # read displacement
4318
4319
tst.l %d1 # did ifetch fail?
4320
bne.w funimp_iacc # yes
4321
4322
ext.l %d0 # sign extend displacement
4323
4324
bsr.l _fdbcc # FDB<cc>()
4325
4326
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4327
beq.w funimp_bsun
4328
4329
bra.w funimp_done # branch to finish
4330
4331
#################
4332
# fs<cc>.b <ea> #
4333
#################
4334
funimp_fscc:
4335
4336
bsr.l _fscc # FS<cc>()
4337
4338
# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339
# does not need to update "An" before taking a bsun exception.
4340
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4341
beq.w funimp_bsun
4342
4343
btst &0x5,EXC_SR(%a6) # yes; is it a user mode exception?
4344
bne.b funimp_fscc_s # no
4345
4346
funimp_fscc_u:
4347
mov.l EXC_A7(%a6),%a0 # yes; set new USP
4348
mov.l %a0,%usp
4349
bra.w funimp_done # branch to finish
4350
4351
# remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352
# so, the least significant WORD of the stacked effective address got
4353
# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354
# so that the rte will work correctly without destroying the result.
4355
# even though the operation size is byte, the stack ptr is decr by 2.
4356
#
4357
# remember, also, this instruction may be traced.
4358
funimp_fscc_s:
4359
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360
bne.w funimp_done # no
4361
4362
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4363
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4365
4366
unlk %a6
4367
4368
btst &0x7,(%sp) # is trace enabled?
4369
bne.b funimp_fscc_s_trace # yes
4370
4371
subq.l &0x2,%sp
4372
mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4373
mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
4374
bra.l _fpsp_done
4375
4376
funimp_fscc_s_trace:
4377
subq.l &0x2,%sp
4378
mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4379
mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
4380
mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
4381
fmov.l %fpiar,0x8(%sp) # insert "current PC"
4382
4383
bra.l _real_trace
4384
4385
#
4386
# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387
# the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388
# restore a bsun exception into the machine, and branch to the user
4389
# supplied bsun hook.
4390
#
4391
# FP UNIMP FRAME BSUN FRAME
4392
# ***************** *****************
4393
# ** <EA> ** * 0x0 * 0x0c0 *
4394
# ***************** *****************
4395
# * 0x2 * 0x02c * ** Current PC **
4396
# ***************** *****************
4397
# ** Next PC ** * SR *
4398
# ***************** *****************
4399
# * SR * (4 words)
4400
# *****************
4401
# (6 words)
4402
#
4403
funimp_bsun:
4404
mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
4405
mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406
mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4407
4408
mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
4409
4410
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4411
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4413
4414
frestore FP_SRC(%a6) # restore bsun exception
4415
4416
unlk %a6
4417
4418
addq.l &0x4,%sp # erase sludge
4419
4420
bra.l _real_bsun # branch to user bsun hook
4421
4422
#
4423
# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4424
# and return.
4425
#
4426
# as usual, we have to check for trace mode being on here. since instructions
4427
# modifying the supervisor stack frame don't pass through here, this is a
4428
# relatively easy task.
4429
#
4430
funimp_done:
4431
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4432
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4434
4435
unlk %a6
4436
4437
btst &0x7,(%sp) # is trace enabled?
4438
bne.b funimp_trace # yes
4439
4440
bra.l _fpsp_done
4441
4442
# FP UNIMP FRAME TRACE FRAME
4443
# ***************** *****************
4444
# ** <EA> ** ** Current PC **
4445
# ***************** *****************
4446
# * 0x2 * 0x02c * * 0x2 * 0x024 *
4447
# ***************** *****************
4448
# ** Next PC ** ** Next PC **
4449
# ***************** *****************
4450
# * SR * * SR *
4451
# ***************** *****************
4452
# (6 words) (6 words)
4453
#
4454
# the ftrapcc/fscc/fdbcc should take a trace trap. so, here we must create a
4455
# trace stack frame from an unimplemented fp instruction stack frame and
4456
# jump to the user supplied entry point for the trace exception
4457
funimp_trace:
4458
fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
4459
mov.b &0x24,0x7(%sp) # vector offset = 0x024
4460
4461
bra.l _real_trace
4462
4463
################################################################
4464
4465
global tbl_trans
4466
swbeg &0x1c0
4467
tbl_trans:
4468
short tbl_trans - tbl_trans # $00-0 fmovecr all
4469
short tbl_trans - tbl_trans # $00-1 fmovecr all
4470
short tbl_trans - tbl_trans # $00-2 fmovecr all
4471
short tbl_trans - tbl_trans # $00-3 fmovecr all
4472
short tbl_trans - tbl_trans # $00-4 fmovecr all
4473
short tbl_trans - tbl_trans # $00-5 fmovecr all
4474
short tbl_trans - tbl_trans # $00-6 fmovecr all
4475
short tbl_trans - tbl_trans # $00-7 fmovecr all
4476
4477
short tbl_trans - tbl_trans # $01-0 fint norm
4478
short tbl_trans - tbl_trans # $01-1 fint zero
4479
short tbl_trans - tbl_trans # $01-2 fint inf
4480
short tbl_trans - tbl_trans # $01-3 fint qnan
4481
short tbl_trans - tbl_trans # $01-5 fint denorm
4482
short tbl_trans - tbl_trans # $01-4 fint snan
4483
short tbl_trans - tbl_trans # $01-6 fint unnorm
4484
short tbl_trans - tbl_trans # $01-7 ERROR
4485
4486
short ssinh - tbl_trans # $02-0 fsinh norm
4487
short src_zero - tbl_trans # $02-1 fsinh zero
4488
short src_inf - tbl_trans # $02-2 fsinh inf
4489
short src_qnan - tbl_trans # $02-3 fsinh qnan
4490
short ssinhd - tbl_trans # $02-5 fsinh denorm
4491
short src_snan - tbl_trans # $02-4 fsinh snan
4492
short tbl_trans - tbl_trans # $02-6 fsinh unnorm
4493
short tbl_trans - tbl_trans # $02-7 ERROR
4494
4495
short tbl_trans - tbl_trans # $03-0 fintrz norm
4496
short tbl_trans - tbl_trans # $03-1 fintrz zero
4497
short tbl_trans - tbl_trans # $03-2 fintrz inf
4498
short tbl_trans - tbl_trans # $03-3 fintrz qnan
4499
short tbl_trans - tbl_trans # $03-5 fintrz denorm
4500
short tbl_trans - tbl_trans # $03-4 fintrz snan
4501
short tbl_trans - tbl_trans # $03-6 fintrz unnorm
4502
short tbl_trans - tbl_trans # $03-7 ERROR
4503
4504
short tbl_trans - tbl_trans # $04-0 fsqrt norm
4505
short tbl_trans - tbl_trans # $04-1 fsqrt zero
4506
short tbl_trans - tbl_trans # $04-2 fsqrt inf
4507
short tbl_trans - tbl_trans # $04-3 fsqrt qnan
4508
short tbl_trans - tbl_trans # $04-5 fsqrt denorm
4509
short tbl_trans - tbl_trans # $04-4 fsqrt snan
4510
short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
4511
short tbl_trans - tbl_trans # $04-7 ERROR
4512
4513
short tbl_trans - tbl_trans # $05-0 ERROR
4514
short tbl_trans - tbl_trans # $05-1 ERROR
4515
short tbl_trans - tbl_trans # $05-2 ERROR
4516
short tbl_trans - tbl_trans # $05-3 ERROR
4517
short tbl_trans - tbl_trans # $05-4 ERROR
4518
short tbl_trans - tbl_trans # $05-5 ERROR
4519
short tbl_trans - tbl_trans # $05-6 ERROR
4520
short tbl_trans - tbl_trans # $05-7 ERROR
4521
4522
short slognp1 - tbl_trans # $06-0 flognp1 norm
4523
short src_zero - tbl_trans # $06-1 flognp1 zero
4524
short sopr_inf - tbl_trans # $06-2 flognp1 inf
4525
short src_qnan - tbl_trans # $06-3 flognp1 qnan
4526
short slognp1d - tbl_trans # $06-5 flognp1 denorm
4527
short src_snan - tbl_trans # $06-4 flognp1 snan
4528
short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
4529
short tbl_trans - tbl_trans # $06-7 ERROR
4530
4531
short tbl_trans - tbl_trans # $07-0 ERROR
4532
short tbl_trans - tbl_trans # $07-1 ERROR
4533
short tbl_trans - tbl_trans # $07-2 ERROR
4534
short tbl_trans - tbl_trans # $07-3 ERROR
4535
short tbl_trans - tbl_trans # $07-4 ERROR
4536
short tbl_trans - tbl_trans # $07-5 ERROR
4537
short tbl_trans - tbl_trans # $07-6 ERROR
4538
short tbl_trans - tbl_trans # $07-7 ERROR
4539
4540
short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
4541
short src_zero - tbl_trans # $08-1 fetoxm1 zero
4542
short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
4543
short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
4544
short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
4545
short src_snan - tbl_trans # $08-4 fetoxm1 snan
4546
short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
4547
short tbl_trans - tbl_trans # $08-7 ERROR
4548
4549
short stanh - tbl_trans # $09-0 ftanh norm
4550
short src_zero - tbl_trans # $09-1 ftanh zero
4551
short src_one - tbl_trans # $09-2 ftanh inf
4552
short src_qnan - tbl_trans # $09-3 ftanh qnan
4553
short stanhd - tbl_trans # $09-5 ftanh denorm
4554
short src_snan - tbl_trans # $09-4 ftanh snan
4555
short tbl_trans - tbl_trans # $09-6 ftanh unnorm
4556
short tbl_trans - tbl_trans # $09-7 ERROR
4557
4558
short satan - tbl_trans # $0a-0 fatan norm
4559
short src_zero - tbl_trans # $0a-1 fatan zero
4560
short spi_2 - tbl_trans # $0a-2 fatan inf
4561
short src_qnan - tbl_trans # $0a-3 fatan qnan
4562
short satand - tbl_trans # $0a-5 fatan denorm
4563
short src_snan - tbl_trans # $0a-4 fatan snan
4564
short tbl_trans - tbl_trans # $0a-6 fatan unnorm
4565
short tbl_trans - tbl_trans # $0a-7 ERROR
4566
4567
short tbl_trans - tbl_trans # $0b-0 ERROR
4568
short tbl_trans - tbl_trans # $0b-1 ERROR
4569
short tbl_trans - tbl_trans # $0b-2 ERROR
4570
short tbl_trans - tbl_trans # $0b-3 ERROR
4571
short tbl_trans - tbl_trans # $0b-4 ERROR
4572
short tbl_trans - tbl_trans # $0b-5 ERROR
4573
short tbl_trans - tbl_trans # $0b-6 ERROR
4574
short tbl_trans - tbl_trans # $0b-7 ERROR
4575
4576
short sasin - tbl_trans # $0c-0 fasin norm
4577
short src_zero - tbl_trans # $0c-1 fasin zero
4578
short t_operr - tbl_trans # $0c-2 fasin inf
4579
short src_qnan - tbl_trans # $0c-3 fasin qnan
4580
short sasind - tbl_trans # $0c-5 fasin denorm
4581
short src_snan - tbl_trans # $0c-4 fasin snan
4582
short tbl_trans - tbl_trans # $0c-6 fasin unnorm
4583
short tbl_trans - tbl_trans # $0c-7 ERROR
4584
4585
short satanh - tbl_trans # $0d-0 fatanh norm
4586
short src_zero - tbl_trans # $0d-1 fatanh zero
4587
short t_operr - tbl_trans # $0d-2 fatanh inf
4588
short src_qnan - tbl_trans # $0d-3 fatanh qnan
4589
short satanhd - tbl_trans # $0d-5 fatanh denorm
4590
short src_snan - tbl_trans # $0d-4 fatanh snan
4591
short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
4592
short tbl_trans - tbl_trans # $0d-7 ERROR
4593
4594
short ssin - tbl_trans # $0e-0 fsin norm
4595
short src_zero - tbl_trans # $0e-1 fsin zero
4596
short t_operr - tbl_trans # $0e-2 fsin inf
4597
short src_qnan - tbl_trans # $0e-3 fsin qnan
4598
short ssind - tbl_trans # $0e-5 fsin denorm
4599
short src_snan - tbl_trans # $0e-4 fsin snan
4600
short tbl_trans - tbl_trans # $0e-6 fsin unnorm
4601
short tbl_trans - tbl_trans # $0e-7 ERROR
4602
4603
short stan - tbl_trans # $0f-0 ftan norm
4604
short src_zero - tbl_trans # $0f-1 ftan zero
4605
short t_operr - tbl_trans # $0f-2 ftan inf
4606
short src_qnan - tbl_trans # $0f-3 ftan qnan
4607
short stand - tbl_trans # $0f-5 ftan denorm
4608
short src_snan - tbl_trans # $0f-4 ftan snan
4609
short tbl_trans - tbl_trans # $0f-6 ftan unnorm
4610
short tbl_trans - tbl_trans # $0f-7 ERROR
4611
4612
short setox - tbl_trans # $10-0 fetox norm
4613
short ld_pone - tbl_trans # $10-1 fetox zero
4614
short szr_inf - tbl_trans # $10-2 fetox inf
4615
short src_qnan - tbl_trans # $10-3 fetox qnan
4616
short setoxd - tbl_trans # $10-5 fetox denorm
4617
short src_snan - tbl_trans # $10-4 fetox snan
4618
short tbl_trans - tbl_trans # $10-6 fetox unnorm
4619
short tbl_trans - tbl_trans # $10-7 ERROR
4620
4621
short stwotox - tbl_trans # $11-0 ftwotox norm
4622
short ld_pone - tbl_trans # $11-1 ftwotox zero
4623
short szr_inf - tbl_trans # $11-2 ftwotox inf
4624
short src_qnan - tbl_trans # $11-3 ftwotox qnan
4625
short stwotoxd - tbl_trans # $11-5 ftwotox denorm
4626
short src_snan - tbl_trans # $11-4 ftwotox snan
4627
short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
4628
short tbl_trans - tbl_trans # $11-7 ERROR
4629
4630
short stentox - tbl_trans # $12-0 ftentox norm
4631
short ld_pone - tbl_trans # $12-1 ftentox zero
4632
short szr_inf - tbl_trans # $12-2 ftentox inf
4633
short src_qnan - tbl_trans # $12-3 ftentox qnan
4634
short stentoxd - tbl_trans # $12-5 ftentox denorm
4635
short src_snan - tbl_trans # $12-4 ftentox snan
4636
short tbl_trans - tbl_trans # $12-6 ftentox unnorm
4637
short tbl_trans - tbl_trans # $12-7 ERROR
4638
4639
short tbl_trans - tbl_trans # $13-0 ERROR
4640
short tbl_trans - tbl_trans # $13-1 ERROR
4641
short tbl_trans - tbl_trans # $13-2 ERROR
4642
short tbl_trans - tbl_trans # $13-3 ERROR
4643
short tbl_trans - tbl_trans # $13-4 ERROR
4644
short tbl_trans - tbl_trans # $13-5 ERROR
4645
short tbl_trans - tbl_trans # $13-6 ERROR
4646
short tbl_trans - tbl_trans # $13-7 ERROR
4647
4648
short slogn - tbl_trans # $14-0 flogn norm
4649
short t_dz2 - tbl_trans # $14-1 flogn zero
4650
short sopr_inf - tbl_trans # $14-2 flogn inf
4651
short src_qnan - tbl_trans # $14-3 flogn qnan
4652
short slognd - tbl_trans # $14-5 flogn denorm
4653
short src_snan - tbl_trans # $14-4 flogn snan
4654
short tbl_trans - tbl_trans # $14-6 flogn unnorm
4655
short tbl_trans - tbl_trans # $14-7 ERROR
4656
4657
short slog10 - tbl_trans # $15-0 flog10 norm
4658
short t_dz2 - tbl_trans # $15-1 flog10 zero
4659
short sopr_inf - tbl_trans # $15-2 flog10 inf
4660
short src_qnan - tbl_trans # $15-3 flog10 qnan
4661
short slog10d - tbl_trans # $15-5 flog10 denorm
4662
short src_snan - tbl_trans # $15-4 flog10 snan
4663
short tbl_trans - tbl_trans # $15-6 flog10 unnorm
4664
short tbl_trans - tbl_trans # $15-7 ERROR
4665
4666
short slog2 - tbl_trans # $16-0 flog2 norm
4667
short t_dz2 - tbl_trans # $16-1 flog2 zero
4668
short sopr_inf - tbl_trans # $16-2 flog2 inf
4669
short src_qnan - tbl_trans # $16-3 flog2 qnan
4670
short slog2d - tbl_trans # $16-5 flog2 denorm
4671
short src_snan - tbl_trans # $16-4 flog2 snan
4672
short tbl_trans - tbl_trans # $16-6 flog2 unnorm
4673
short tbl_trans - tbl_trans # $16-7 ERROR
4674
4675
short tbl_trans - tbl_trans # $17-0 ERROR
4676
short tbl_trans - tbl_trans # $17-1 ERROR
4677
short tbl_trans - tbl_trans # $17-2 ERROR
4678
short tbl_trans - tbl_trans # $17-3 ERROR
4679
short tbl_trans - tbl_trans # $17-4 ERROR
4680
short tbl_trans - tbl_trans # $17-5 ERROR
4681
short tbl_trans - tbl_trans # $17-6 ERROR
4682
short tbl_trans - tbl_trans # $17-7 ERROR
4683
4684
short tbl_trans - tbl_trans # $18-0 fabs norm
4685
short tbl_trans - tbl_trans # $18-1 fabs zero
4686
short tbl_trans - tbl_trans # $18-2 fabs inf
4687
short tbl_trans - tbl_trans # $18-3 fabs qnan
4688
short tbl_trans - tbl_trans # $18-5 fabs denorm
4689
short tbl_trans - tbl_trans # $18-4 fabs snan
4690
short tbl_trans - tbl_trans # $18-6 fabs unnorm
4691
short tbl_trans - tbl_trans # $18-7 ERROR
4692
4693
short scosh - tbl_trans # $19-0 fcosh norm
4694
short ld_pone - tbl_trans # $19-1 fcosh zero
4695
short ld_pinf - tbl_trans # $19-2 fcosh inf
4696
short src_qnan - tbl_trans # $19-3 fcosh qnan
4697
short scoshd - tbl_trans # $19-5 fcosh denorm
4698
short src_snan - tbl_trans # $19-4 fcosh snan
4699
short tbl_trans - tbl_trans # $19-6 fcosh unnorm
4700
short tbl_trans - tbl_trans # $19-7 ERROR
4701
4702
short tbl_trans - tbl_trans # $1a-0 fneg norm
4703
short tbl_trans - tbl_trans # $1a-1 fneg zero
4704
short tbl_trans - tbl_trans # $1a-2 fneg inf
4705
short tbl_trans - tbl_trans # $1a-3 fneg qnan
4706
short tbl_trans - tbl_trans # $1a-5 fneg denorm
4707
short tbl_trans - tbl_trans # $1a-4 fneg snan
4708
short tbl_trans - tbl_trans # $1a-6 fneg unnorm
4709
short tbl_trans - tbl_trans # $1a-7 ERROR
4710
4711
short tbl_trans - tbl_trans # $1b-0 ERROR
4712
short tbl_trans - tbl_trans # $1b-1 ERROR
4713
short tbl_trans - tbl_trans # $1b-2 ERROR
4714
short tbl_trans - tbl_trans # $1b-3 ERROR
4715
short tbl_trans - tbl_trans # $1b-4 ERROR
4716
short tbl_trans - tbl_trans # $1b-5 ERROR
4717
short tbl_trans - tbl_trans # $1b-6 ERROR
4718
short tbl_trans - tbl_trans # $1b-7 ERROR
4719
4720
short sacos - tbl_trans # $1c-0 facos norm
4721
short ld_ppi2 - tbl_trans # $1c-1 facos zero
4722
short t_operr - tbl_trans # $1c-2 facos inf
4723
short src_qnan - tbl_trans # $1c-3 facos qnan
4724
short sacosd - tbl_trans # $1c-5 facos denorm
4725
short src_snan - tbl_trans # $1c-4 facos snan
4726
short tbl_trans - tbl_trans # $1c-6 facos unnorm
4727
short tbl_trans - tbl_trans # $1c-7 ERROR
4728
4729
short scos - tbl_trans # $1d-0 fcos norm
4730
short ld_pone - tbl_trans # $1d-1 fcos zero
4731
short t_operr - tbl_trans # $1d-2 fcos inf
4732
short src_qnan - tbl_trans # $1d-3 fcos qnan
4733
short scosd - tbl_trans # $1d-5 fcos denorm
4734
short src_snan - tbl_trans # $1d-4 fcos snan
4735
short tbl_trans - tbl_trans # $1d-6 fcos unnorm
4736
short tbl_trans - tbl_trans # $1d-7 ERROR
4737
4738
short sgetexp - tbl_trans # $1e-0 fgetexp norm
4739
short src_zero - tbl_trans # $1e-1 fgetexp zero
4740
short t_operr - tbl_trans # $1e-2 fgetexp inf
4741
short src_qnan - tbl_trans # $1e-3 fgetexp qnan
4742
short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
4743
short src_snan - tbl_trans # $1e-4 fgetexp snan
4744
short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
4745
short tbl_trans - tbl_trans # $1e-7 ERROR
4746
4747
short sgetman - tbl_trans # $1f-0 fgetman norm
4748
short src_zero - tbl_trans # $1f-1 fgetman zero
4749
short t_operr - tbl_trans # $1f-2 fgetman inf
4750
short src_qnan - tbl_trans # $1f-3 fgetman qnan
4751
short sgetmand - tbl_trans # $1f-5 fgetman denorm
4752
short src_snan - tbl_trans # $1f-4 fgetman snan
4753
short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
4754
short tbl_trans - tbl_trans # $1f-7 ERROR
4755
4756
short tbl_trans - tbl_trans # $20-0 fdiv norm
4757
short tbl_trans - tbl_trans # $20-1 fdiv zero
4758
short tbl_trans - tbl_trans # $20-2 fdiv inf
4759
short tbl_trans - tbl_trans # $20-3 fdiv qnan
4760
short tbl_trans - tbl_trans # $20-5 fdiv denorm
4761
short tbl_trans - tbl_trans # $20-4 fdiv snan
4762
short tbl_trans - tbl_trans # $20-6 fdiv unnorm
4763
short tbl_trans - tbl_trans # $20-7 ERROR
4764
4765
short smod_snorm - tbl_trans # $21-0 fmod norm
4766
short smod_szero - tbl_trans # $21-1 fmod zero
4767
short smod_sinf - tbl_trans # $21-2 fmod inf
4768
short sop_sqnan - tbl_trans # $21-3 fmod qnan
4769
short smod_sdnrm - tbl_trans # $21-5 fmod denorm
4770
short sop_ssnan - tbl_trans # $21-4 fmod snan
4771
short tbl_trans - tbl_trans # $21-6 fmod unnorm
4772
short tbl_trans - tbl_trans # $21-7 ERROR
4773
4774
short tbl_trans - tbl_trans # $22-0 fadd norm
4775
short tbl_trans - tbl_trans # $22-1 fadd zero
4776
short tbl_trans - tbl_trans # $22-2 fadd inf
4777
short tbl_trans - tbl_trans # $22-3 fadd qnan
4778
short tbl_trans - tbl_trans # $22-5 fadd denorm
4779
short tbl_trans - tbl_trans # $22-4 fadd snan
4780
short tbl_trans - tbl_trans # $22-6 fadd unnorm
4781
short tbl_trans - tbl_trans # $22-7 ERROR
4782
4783
short tbl_trans - tbl_trans # $23-0 fmul norm
4784
short tbl_trans - tbl_trans # $23-1 fmul zero
4785
short tbl_trans - tbl_trans # $23-2 fmul inf
4786
short tbl_trans - tbl_trans # $23-3 fmul qnan
4787
short tbl_trans - tbl_trans # $23-5 fmul denorm
4788
short tbl_trans - tbl_trans # $23-4 fmul snan
4789
short tbl_trans - tbl_trans # $23-6 fmul unnorm
4790
short tbl_trans - tbl_trans # $23-7 ERROR
4791
4792
short tbl_trans - tbl_trans # $24-0 fsgldiv norm
4793
short tbl_trans - tbl_trans # $24-1 fsgldiv zero
4794
short tbl_trans - tbl_trans # $24-2 fsgldiv inf
4795
short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
4796
short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
4797
short tbl_trans - tbl_trans # $24-4 fsgldiv snan
4798
short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
4799
short tbl_trans - tbl_trans # $24-7 ERROR
4800
4801
short srem_snorm - tbl_trans # $25-0 frem norm
4802
short srem_szero - tbl_trans # $25-1 frem zero
4803
short srem_sinf - tbl_trans # $25-2 frem inf
4804
short sop_sqnan - tbl_trans # $25-3 frem qnan
4805
short srem_sdnrm - tbl_trans # $25-5 frem denorm
4806
short sop_ssnan - tbl_trans # $25-4 frem snan
4807
short tbl_trans - tbl_trans # $25-6 frem unnorm
4808
short tbl_trans - tbl_trans # $25-7 ERROR
4809
4810
short sscale_snorm - tbl_trans # $26-0 fscale norm
4811
short sscale_szero - tbl_trans # $26-1 fscale zero
4812
short sscale_sinf - tbl_trans # $26-2 fscale inf
4813
short sop_sqnan - tbl_trans # $26-3 fscale qnan
4814
short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4815
short sop_ssnan - tbl_trans # $26-4 fscale snan
4816
short tbl_trans - tbl_trans # $26-6 fscale unnorm
4817
short tbl_trans - tbl_trans # $26-7 ERROR
4818
4819
short tbl_trans - tbl_trans # $27-0 fsglmul norm
4820
short tbl_trans - tbl_trans # $27-1 fsglmul zero
4821
short tbl_trans - tbl_trans # $27-2 fsglmul inf
4822
short tbl_trans - tbl_trans # $27-3 fsglmul qnan
4823
short tbl_trans - tbl_trans # $27-5 fsglmul denorm
4824
short tbl_trans - tbl_trans # $27-4 fsglmul snan
4825
short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
4826
short tbl_trans - tbl_trans # $27-7 ERROR
4827
4828
short tbl_trans - tbl_trans # $28-0 fsub norm
4829
short tbl_trans - tbl_trans # $28-1 fsub zero
4830
short tbl_trans - tbl_trans # $28-2 fsub inf
4831
short tbl_trans - tbl_trans # $28-3 fsub qnan
4832
short tbl_trans - tbl_trans # $28-5 fsub denorm
4833
short tbl_trans - tbl_trans # $28-4 fsub snan
4834
short tbl_trans - tbl_trans # $28-6 fsub unnorm
4835
short tbl_trans - tbl_trans # $28-7 ERROR
4836
4837
short tbl_trans - tbl_trans # $29-0 ERROR
4838
short tbl_trans - tbl_trans # $29-1 ERROR
4839
short tbl_trans - tbl_trans # $29-2 ERROR
4840
short tbl_trans - tbl_trans # $29-3 ERROR
4841
short tbl_trans - tbl_trans # $29-4 ERROR
4842
short tbl_trans - tbl_trans # $29-5 ERROR
4843
short tbl_trans - tbl_trans # $29-6 ERROR
4844
short tbl_trans - tbl_trans # $29-7 ERROR
4845
4846
short tbl_trans - tbl_trans # $2a-0 ERROR
4847
short tbl_trans - tbl_trans # $2a-1 ERROR
4848
short tbl_trans - tbl_trans # $2a-2 ERROR
4849
short tbl_trans - tbl_trans # $2a-3 ERROR
4850
short tbl_trans - tbl_trans # $2a-4 ERROR
4851
short tbl_trans - tbl_trans # $2a-5 ERROR
4852
short tbl_trans - tbl_trans # $2a-6 ERROR
4853
short tbl_trans - tbl_trans # $2a-7 ERROR
4854
4855
short tbl_trans - tbl_trans # $2b-0 ERROR
4856
short tbl_trans - tbl_trans # $2b-1 ERROR
4857
short tbl_trans - tbl_trans # $2b-2 ERROR
4858
short tbl_trans - tbl_trans # $2b-3 ERROR
4859
short tbl_trans - tbl_trans # $2b-4 ERROR
4860
short tbl_trans - tbl_trans # $2b-5 ERROR
4861
short tbl_trans - tbl_trans # $2b-6 ERROR
4862
short tbl_trans - tbl_trans # $2b-7 ERROR
4863
4864
short tbl_trans - tbl_trans # $2c-0 ERROR
4865
short tbl_trans - tbl_trans # $2c-1 ERROR
4866
short tbl_trans - tbl_trans # $2c-2 ERROR
4867
short tbl_trans - tbl_trans # $2c-3 ERROR
4868
short tbl_trans - tbl_trans # $2c-4 ERROR
4869
short tbl_trans - tbl_trans # $2c-5 ERROR
4870
short tbl_trans - tbl_trans # $2c-6 ERROR
4871
short tbl_trans - tbl_trans # $2c-7 ERROR
4872
4873
short tbl_trans - tbl_trans # $2d-0 ERROR
4874
short tbl_trans - tbl_trans # $2d-1 ERROR
4875
short tbl_trans - tbl_trans # $2d-2 ERROR
4876
short tbl_trans - tbl_trans # $2d-3 ERROR
4877
short tbl_trans - tbl_trans # $2d-4 ERROR
4878
short tbl_trans - tbl_trans # $2d-5 ERROR
4879
short tbl_trans - tbl_trans # $2d-6 ERROR
4880
short tbl_trans - tbl_trans # $2d-7 ERROR
4881
4882
short tbl_trans - tbl_trans # $2e-0 ERROR
4883
short tbl_trans - tbl_trans # $2e-1 ERROR
4884
short tbl_trans - tbl_trans # $2e-2 ERROR
4885
short tbl_trans - tbl_trans # $2e-3 ERROR
4886
short tbl_trans - tbl_trans # $2e-4 ERROR
4887
short tbl_trans - tbl_trans # $2e-5 ERROR
4888
short tbl_trans - tbl_trans # $2e-6 ERROR
4889
short tbl_trans - tbl_trans # $2e-7 ERROR
4890
4891
short tbl_trans - tbl_trans # $2f-0 ERROR
4892
short tbl_trans - tbl_trans # $2f-1 ERROR
4893
short tbl_trans - tbl_trans # $2f-2 ERROR
4894
short tbl_trans - tbl_trans # $2f-3 ERROR
4895
short tbl_trans - tbl_trans # $2f-4 ERROR
4896
short tbl_trans - tbl_trans # $2f-5 ERROR
4897
short tbl_trans - tbl_trans # $2f-6 ERROR
4898
short tbl_trans - tbl_trans # $2f-7 ERROR
4899
4900
short ssincos - tbl_trans # $30-0 fsincos norm
4901
short ssincosz - tbl_trans # $30-1 fsincos zero
4902
short ssincosi - tbl_trans # $30-2 fsincos inf
4903
short ssincosqnan - tbl_trans # $30-3 fsincos qnan
4904
short ssincosd - tbl_trans # $30-5 fsincos denorm
4905
short ssincossnan - tbl_trans # $30-4 fsincos snan
4906
short tbl_trans - tbl_trans # $30-6 fsincos unnorm
4907
short tbl_trans - tbl_trans # $30-7 ERROR
4908
4909
short ssincos - tbl_trans # $31-0 fsincos norm
4910
short ssincosz - tbl_trans # $31-1 fsincos zero
4911
short ssincosi - tbl_trans # $31-2 fsincos inf
4912
short ssincosqnan - tbl_trans # $31-3 fsincos qnan
4913
short ssincosd - tbl_trans # $31-5 fsincos denorm
4914
short ssincossnan - tbl_trans # $31-4 fsincos snan
4915
short tbl_trans - tbl_trans # $31-6 fsincos unnorm
4916
short tbl_trans - tbl_trans # $31-7 ERROR
4917
4918
short ssincos - tbl_trans # $32-0 fsincos norm
4919
short ssincosz - tbl_trans # $32-1 fsincos zero
4920
short ssincosi - tbl_trans # $32-2 fsincos inf
4921
short ssincosqnan - tbl_trans # $32-3 fsincos qnan
4922
short ssincosd - tbl_trans # $32-5 fsincos denorm
4923
short ssincossnan - tbl_trans # $32-4 fsincos snan
4924
short tbl_trans - tbl_trans # $32-6 fsincos unnorm
4925
short tbl_trans - tbl_trans # $32-7 ERROR
4926
4927
short ssincos - tbl_trans # $33-0 fsincos norm
4928
short ssincosz - tbl_trans # $33-1 fsincos zero
4929
short ssincosi - tbl_trans # $33-2 fsincos inf
4930
short ssincosqnan - tbl_trans # $33-3 fsincos qnan
4931
short ssincosd - tbl_trans # $33-5 fsincos denorm
4932
short ssincossnan - tbl_trans # $33-4 fsincos snan
4933
short tbl_trans - tbl_trans # $33-6 fsincos unnorm
4934
short tbl_trans - tbl_trans # $33-7 ERROR
4935
4936
short ssincos - tbl_trans # $34-0 fsincos norm
4937
short ssincosz - tbl_trans # $34-1 fsincos zero
4938
short ssincosi - tbl_trans # $34-2 fsincos inf
4939
short ssincosqnan - tbl_trans # $34-3 fsincos qnan
4940
short ssincosd - tbl_trans # $34-5 fsincos denorm
4941
short ssincossnan - tbl_trans # $34-4 fsincos snan
4942
short tbl_trans - tbl_trans # $34-6 fsincos unnorm
4943
short tbl_trans - tbl_trans # $34-7 ERROR
4944
4945
short ssincos - tbl_trans # $35-0 fsincos norm
4946
short ssincosz - tbl_trans # $35-1 fsincos zero
4947
short ssincosi - tbl_trans # $35-2 fsincos inf
4948
short ssincosqnan - tbl_trans # $35-3 fsincos qnan
4949
short ssincosd - tbl_trans # $35-5 fsincos denorm
4950
short ssincossnan - tbl_trans # $35-4 fsincos snan
4951
short tbl_trans - tbl_trans # $35-6 fsincos unnorm
4952
short tbl_trans - tbl_trans # $35-7 ERROR
4953
4954
short ssincos - tbl_trans # $36-0 fsincos norm
4955
short ssincosz - tbl_trans # $36-1 fsincos zero
4956
short ssincosi - tbl_trans # $36-2 fsincos inf
4957
short ssincosqnan - tbl_trans # $36-3 fsincos qnan
4958
short ssincosd - tbl_trans # $36-5 fsincos denorm
4959
short ssincossnan - tbl_trans # $36-4 fsincos snan
4960
short tbl_trans - tbl_trans # $36-6 fsincos unnorm
4961
short tbl_trans - tbl_trans # $36-7 ERROR
4962
4963
short ssincos - tbl_trans # $37-0 fsincos norm
4964
short ssincosz - tbl_trans # $37-1 fsincos zero
4965
short ssincosi - tbl_trans # $37-2 fsincos inf
4966
short ssincosqnan - tbl_trans # $37-3 fsincos qnan
4967
short ssincosd - tbl_trans # $37-5 fsincos denorm
4968
short ssincossnan - tbl_trans # $37-4 fsincos snan
4969
short tbl_trans - tbl_trans # $37-6 fsincos unnorm
4970
short tbl_trans - tbl_trans # $37-7 ERROR
4971
4972
##########

# the instruction fetch access for the displacement word for the
# fdbcc emulation failed. here, we create an access error frame
# from the current frame and branch to _real_access().
funimp_iacc:
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1

mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

unlk %a6

mov.l (%sp),-(%sp) # store SR,hi(PC)
mov.w 0x8(%sp),0x4(%sp) # store lo(PC)
mov.w &0x4008,0x6(%sp) # store voff
mov.l 0x2(%sp),0x8(%sp) # store EA
mov.l &0x09428001,0xc(%sp) # store FSLW

btst &0x5,(%sp) # user or supervisor mode?
beq.b funimp_iacc_end # user
bset &0x2,0xd(%sp) # set supervisor TM bit

funimp_iacc_end:
bra.l _real_access
4998
4999
#########################################################################
# ssin(): computes the sine of a normalized input #
# ssind(): computes the sine of a denormalized input #
# scos(): computes the cosine of a normalized input #
# scosd(): computes the cosine of a denormalized input #
# ssincos(): computes the sine and cosine of a normalized input #
# ssincosd(): computes the sine and cosine of a denormalized input #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = sin(X) or cos(X) #
# #
# For ssincos(X): #
# fp0 = sin(X) #
# fp1 = cos(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 1 ulp in 64 significant bits, i.e. #
# within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# #
# SIN and COS: #
# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
# #
# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
# #
# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 4, so in particular, k = 0, 1, 2, or 3. #
# Overwrite k by k := k + AdjN. #
# #
# 4. If k is even, go to 6. #
# #
# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
# Return sgn*cos(r) where cos(r) is approximated by an #
# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
# s = r*r. #
# Exit. #
# #
# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
# where sin(r) is approximated by an odd polynomial in r #
# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
# Exit. #
# #
# 7. If |X| > 1, go to 9. #
# #
# 8. (|X|<2**(-40)) If SIN is invoked, return X; #
# otherwise return 1. #
# #
# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
# go back to 3. #
# #
# SINCOS: #
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
# #
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 4, so in particular, k = 0, 1, 2, or 3. #
# #
# 3. If k is even, go to 5. #
# #
# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
# j1 exclusive ORed with the l.s.b. of k. #
# sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
# sin(r) and cos(r) are computed as odd and even #
# polynomials in r, respectively. Exit. #
# #
# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
# sin(r) and cos(r) are computed as odd and even #
# polynomials in r, respectively. Exit. #
# #
# 6. If |X| > 1, go to 8. #
# #
# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
# #
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
# go back to 2. #
# #
#########################################################################
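# A hedged C-style sketch (not part of the original package) of steps 3-6
# above, with double standing in for extended precision and a naive
# reduction in place of the PITBL/REDUCEX machinery; poly_sin()/poly_cos()
# are hypothetical stand-ins for the SINPOLY/COSPOLY kernels below:
#
#   #include <math.h>
#   extern double poly_sin(double r), poly_cos(double r);
#
#   double ssin_scos(double x, int adjn)        /* adjn: 0 = sin, 1 = cos  */
#   {
#       if (fabs(x) < 0x1p-40)                  /* step 8: tiny argument   */
#           return adjn ? 1.0 : x;
#       int    n = (int)nearbyint(x * (2.0 / M_PI));
#       double r = x - n * (M_PI / 2.0);        /* step 3: X = N(Pi/2) + r */
#       int    k = ((n + adjn) % 4 + 4) % 4;    /* k = (N + AdjN) mod 4    */
#       double sgn = (k & 2) ? -1.0 : 1.0;      /* sgn = (-1)**j           */
#       return sgn * ((k & 1) ? poly_cos(r)     /* k odd : sgn*cos(r)      */
#                             : poly_sin(r));   /* k even: sgn*sin(r)      */
#   }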
5084
5085
SINA7: long 0xBD6AAA77,0xCCC994F5
5086
SINA6: long 0x3DE61209,0x7AAE8DA1
5087
SINA5: long 0xBE5AE645,0x2A118AE4
5088
SINA4: long 0x3EC71DE3,0xA5341531
5089
SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090
SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
5091
SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5092
5093
COSB8: long 0x3D2AC4D0,0xD6011EE3
5094
COSB7: long 0xBDA9396F,0x9F45AC19
5095
COSB6: long 0x3E21EED9,0x0612C972
5096
COSB5: long 0xBE927E4F,0xB79D9FCF
5097
COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098
COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099
COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100
COSB1: long 0xBF000000
5101
5102
set INARG,FP_SCR0
5103
5104
set X,FP_SCR0
5105
# set XDCARE,X+2
5106
set XFRAC,X+4
5107
5108
set RPRIME,FP_SCR0
5109
set SPRIME,FP_SCR1
5110
5111
set POSNEG1,L_SCR1
5112
set TWOTO63,L_SCR1
5113
5114
set ENDFLAG,L_SCR2
5115
set INT,L_SCR2
5116
5117
set ADJN,L_SCR3
5118
5119
############################################
5120
global ssin
5121
ssin:
5122
mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0
5123
bra.b SINBGN
5124
5125
############################################
5126
global scos
5127
scos:
5128
mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1
5129
5130
############################################
5131
SINBGN:
5132
#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
5133
5134
fmov.x (%a0),%fp0 # LOAD INPUT
5135
fmov.x %fp0,X(%a6) # save input at X
5136
5137
# "COMPACTIFY" X
5138
mov.l (%a0),%d1 # put exp in hi word
5139
mov.w 4(%a0),%d1 # fetch hi(man)
5140
and.l &0x7FFFFFFF,%d1 # strip sign
5141
5142
cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5143
bge.b SOK1 # no
5144
bra.w SINSM # yes; input is very small
5145
5146
SOK1:
5147
cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5148
blt.b SINMAIN # no
5149
bra.w SREDUCEX # yes; input is very large
5150
5151
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5152
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5153
SINMAIN:
5154
fmov.x %fp0,%fp1
5155
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5156
5157
lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5158
5159
fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5160
5161
mov.l INT(%a6),%d1 # make a copy of N
5162
asl.l &4,%d1 # N *= 16
5163
add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5164
5165
# A1 IS THE ADDRESS OF N*PIBY2
5166
# ...WHICH IS IN TWO PIECES Y1 & Y2
5167
fsub.x (%a1)+,%fp0 # X-Y1
5168
fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5169
5170
SINCONT:
5171
#--continuation from REDUCEX
5172
5173
#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5174
mov.l INT(%a6),%d1
5175
add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN
5176
ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE
5177
cmp.l %d1,&0
5178
blt.w COSPOLY
5179
5180
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5181
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5185
#--WHERE T=S*S.
5186
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
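# The two bracketed chains above are independent, which lets the multiplies
# overlap in the FPU pipeline. A hedged C sketch of the same evaluation
# order (a1..a7 are assumed to hold the SINA1..SINA7 coefficients; double
# is used only for illustration):
#
#   extern const double a1, a2, a3, a4, a5, a6, a7;
#
#   double sin_poly(double r, double sgn)       /* returns SGN*sin(r)      */
#   {
#       double s = r * r, t = s * s;            /* S = R*R, T = S*S        */
#       double odd  = a1 + t * (a3 + t * (a5 + t * a7));
#       double even = s * (a2 + t * (a4 + t * a6));
#       double rp   = sgn * r;                  /* R' = SGN*R              */
#       return rp + rp * s * (odd + even);
#   }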
5188
SINPOLY:
5189
fmovm.x &0x0c,-(%sp) # save fp2/fp3
5190
5191
fmov.x %fp0,X(%a6) # X IS R
5192
fmul.x %fp0,%fp0 # FP0 IS S
5193
5194
fmov.d SINA7(%pc),%fp3
5195
fmov.d SINA6(%pc),%fp2
5196
5197
fmov.x %fp0,%fp1
5198
fmul.x %fp1,%fp1 # FP1 IS T
5199
5200
ror.l &1,%d1
5201
and.l &0x80000000,%d1
5202
# ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5203
eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5204
5205
fmul.x %fp1,%fp3 # TA7
5206
fmul.x %fp1,%fp2 # TA6
5207
5208
fadd.d SINA5(%pc),%fp3 # A5+TA7
5209
fadd.d SINA4(%pc),%fp2 # A4+TA6
5210
5211
fmul.x %fp1,%fp3 # T(A5+TA7)
5212
fmul.x %fp1,%fp2 # T(A4+TA6)
5213
5214
fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5215
fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5216
5217
fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5218
5219
fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5220
fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5221
fmul.x X(%a6),%fp0 # R'*S
5222
5223
fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5224
5225
fmul.x %fp1,%fp0 # SIN(R')-R'
5226
5227
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5228
5229
fmov.l %d0,%fpcr # restore users round mode,prec
5230
fadd.x X(%a6),%fp0 # last inst - possible exception set
5231
bra t_inx2
5232
5233
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5234
#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5238
#--WHERE T=S*S.
5239
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
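# Same idea for the cosine chain, as a hedged C sketch (b1..b8 are assumed
# to hold the COSB1..COSB8 coefficients; double for illustration only):
#
#   extern const double b1, b2, b3, b4, b5, b6, b7, b8;
#
#   double cos_poly(double r, double sgn)       /* returns SGN*cos(r)      */
#   {
#       double s = r * r, t = s * s;            /* S = R*R, T = S*S        */
#       double odd  = b1 + t * (b3 + t * (b5 + t * b7));
#       double even = s * (b2 + t * (b4 + t * (b6 + t * b8)));
#       double sp   = sgn * s;                  /* S' = SGN*S              */
#       return sgn + sp * (odd + even);
#   }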
5242
COSPOLY:
5243
fmovm.x &0x0c,-(%sp) # save fp2/fp3
5244
5245
fmul.x %fp0,%fp0 # FP0 IS S
5246
5247
fmov.d COSB8(%pc),%fp2
5248
fmov.d COSB7(%pc),%fp3
5249
5250
fmov.x %fp0,%fp1
5251
fmul.x %fp1,%fp1 # FP1 IS T
5252
5253
fmov.x %fp0,X(%a6) # X IS S
5254
ror.l &1,%d1
5255
and.l &0x80000000,%d1
5256
# ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5257
5258
fmul.x %fp1,%fp2 # TB8
5259
5260
eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5261
and.l &0x80000000,%d1
5262
5263
fmul.x %fp1,%fp3 # TB7
5264
5265
or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE
5266
mov.l %d1,POSNEG1(%a6)
5267
5268
fadd.d COSB6(%pc),%fp2 # B6+TB8
5269
fadd.d COSB5(%pc),%fp3 # B5+TB7
5270
5271
fmul.x %fp1,%fp2 # T(B6+TB8)
5272
fmul.x %fp1,%fp3 # T(B5+TB7)
5273
5274
fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5275
fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5276
5277
fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5278
fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5279
5280
fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5281
fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5282
5283
fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5284
5285
fadd.x %fp1,%fp0
5286
5287
fmul.x X(%a6),%fp0
5288
5289
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5290
5291
fmov.l %d0,%fpcr # restore users round mode,prec
5292
fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5293
bra t_inx2
5294
5295
##############################################
5296
5297
# SINe: Big OR Small?
5298
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299
#--IF |X| < 2**(-40), RETURN X OR 1.
5300
SINBORS:
5301
cmp.l %d1,&0x3FFF8000
5302
bgt.l SREDUCEX
5303
5304
SINSM:
5305
mov.l ADJN(%a6),%d1
5306
cmp.l %d1,&0
5307
bgt.b COSTINY
5308
5309
# here, the operation may underflow iff the precision is sgl or dbl.
5310
# extended denorms are handled through another entry point.
5311
SINTINY:
5312
# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5313
5314
fmov.l %d0,%fpcr # restore users round mode,prec
5315
mov.b &FMOV_OP,%d1 # last inst is MOVE
5316
fmov.x X(%a6),%fp0 # last inst - possible exception set
5317
bra t_catch
5318
5319
COSTINY:
5320
fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5321
fmov.l %d0,%fpcr # restore users round mode,prec
5322
fadd.s &0x80800000,%fp0 # last inst - possible exception set
5323
bra t_pinx2
5324
5325
################################################
5326
global ssind
5327
#--SIN(X) = X FOR DENORMALIZED X
5328
ssind:
5329
bra t_extdnrm
5330
5331
############################################
5332
global scosd
5333
#--COS(X) = 1 FOR DENORMALIZED X
5334
scosd:
5335
fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5336
bra t_pinx2
5337
5338
##################################################
5339
5340
global ssincos
5341
ssincos:
5342
#--SET ADJN TO 4
5343
mov.l &4,ADJN(%a6)
5344
5345
fmov.x (%a0),%fp0 # LOAD INPUT
5346
fmov.x %fp0,X(%a6)
5347
5348
mov.l (%a0),%d1
5349
mov.w 4(%a0),%d1
5350
and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5351
5352
cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5353
bge.b SCOK1
5354
bra.w SCSM
5355
5356
SCOK1:
5357
cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5358
blt.b SCMAIN
5359
bra.w SREDUCEX
5360
5361
5362
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5363
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5364
SCMAIN:
5365
fmov.x %fp0,%fp1
5366
5367
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5368
5369
lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5370
5371
fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5372
5373
mov.l INT(%a6),%d1
5374
asl.l &4,%d1
5375
add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5376
5377
fsub.x (%a1)+,%fp0 # X-Y1
5378
fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5379
5380
SCCONT:
5381
#--continuation point from REDUCEX
5382
5383
mov.l INT(%a6),%d1
5384
ror.l &1,%d1
5385
cmp.l %d1,&0 # D0 < 0 IFF N IS ODD
5386
bge.w NEVEN
5387
5388
SNODD:
5389
#--REGISTERS SAVED SO FAR: D0, A0, FP2.
5390
fmovm.x &0x04,-(%sp) # save fp2
5391
5392
fmov.x %fp0,RPRIME(%a6)
5393
fmul.x %fp0,%fp0 # FP0 IS S = R*R
5394
fmov.d SINA7(%pc),%fp1 # A7
5395
fmov.d COSB8(%pc),%fp2 # B8
5396
fmul.x %fp0,%fp1 # SA7
5397
fmul.x %fp0,%fp2 # SB8
5398
5399
mov.l %d2,-(%sp)
5400
mov.l %d1,%d2
5401
ror.l &1,%d2
5402
and.l &0x80000000,%d2
5403
eor.l %d1,%d2
5404
and.l &0x80000000,%d2
5405
5406
fadd.d SINA6(%pc),%fp1 # A6+SA7
5407
fadd.d COSB7(%pc),%fp2 # B7+SB8
5408
5409
fmul.x %fp0,%fp1 # S(A6+SA7)
5410
eor.l %d2,RPRIME(%a6)
5411
mov.l (%sp)+,%d2
5412
fmul.x %fp0,%fp2 # S(B7+SB8)
5413
ror.l &1,%d1
5414
and.l &0x80000000,%d1
5415
mov.l &0x3F800000,POSNEG1(%a6)
5416
eor.l %d1,POSNEG1(%a6)
5417
5418
fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5419
fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5420
5421
fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5422
fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5423
fmov.x %fp0,SPRIME(%a6)
5424
5425
fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5426
eor.l %d1,SPRIME(%a6)
5427
fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5428
5429
fmul.x %fp0,%fp1 # S(A4+...)
5430
fmul.x %fp0,%fp2 # S(B5+...)
5431
5432
fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5433
fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5434
5435
fmul.x %fp0,%fp1 # S(A3+...)
5436
fmul.x %fp0,%fp2 # S(B4+...)
5437
5438
fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5439
fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5440
5441
fmul.x %fp0,%fp1 # S(A2+...)
5442
fmul.x %fp0,%fp2 # S(B3+...)
5443
5444
fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5445
fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5446
5447
fmul.x %fp0,%fp1 # S(A1+...)
5448
fmul.x %fp2,%fp0 # S(B2+...)
5449
5450
fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5451
fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5452
fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5453
5454
fmovm.x (%sp)+,&0x20 # restore fp2
5455
5456
fmov.l %d0,%fpcr
5457
fadd.x RPRIME(%a6),%fp1 # COS(X)
5458
bsr sto_cos # store cosine result
5459
fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5460
bra t_inx2
5461
5462
NEVEN:
5463
#--REGISTERS SAVED SO FAR: FP2.
5464
fmovm.x &0x04,-(%sp) # save fp2
5465
5466
fmov.x %fp0,RPRIME(%a6)
5467
fmul.x %fp0,%fp0 # FP0 IS S = R*R
5468
5469
fmov.d COSB8(%pc),%fp1 # B8
5470
fmov.d SINA7(%pc),%fp2 # A7
5471
5472
fmul.x %fp0,%fp1 # SB8
5473
fmov.x %fp0,SPRIME(%a6)
5474
fmul.x %fp0,%fp2 # SA7
5475
5476
ror.l &1,%d1
5477
and.l &0x80000000,%d1
5478
5479
fadd.d COSB7(%pc),%fp1 # B7+SB8
5480
fadd.d SINA6(%pc),%fp2 # A6+SA7
5481
5482
eor.l %d1,RPRIME(%a6)
5483
eor.l %d1,SPRIME(%a6)
5484
5485
fmul.x %fp0,%fp1 # S(B7+SB8)
5486
5487
or.l &0x3F800000,%d1
5488
mov.l %d1,POSNEG1(%a6)
5489
5490
fmul.x %fp0,%fp2 # S(A6+SA7)
5491
5492
fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5493
fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5494
5495
fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5496
fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5497
5498
fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5499
fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5500
5501
fmul.x %fp0,%fp1 # S(B5+...)
5502
fmul.x %fp0,%fp2 # S(A4+...)
5503
5504
fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5505
fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5506
5507
fmul.x %fp0,%fp1 # S(B4+...)
5508
fmul.x %fp0,%fp2 # S(A3+...)
5509
5510
fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5511
fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5512
5513
fmul.x %fp0,%fp1 # S(B3+...)
5514
fmul.x %fp0,%fp2 # S(A2+...)
5515
5516
fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5517
fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5518
5519
fmul.x %fp0,%fp1 # S(B2+...)
5520
fmul.x %fp2,%fp0 # S(A1+...)
5521
5522
5523
fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5524
fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5525
fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5526
5527
fmovm.x (%sp)+,&0x20 # restore fp2
5528
5529
fmov.l %d0,%fpcr
5530
fadd.s POSNEG1(%a6),%fp1 # COS(X)
5531
bsr sto_cos # store cosine result
5532
fadd.x RPRIME(%a6),%fp0 # SIN(X)
5533
bra t_inx2
5534
5535
################################################
5536
5537
SCBORS:
5538
cmp.l %d1,&0x3FFF8000
5539
bgt.w SREDUCEX
5540
5541
################################################
5542
5543
SCSM:
5544
# mov.w &0x0000,XDCARE(%a6)
5545
fmov.s &0x3F800000,%fp1
5546
5547
fmov.l %d0,%fpcr
5548
fsub.s &0x00800000,%fp1
5549
bsr sto_cos # store cosine result
5550
fmov.l %fpcr,%d0 # d0 must have fpcr,too
5551
mov.b &FMOV_OP,%d1 # last inst is MOVE
5552
fmov.x X(%a6),%fp0
5553
bra t_catch
5554
5555
##############################################
5556
5557
global ssincosd
5558
#--SIN AND COS OF X FOR DENORMALIZED X
5559
ssincosd:
5560
mov.l %d0,-(%sp) # save d0
5561
fmov.s &0x3F800000,%fp1
5562
bsr sto_cos # store cosine result
5563
mov.l (%sp)+,%d0 # restore d0
5564
bra t_extdnrm
5565
5566
############################################
5567
5568
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5571
SREDUCEX:
5572
fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5573
mov.l %d2,-(%sp) # save d2
5574
fmov.s &0x00000000,%fp1 # fp1 = 0
5575
5576
#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
5577
#--there is a danger of unwanted overflow in first LOOP iteration. In this
5578
#--case, reduce argument by one remainder step to make subsequent reduction
5579
#--safe.
5580
cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5581
bne.b SLOOP # no
5582
5583
# yes; create 2**16383*PI/2
5584
mov.w &0x7ffe,FP_SCR0_EX(%a6)
5585
mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5586
clr.l FP_SCR0_LO(%a6)
5587
5588
# create low half of 2**16383*PI/2 at FP_SCR1
5589
mov.w &0x7fdc,FP_SCR1_EX(%a6)
5590
mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5591
clr.l FP_SCR1_LO(%a6)
5592
5593
ftest.x %fp0 # test sign of argument
5594
fblt.w sred_neg
5595
5596
or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5597
or.b &0x80,FP_SCR1_EX(%a6)
5598
sred_neg:
5599
fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5600
fmov.x %fp0,%fp1 # save high result in fp1
5601
fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5602
fsub.x %fp0,%fp1 # determine low component of result
5603
fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5604
5605
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606
#--integer quotient will be stored in N
5607
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
5608
SLOOP:
5609
fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5610
mov.w INARG(%a6),%d1
5611
mov.l %d1,%a1 # save a copy of D0
5612
and.l &0x00007FFF,%d1
5613
sub.l &0x00003FFF,%d1 # d0 = K
5614
cmp.l %d1,&28
5615
ble.b SLASTLOOP
5616
SCONTLOOP:
5617
sub.l &27,%d1 # d0 = L := K-27
5618
mov.b &0,ENDFLAG(%a6)
5619
bra.b SWORK
5620
SLASTLOOP:
5621
clr.l %d1 # d0 = L := 0
5622
mov.b &1,ENDFLAG(%a6)
5623
5624
SWORK:
5625
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5626
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5627
5628
#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629
#--2**L * (PIby2_1), 2**L * (PIby2_2)
5630
5631
mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5632
sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5633
5634
mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5635
mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5636
mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5637
5638
fmov.x %fp0,%fp2
5639
fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5640
5641
#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5643
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5645
#--US THE DESIRED VALUE IN FLOATING POINT.
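# The same "add, then subtract, a large power of two" rounding trick in a
# hedged C sketch. It assumes round-to-nearest and |x| well below the magic
# constant; double has a 53-bit mantissa, so the constant is 2**52 here,
# while the code below uses 2**63 for the 64-bit extended mantissa:
#
#   #include <math.h>
#   double round_to_int(double x)
#   {
#       volatile double big = copysign(0x1p52, x);  /* SIGN(x)*2**52       */
#       volatile double y   = x + big;              /* fraction rounds off */
#       return y - big;                             /* nearest integer     */
#   }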
5646
mov.l %a1,%d2
5647
swap %d2
5648
and.l &0x80000000,%d2
5649
or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5650
mov.l %d2,TWOTO63(%a6)
5651
fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
5652
fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5653
# fint.x %fp2
5654
5655
#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656
mov.l %d1,%d2 # d2 = L
5657
5658
add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5659
mov.w %d2,FP_SCR0_EX(%a6)
5660
mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5661
clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5662
5663
add.l &0x00003FDD,%d1
5664
mov.w %d1,FP_SCR1_EX(%a6)
5665
mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5666
clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5667
5668
mov.b ENDFLAG(%a6),%d1
5669
5670
#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671
#--P2 = 2**(L) * Piby2_2
5672
fmov.x %fp2,%fp4 # fp4 = N
5673
fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5674
fmov.x %fp2,%fp5 # fp5 = N
5675
fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5676
fmov.x %fp4,%fp3 # fp3 = W = N*P1
5677
5678
#--we want P+p = W+w but |p| <= half ulp of P
5679
#--Then, we need to compute A := R-P and a := r-p
5680
fadd.x %fp5,%fp3 # fp3 = P
5681
fsub.x %fp3,%fp4 # fp4 = W-P
5682
5683
fsub.x %fp3,%fp0 # fp0 = A := R - P
5684
fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5685
5686
fmov.x %fp0,%fp3 # fp3 = A
5687
fsub.x %fp4,%fp1 # fp1 = a := r - p
5688
5689
#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5690
#--|r| <= half ulp of R.
5691
fadd.x %fp1,%fp0 # fp0 = R := A+a
5692
#--No need to calculate r if this is the last loop
5693
cmp.b %d1,&0
5694
bgt.w SRESTORE
5695
5696
#--Need to calculate r
5697
fsub.x %fp0,%fp3 # fp3 = A-R
5698
fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5699
bra.w SLOOP
5700
5701
SRESTORE:
5702
fmov.l %fp2,INT(%a6)
5703
mov.l (%sp)+,%d2 # restore d2
5704
fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5705
5706
mov.l ADJN(%a6),%d1
5707
cmp.l %d1,&4
5708
5709
blt.w SINCONT
5710
bra.w SCCONT
5711
5712
#########################################################################
# stan(): computes the tangent of a normalized input #
# stand(): computes the tangent of a denormalized input #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = tan(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 3 ulps in 64 significant bits, i.e. #
# within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# #
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
# #
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 2, so in particular, k = 0 or 1. #
# #
# 3. If k is odd, go to 5. #
# #
# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
# rational function U/V where #
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
# Exit. #
# #
# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
# a rational function U/V where #
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
# -Cot(r) = -V/U. Exit. #
# #
# 6. If |X| > 1, go to 8. #
# #
# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
# #
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
# to 2. #
# #
#########################################################################
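# A hedged C sketch of the selection logic above; poly_p() and poly_q() are
# hypothetical stand-ins for the TANP1..TANP3 and TANQ1..TANQ4 kernels, and
# the reduction is simplified to library calls:
#
#   #include <math.h>
#   extern double poly_p(double s), poly_q(double s);
#
#   double stan_sketch(double x)
#   {
#       int    n = (int)nearbyint(x * (2.0 / M_PI));  /* X = N(Pi/2) + r   */
#       double r = x - n * (M_PI / 2.0);
#       double s = r * r;
#       double u = r + r * s * poly_p(s);       /* U = r + r*s*(P1+...)    */
#       double v = 1.0 + s * poly_q(s);         /* V = 1 + s*(Q1+...)      */
#       return (n & 1) ? -v / u : u / v;        /* odd N: Tan(X) = -cot(r) */
#   }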
5758
5759
TANQ4:
5760
long 0x3EA0B759,0xF50F8688
5761
TANP3:
5762
long 0xBEF2BAA5,0xA8924F04
5763
5764
TANQ3:
5765
long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5766
5767
TANP2:
5768
long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5769
5770
TANQ2:
5771
long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5772
5773
TANP1:
5774
long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5775
5776
TANQ1:
5777
long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5778
5779
INVTWOPI:
5780
long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5781
5782
TWOPI1:
5783
long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5784
TWOPI2:
5785
long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5786
5787
#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5788
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5789
#--MOST 69 BITS LONG.
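# A hedged C analogue of the two-piece lookup: pitbl_hi[]/pitbl_lo[] are
# hypothetical arrays holding the leading and trailing pieces of N*Pi/2
# (N = -32..32), mirroring the PITBL layout used by the fsub.x/fsub.s pair:
#
#   extern const double pitbl_hi[65];
#   extern const float  pitbl_lo[65];
#
#   double reduce_by_table(double x, int n)     /* -32 <= n <= 32          */
#   {
#       return (x - pitbl_hi[n + 32]) - pitbl_lo[n + 32];  /* (X-Y1)-Y2    */
#   }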
5790
# global PITBL
5791
PITBL:
5792
long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793
long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794
long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795
long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796
long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797
long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798
long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799
long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800
long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801
long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5802
long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803
long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804
long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805
long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806
long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807
long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808
long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809
long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810
long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811
long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812
long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813
long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814
long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815
long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816
long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817
long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818
long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819
long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820
long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821
long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822
long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823
long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824
long 0x00000000,0x00000000,0x00000000,0x00000000
5825
long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826
long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827
long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828
long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829
long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830
long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831
long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832
long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833
long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834
long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835
long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836
long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837
long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838
long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839
long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840
long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841
long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842
long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843
long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844
long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845
long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846
long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847
long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5848
long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849
long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850
long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851
long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852
long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853
long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854
long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855
long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856
long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5857
5858
set INARG,FP_SCR0
5859
5860
set TWOTO63,L_SCR1
5861
set INT,L_SCR1
5862
set ENDFLAG,L_SCR2
5863
5864
global stan
5865
stan:
5866
fmov.x (%a0),%fp0 # LOAD INPUT
5867
5868
mov.l (%a0),%d1
5869
mov.w 4(%a0),%d1
5870
and.l &0x7FFFFFFF,%d1
5871
5872
cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5873
bge.b TANOK1
5874
bra.w TANSM
5875
TANOK1:
5876
cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5877
blt.b TANMAIN
5878
bra.w REDUCEX
5879
5880
TANMAIN:
5881
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5882
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5883
fmov.x %fp0,%fp1
5884
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5885
5886
lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5887
5888
fmov.l %fp1,%d1 # CONVERT TO INTEGER
5889
5890
asl.l &4,%d1
5891
add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5892
5893
fsub.x (%a1)+,%fp0 # X-Y1
5894
5895
fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5896
5897
ror.l &5,%d1
5898
and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0
5899
5900
TANCONT:
5901
fmovm.x &0x0c,-(%sp) # save fp2,fp3
5902
5903
cmp.l %d1,&0
5904
blt.w NODD
5905
5906
fmov.x %fp0,%fp1
5907
fmul.x %fp1,%fp1 # S = R*R
5908
5909
fmov.d TANQ4(%pc),%fp3
5910
fmov.d TANP3(%pc),%fp2
5911
5912
fmul.x %fp1,%fp3 # SQ4
5913
fmul.x %fp1,%fp2 # SP3
5914
5915
fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5916
fadd.x TANP2(%pc),%fp2 # P2+SP3
5917
5918
fmul.x %fp1,%fp3 # S(Q3+SQ4)
5919
fmul.x %fp1,%fp2 # S(P2+SP3)
5920
5921
fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5922
fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5923
5924
fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5925
fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5926
5927
fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5928
fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5929
5930
fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5931
5932
fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3))
5933
5934
fadd.s &0x3F800000,%fp1 # 1+S(Q1+...)
5935
5936
fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5937
5938
fmov.l %d0,%fpcr # restore users round mode,prec
5939
fdiv.x %fp1,%fp0 # last inst - possible exception set
5940
bra t_inx2
5941
5942
NODD:
5943
fmov.x %fp0,%fp1
5944
fmul.x %fp0,%fp0 # S = R*R
5945
5946
fmov.d TANQ4(%pc),%fp3
5947
fmov.d TANP3(%pc),%fp2
5948
5949
fmul.x %fp0,%fp3 # SQ4
5950
fmul.x %fp0,%fp2 # SP3
5951
5952
fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5953
fadd.x TANP2(%pc),%fp2 # P2+SP3
5954
5955
fmul.x %fp0,%fp3 # S(Q3+SQ4)
5956
fmul.x %fp0,%fp2 # S(P2+SP3)
5957
5958
fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5959
fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5960
5961
fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5962
fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5963
5964
fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5965
fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5966
5967
fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5968
5969
fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5970
fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5971
5972
fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5973
5974
fmov.x %fp1,-(%sp)
5975
eor.l &0x80000000,(%sp)
5976
5977
fmov.l %d0,%fpcr # restore users round mode,prec
5978
fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5979
bra t_inx2
5980
5981
TANBORS:
5982
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5983
#--IF |X| < 2**(-40), RETURN X OR 1.
5984
cmp.l %d1,&0x3FFF8000
5985
bgt.b REDUCEX
5986
5987
TANSM:
5988
fmov.x %fp0,-(%sp)
5989
fmov.l %d0,%fpcr # restore users round mode,prec
5990
mov.b &FMOV_OP,%d1 # last inst is MOVE
5991
fmov.x (%sp)+,%fp0 # last inst - possible exception set
5992
bra t_catch
5993
5994
global stand
5995
#--TAN(X) = X FOR DENORMALIZED X
5996
stand:
5997
bra t_extdnrm
5998
5999
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6000
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6001
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
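# One pass of the loop below, as a hedged C sketch: pi2_hi/pi2_lo are
# illustrative two-piece Pi/2 constants, and the 2**L rescaling that keeps
# the integer part below 2**29 is omitted for brevity:
#
#   #include <math.h>
#   extern const double pi2_hi, pi2_lo;         /* Piby2_1, Piby2_2        */
#
#   double reduce_pass(double x, double *rlo)   /* x = R, *rlo = r         */
#   {
#       double n  = nearbyint(x * (2.0 / M_PI));
#       double w  = n * pi2_hi;                 /* W = N*P1                */
#       double ww = n * pi2_lo;                 /* w = N*P2                */
#       double p  = w + ww;                     /* P + p = W + w           */
#       double pc = (w - p) + ww;               /* p, the lost low part    */
#       double a  = x - p;                      /* A = R - P               */
#       double ac = *rlo - pc;                  /* a = r - p               */
#       double rn = a + ac;                     /* new R = A + a           */
#       *rlo      = (a - rn) + ac;              /* new r = (A - R) + a     */
#       return rn;
#   }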
6002
REDUCEX:
6003
fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
6004
mov.l %d2,-(%sp) # save d2
6005
fmov.s &0x00000000,%fp1 # fp1 = 0
6006
6007
#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6008
#--there is a danger of unwanted overflow in first LOOP iteration. In this
6009
#--case, reduce argument by one remainder step to make subsequent reduction
6010
#--safe.
6011
cmp.l %d1,&0x7ffeffff # is arg dangerously large?
6012
bne.b LOOP # no
6013
6014
# yes; create 2**16383*PI/2
6015
mov.w &0x7ffe,FP_SCR0_EX(%a6)
6016
mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
6017
clr.l FP_SCR0_LO(%a6)
6018
6019
# create low half of 2**16383*PI/2 at FP_SCR1
6020
mov.w &0x7fdc,FP_SCR1_EX(%a6)
6021
mov.l &0x85a308d3,FP_SCR1_HI(%a6)
6022
clr.l FP_SCR1_LO(%a6)
6023
6024
ftest.x %fp0 # test sign of argument
6025
fblt.w red_neg
6026
6027
or.b &0x80,FP_SCR0_EX(%a6) # positive arg
6028
or.b &0x80,FP_SCR1_EX(%a6)
6029
red_neg:
6030
fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
6031
fmov.x %fp0,%fp1 # save high result in fp1
6032
fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
6033
fsub.x %fp0,%fp1 # determine low component of result
6034
fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
6035
6036
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6037
#--integer quotient will be stored in N
6038
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
6039
LOOP:
6040
fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
6041
mov.w INARG(%a6),%d1
6042
mov.l %d1,%a1 # save a copy of D0
6043
and.l &0x00007FFF,%d1
6044
sub.l &0x00003FFF,%d1 # d0 = K
6045
cmp.l %d1,&28
6046
ble.b LASTLOOP
6047
CONTLOOP:
6048
sub.l &27,%d1 # d0 = L := K-27
6049
mov.b &0,ENDFLAG(%a6)
6050
bra.b WORK
6051
LASTLOOP:
6052
clr.l %d1 # d0 = L := 0
6053
mov.b &1,ENDFLAG(%a6)
6054
6055
WORK:
6056
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6057
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6058
6059
#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6060
#--2**L * (PIby2_1), 2**L * (PIby2_2)
6061
6062
mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
6063
sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
6064
6065
mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
6066
mov.l &0x4E44152A,FP_SCR0_LO(%a6)
6067
mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
6068
6069
fmov.x %fp0,%fp2
6070
fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
6071
6072
#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6073
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6074
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6075
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6076
#--US THE DESIRED VALUE IN FLOATING POINT.
6077
mov.l %a1,%d2
6078
swap %d2
6079
and.l &0x80000000,%d2
6080
or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
6081
mov.l %d2,TWOTO63(%a6)
6082
fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
6083
fsub.s TWOTO63(%a6),%fp2 # fp2 = N
6084
# fintrz.x %fp2,%fp2
6085
6086
#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6087
mov.l %d1,%d2 # d2 = L
6088
6089
add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
6090
mov.w %d2,FP_SCR0_EX(%a6)
6091
mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
6092
clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
6093
6094
add.l &0x00003FDD,%d1
6095
mov.w %d1,FP_SCR1_EX(%a6)
6096
mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6097
clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6098
6099
mov.b ENDFLAG(%a6),%d1
6100
6101
#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6102
#--P2 = 2**(L) * Piby2_2
6103
fmov.x %fp2,%fp4 # fp4 = N
6104
fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6105
fmov.x %fp2,%fp5 # fp5 = N
6106
fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6107
fmov.x %fp4,%fp3 # fp3 = W = N*P1
6108
6109
#--we want P+p = W+w but |p| <= half ulp of P
6110
#--Then, we need to compute A := R-P and a := r-p
6111
fadd.x %fp5,%fp3 # fp3 = P
6112
fsub.x %fp3,%fp4 # fp4 = W-P
6113
6114
fsub.x %fp3,%fp0 # fp0 = A := R - P
6115
fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6116
6117
fmov.x %fp0,%fp3 # fp3 = A
6118
fsub.x %fp4,%fp1 # fp1 = a := r - p
6119
6120
#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6121
#--|r| <= half ulp of R.
6122
fadd.x %fp1,%fp0 # fp0 = R := A+a
6123
#--No need to calculate r if this is the last loop
6124
cmp.b %d1,&0
6125
bgt.w RESTORE
6126
6127
#--Need to calculate r
6128
fsub.x %fp0,%fp3 # fp3 = A-R
6129
fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6130
bra.w LOOP
6131
6132
RESTORE:
6133
fmov.l %fp2,INT(%a6)
6134
mov.l (%sp)+,%d2 # restore d2
6135
fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6136
6137
mov.l INT(%a6),%d1
6138
ror.l &1,%d1
6139
6140
bra.w TANCONT
6141
6142
#########################################################################
# satan(): computes the arctangent of a normalized number #
# satand(): computes the arctangent of a denormalized number #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = arctan(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 2 ulps in 64 significant bits, #
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
# #
# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
# Note that k = -4, -3, ..., or 3. #
# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
# significant bits of X with a bit 1 attached at the 6th #
# bit position. Define u to be u = (X-F) / (1 + X*F). #
# #
# Step 3. Approximate arctan(u) by a polynomial poly. #
# #
# Step 4. Return arctan(F) + poly, where arctan(F) is fetched from a #
# table of values calculated beforehand. Exit. #
# #
# Step 5. If |X| >= 16, go to Step 7. #
# #
# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
# #
# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
# polynomial in X'. #
# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
# #
#########################################################################
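# The identity behind Steps 2-4: arctan(X) = arctan(F) + arctan(u) with
# u = (X-F)/(1+X*F), where F keeps the first 5 significant bits of X with a
# 1 attached below, so u is small and a short polynomial suffices. A hedged
# C check of that identity (atan_f is the table value arctan(F)):
#
#   #include <math.h>
#   double atan_via_table(double x, double f, double atan_f)
#   {
#       double u = (x - f) / (1.0 + x * f);     /* Step 2                  */
#       return atan_f + atan(u);                /* Step 4: atan(u) ~ poly  */
#   }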
6182
6183
ATANA3: long 0xBFF6687E,0x314987D8
6184
ATANA2: long 0x4002AC69,0x34A26DB3
6185
ATANA1: long 0xBFC2476F,0x4E1DA28E
6186
6187
ATANB6: long 0x3FB34444,0x7F876989
6188
ATANB5: long 0xBFB744EE,0x7FAF45DB
6189
ATANB4: long 0x3FBC71C6,0x46940220
6190
ATANB3: long 0xBFC24924,0x921872F9
6191
ATANB2: long 0x3FC99999,0x99998FA9
6192
ATANB1: long 0xBFD55555,0x55555555
6193
6194
ATANC5: long 0xBFB70BF3,0x98539E6A
6195
ATANC4: long 0x3FBC7187,0x962D1D7D
6196
ATANC3: long 0xBFC24924,0x827107B8
6197
ATANC2: long 0x3FC99999,0x9996263E
6198
ATANC1: long 0xBFD55555,0x55555536
6199
6200
PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6201
NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6202
6203
PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6204
NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6205
6206
ATANTBL:
6207
long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6208
long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6209
long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6210
long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6211
long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6212
long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6213
long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6214
long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6215
long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6216
long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6217
long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6218
long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6219
long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6220
long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6221
long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6222
long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6223
long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6224
long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6225
long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6226
long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6227
long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6228
long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6229
long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6230
long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6231
long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6232
long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6233
long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6234
long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6235
long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6236
long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6237
long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6238
long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6239
long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6240
long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6241
long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6242
long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6243
long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6244
long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6245
long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6246
long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6247
long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6248
long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6249
long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6250
long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6251
long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6252
long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6253
long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6254
long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6255
long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6256
long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6257
long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6258
long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6259
long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6260
long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6261
long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6262
long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6263
long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6264
long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6265
long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6266
long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6267
long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6268
long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6269
long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6270
long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6271
long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6272
long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6273
long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6274
long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6275
long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6276
long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6277
long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6278
long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6279
long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6280
long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6281
long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6282
long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6283
long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6284
long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6285
long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6286
long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6287
long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6288
long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6289
long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6290
long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6291
long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6292
long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6293
long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6294
long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6295
long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6296
long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6297
long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6298
long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6299
long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6300
long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6301
long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6302
long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6303
long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6304
long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6305
long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6306
long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6307
long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6308
long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6309
long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6310
long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6311
long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6312
long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6313
long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6314
long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6315
long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6316
long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6317
long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6318
long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6319
long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6320
long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6321
long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6322
long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6323
long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6324
long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6325
long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6326
long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6327
long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6328
long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6329
long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6330
long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6331
long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6332
long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6333
long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6334
long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6335
6336
set X,FP_SCR0
6337
set XDCARE,X+2
6338
set XFRAC,X+4
6339
set XFRACLO,X+8
6340
6341
set ATANF,FP_SCR1
6342
set ATANFHI,ATANF+4
6343
set ATANFLO,ATANF+8
6344
6345
global satan
6346
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6347
satan:
6348
fmov.x (%a0),%fp0 # LOAD INPUT
6349
6350
mov.l (%a0),%d1
6351
mov.w 4(%a0),%d1
6352
fmov.x %fp0,X(%a6)
6353
and.l &0x7FFFFFFF,%d1
6354
6355
cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6356
bge.b ATANOK1
6357
bra.w ATANSM
6358
6359
ATANOK1:
6360
cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6361
ble.b ATANMAIN
6362
bra.w ATANBIG
6363
6364
#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6365
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6366
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6367
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6368
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6369
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6370
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6371
#--FETCH F AND THE SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6372
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6373
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6374
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6375
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6376
#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6377
6378
#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6379
#--WE CHOSE F TO BE +-2^K * 1.BBBB1
6380
#--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6381
#--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6382
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6383
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
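#--EXAMPLE (ADDED ILLUSTRATION, NOT PART OF THE ORIGINAL NOTES): FOR
#--X = 1.25 = 2^0 * 1.0100...B, K = 0 AND THE FOUR VARYING FRACTION BITS
#--ARE 0100B = 4, SO F = 2^0 * 1.01001B = 1.28125 AND THE INDEXING BELOW
#--SELECTS ENTRY (K+4)*16 + 4 = 68, I.E. BYTE OFFSET 68*16 = 1088 INTO
#--ATANTBL, WHICH HOLDS ATAN(1.28125).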
6384
6385
ATANMAIN:
6386
6387
and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6388
or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6389
mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6390
6391
fmov.x %fp0,%fp1 # FP1 IS X
6392
fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6393
fsub.x X(%a6),%fp0 # FP0 IS X-F
6394
fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6395
fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6396
6397
#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6398
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6399
#--SAVE REGISTERS FP2.
6400
6401
mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6402
mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6403
and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6404
and.l &0x7FFF0000,%d2 # EXPONENT OF F
6405
sub.l &0x3FFB0000,%d2 # K+4
6406
asr.l &1,%d2
6407
add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6408
asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6409
lea ATANTBL(%pc),%a1
6410
add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6411
mov.l (%a1)+,ATANF(%a6)
6412
mov.l (%a1)+,ATANFHI(%a6)
6413
mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6414
mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6415
and.l &0x80000000,%d1 # SIGN(F)
6416
or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6417
mov.l (%sp)+,%d2 # RESTORE d2
6418
6419
#--THAT'S ALL I HAVE TO DO FOR NOW,
6420
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6421
6422
#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6423
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6424
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6425
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6426
#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6427
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6428
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
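#--CHECK (ADDED ILLUSTRATION): EXPANDING A1*U*V*(A2 + V*(A3 + V)) GIVES
#--U*V*(A1*A2 + A1*A3*V + A1*V*V). MATCHING THE NATURAL FORM
#--U*V*(a1 + a2*V + a3*V*V) TERM BY TERM REQUIRES A1 = a3, A2 = a1/a3,
#--A3 = a2/a3, WHICH IS THE RELATION STATED ABOVE (LOWER CASE DENOTING
#--THE NATURAL COEFFICIENTS).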
6429
6430
fmovm.x &0x04,-(%sp) # save fp2
6431
6432
fmov.x %fp0,%fp1
6433
fmul.x %fp1,%fp1
6434
fmov.d ATANA3(%pc),%fp2
6435
fadd.x %fp1,%fp2 # A3+V
6436
fmul.x %fp1,%fp2 # V*(A3+V)
6437
fmul.x %fp0,%fp1 # U*V
6438
fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6439
fmul.d ATANA1(%pc),%fp1 # A1*U*V
6440
fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6441
fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6442
6443
fmovm.x (%sp)+,&0x20 # restore fp2
6444
6445
fmov.l %d0,%fpcr # restore users rnd mode,prec
6446
fadd.x ATANF(%a6),%fp0 # ATAN(X)
6447
bra t_inx2
6448
6449
ATANBORS:
6450
#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
6451
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6452
cmp.l %d1,&0x3FFF8000
6453
bgt.w ATANBIG # I.E. |X| >= 16
6454
6455
ATANSM:
6456
#--|X| <= 1/16
6457
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6458
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6459
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6460
#--WHERE Y = X*X, AND Z = Y*Y.
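#--CHECK (ADDED ILLUSTRATION): WITH Z = Y*Y,
#--	B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))
#--	= [B1+Z*(B3+Z*B5)] + Y*[B2+Z*(B4+Z*B6)],
#--SO THE TWO BRACKETED PIECES CAN BE EVALUATED INDEPENDENTLY.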
6461
6462
cmp.l %d1,&0x3FD78000
6463
blt.w ATANTINY
6464
6465
#--COMPUTE POLYNOMIAL
6466
fmovm.x &0x0c,-(%sp) # save fp2/fp3
6467
6468
	fmul.x		%fp0,%fp0	# FP0 IS Y = X*X
6469
6470
fmov.x %fp0,%fp1
6471
fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6472
6473
fmov.d ATANB6(%pc),%fp2
6474
fmov.d ATANB5(%pc),%fp3
6475
6476
fmul.x %fp1,%fp2 # Z*B6
6477
fmul.x %fp1,%fp3 # Z*B5
6478
6479
fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6480
fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6481
6482
fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6483
fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6484
6485
fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6486
fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6487
6488
fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6489
fmul.x X(%a6),%fp0 # X*Y
6490
6491
fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6492
6493
fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6494
6495
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6496
6497
fmov.l %d0,%fpcr # restore users rnd mode,prec
6498
fadd.x X(%a6),%fp0
6499
bra t_inx2
6500
6501
ATANTINY:
6502
#--|X| < 2^(-40), ATAN(X) = X
6503
6504
fmov.l %d0,%fpcr # restore users rnd mode,prec
6505
mov.b &FMOV_OP,%d1 # last inst is MOVE
6506
fmov.x X(%a6),%fp0 # last inst - possible exception set
6507
6508
bra t_catch
6509
6510
ATANBIG:
6511
#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6512
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6513
cmp.l %d1,&0x40638000
6514
bgt.w ATANHUGE
6515
6516
#--APPROXIMATE ATAN(-1/X) BY
6517
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6518
#--THIS CAN BE RE-WRITTEN AS
6519
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6520
6521
fmovm.x &0x0c,-(%sp) # save fp2/fp3
6522
6523
fmov.s &0xBF800000,%fp1 # LOAD -1
6524
fdiv.x %fp0,%fp1 # FP1 IS -1/X
6525
6526
#--DIVIDE IS STILL CRANKING
6527
6528
fmov.x %fp1,%fp0 # FP0 IS X'
6529
fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6530
fmov.x %fp1,X(%a6) # X IS REALLY X'
6531
6532
fmov.x %fp0,%fp1
6533
fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6534
6535
fmov.d ATANC5(%pc),%fp3
6536
fmov.d ATANC4(%pc),%fp2
6537
6538
fmul.x %fp1,%fp3 # Z*C5
6539
	fmul.x		%fp1,%fp2	# Z*C4
6540
6541
fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6542
fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6543
6544
fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6545
fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6546
6547
fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6548
fmul.x X(%a6),%fp0 # X'*Y
6549
6550
fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6551
6552
	fmul.x		%fp1,%fp0	# X'*Y*([C1+Z*(C3+Z*C5)]
6553
#					# ...	+[Y*(C2+Z*C4)])
6554
fadd.x X(%a6),%fp0
6555
6556
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6557
6558
fmov.l %d0,%fpcr # restore users rnd mode,prec
6559
tst.b (%a0)
6560
bpl.b pos_big
6561
6562
neg_big:
6563
fadd.x NPIBY2(%pc),%fp0
6564
bra t_minx2
6565
6566
pos_big:
6567
fadd.x PPIBY2(%pc),%fp0
6568
bra t_pinx2
6569
6570
ATANHUGE:
6571
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6572
tst.b (%a0)
6573
bpl.b pos_huge
6574
6575
neg_huge:
6576
fmov.x NPIBY2(%pc),%fp0
6577
fmov.l %d0,%fpcr
6578
fadd.x PTINY(%pc),%fp0
6579
bra t_minx2
6580
6581
pos_huge:
6582
fmov.x PPIBY2(%pc),%fp0
6583
fmov.l %d0,%fpcr
6584
fadd.x NTINY(%pc),%fp0
6585
bra t_pinx2
6586
6587
global satand
6588
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6589
satand:
6590
bra t_extdnrm
6591
6592
#########################################################################
6593
# sasin(): computes the inverse sine of a normalized input #
6594
# sasind(): computes the inverse sine of a denormalized input #
6595
# #
6596
# INPUT *************************************************************** #
6597
# a0 = pointer to extended precision input #
6598
# d0 = round precision,mode #
6599
# #
6600
# OUTPUT ************************************************************** #
6601
# fp0 = arcsin(X) #
6602
# #
6603
# ACCURACY and MONOTONICITY ******************************************* #
6604
#	The returned result is within 3 ulps in 64 significant bits,	#
6605
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6606
# rounded to double precision. The result is provably monotonic #
6607
# in double precision. #
6608
# #
6609
# ALGORITHM *********************************************************** #
6610
# #
6611
# ASIN #
6612
# 1. If |X| >= 1, go to 3. #
6613
# #
6614
# 2. (|X| < 1) Calculate asin(X) by #
6615
# z := sqrt( [1-X][1+X] ) #
6616
# asin(X) = atan( x / z ). #
6617
# Exit. #
6618
# #
6619
# 3. If |X| > 1, go to 5. #
6620
# #
6621
# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6622
# #
6623
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6624
# Exit. #
6625
# #
6626
#########################################################################
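#
# Worked check of the Step 2 identity (added illustration, not part of
# the original notes): for X = 0.5, z = sqrt(0.75) = 0.8660254 and
# atan(0.5/0.8660254) = atan(0.5773503) = 0.5235988 = Pi/6 = asin(0.5).
#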
6627
6628
global sasin
6629
sasin:
6630
fmov.x (%a0),%fp0 # LOAD INPUT
6631
6632
mov.l (%a0),%d1
6633
mov.w 4(%a0),%d1
6634
and.l &0x7FFFFFFF,%d1
6635
cmp.l %d1,&0x3FFF8000
6636
bge.b ASINBIG
6637
6638
# This catch is added here for the '060 QSP. Originally, the call to
6639
# satan() would handle this case by causing the exception which would
6640
# not be caught until gen_except(). Now, with the exceptions being
6641
# detected inside of satan(), the exception would have been handled there
6642
# instead of inside sasin() as expected.
6643
cmp.l %d1,&0x3FD78000
6644
blt.w ASINTINY
6645
6646
#--THIS IS THE USUAL CASE, |X| < 1
6647
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6648
6649
ASINMAIN:
6650
fmov.s &0x3F800000,%fp1
6651
fsub.x %fp0,%fp1 # 1-X
6652
fmovm.x &0x4,-(%sp) # {fp2}
6653
fmov.s &0x3F800000,%fp2
6654
fadd.x %fp0,%fp2 # 1+X
6655
fmul.x %fp2,%fp1 # (1+X)(1-X)
6656
fmovm.x (%sp)+,&0x20 # {fp2}
6657
fsqrt.x %fp1 # SQRT([1-X][1+X])
6658
fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6659
fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6660
lea (%sp),%a0 # pass ptr to X/SQRT(...)
6661
bsr satan
6662
add.l &0xc,%sp # clear X/SQRT(...) from stack
6663
bra t_inx2
6664
6665
ASINBIG:
6666
fabs.x %fp0 # |X|
6667
fcmp.s %fp0,&0x3F800000
6668
fbgt t_operr # cause an operr exception
6669
6670
#--|X| = 1, ASIN(X) = +- PI/2.
6671
ASINONE:
6672
fmov.x PIBY2(%pc),%fp0
6673
mov.l (%a0),%d1
6674
and.l &0x80000000,%d1 # SIGN BIT OF X
6675
or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6676
mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6677
fmov.l %d0,%fpcr
6678
fmul.s (%sp)+,%fp0
6679
bra t_inx2
6680
6681
#--|X| < 2^(-40), ASIN(X) = X
6682
ASINTINY:
6683
fmov.l %d0,%fpcr # restore users rnd mode,prec
6684
mov.b &FMOV_OP,%d1 # last inst is MOVE
6685
fmov.x (%a0),%fp0 # last inst - possible exception
6686
bra t_catch
6687
6688
global sasind
6689
#--ASIN(X) = X FOR DENORMALIZED X
6690
sasind:
6691
bra t_extdnrm
6692
6693
#########################################################################
6694
# sacos(): computes the inverse cosine of a normalized input #
6695
# sacosd(): computes the inverse cosine of a denormalized input #
6696
# #
6697
# INPUT *************************************************************** #
6698
# a0 = pointer to extended precision input #
6699
# d0 = round precision,mode #
6700
# #
6701
# OUTPUT ************************************************************** #
6702
# fp0 = arccos(X) #
6703
# #
6704
# ACCURACY and MONOTONICITY ******************************************* #
6705
#	The returned result is within 3 ulps in 64 significant bits,	#
6706
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6707
# rounded to double precision. The result is provably monotonic #
6708
# in double precision. #
6709
# #
6710
# ALGORITHM *********************************************************** #
6711
# #
6712
# ACOS #
6713
# 1. If |X| >= 1, go to 3. #
6714
# #
6715
# 2. (|X| < 1) Calculate acos(X) by #
6716
# z := (1-X) / (1+X) #
6717
# acos(X) = 2 * atan( sqrt(z) ). #
6718
# Exit. #
6719
# #
6720
# 3. If |X| > 1, go to 5. #
6721
# #
6722
# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6723
# #
6724
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6725
# Exit. #
6726
# #
6727
#########################################################################
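#
# Worked check of the Step 2 identity (added illustration, not part of
# the original notes): for X = 0.5, z = (1-0.5)/(1+0.5) = 1/3, so
# acos(0.5) = 2*atan(sqrt(1/3)) = 2*atan(0.5773503) = 2*(Pi/6) = Pi/3.
#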
6728
6729
global sacos
6730
sacos:
6731
fmov.x (%a0),%fp0 # LOAD INPUT
6732
6733
mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6734
mov.w 4(%a0),%d1
6735
and.l &0x7FFFFFFF,%d1
6736
cmp.l %d1,&0x3FFF8000
6737
bge.b ACOSBIG
6738
6739
#--THIS IS THE USUAL CASE, |X| < 1
6740
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6741
6742
ACOSMAIN:
6743
fmov.s &0x3F800000,%fp1
6744
fadd.x %fp0,%fp1 # 1+X
6745
fneg.x %fp0 # -X
6746
fadd.s &0x3F800000,%fp0 # 1-X
6747
fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6748
fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6749
mov.l %d0,-(%sp) # save original users fpcr
6750
clr.l %d0
6751
fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6752
lea (%sp),%a0 # pass ptr to sqrt
6753
bsr satan # ATAN(SQRT([1-X]/[1+X]))
6754
add.l &0xc,%sp # clear SQRT(...) from stack
6755
6756
fmov.l (%sp)+,%fpcr # restore users round prec,mode
6757
fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6758
bra t_pinx2
6759
6760
ACOSBIG:
6761
fabs.x %fp0
6762
fcmp.s %fp0,&0x3F800000
6763
fbgt t_operr # cause an operr exception
6764
6765
#--|X| = 1, ACOS(X) = 0 OR PI
6766
tst.b (%a0) # is X positive or negative?
6767
bpl.b ACOSP1
6768
6769
#--X = -1
6770
#Returns PI and inexact exception
6771
ACOSM1:
6772
fmov.x PI(%pc),%fp0 # load PI
6773
fmov.l %d0,%fpcr # load round mode,prec
6774
fadd.s &0x00800000,%fp0 # add a small value
6775
bra t_pinx2
6776
6777
ACOSP1:
6778
bra ld_pzero # answer is positive zero
6779
6780
global sacosd
6781
#--ACOS(X) = PI/2 FOR DENORMALIZED X
6782
sacosd:
6783
fmov.l %d0,%fpcr # load user's rnd mode/prec
6784
fmov.x PIBY2(%pc),%fp0
6785
bra t_pinx2
6786
6787
#########################################################################
6788
# setox(): computes the exponential for a normalized input #
6789
# setoxd(): computes the exponential for a denormalized input #
6790
# setoxm1(): computes the exponential minus 1 for a normalized input #
6791
# setoxm1d(): computes the exponential minus 1 for a denormalized input #
6792
# #
6793
# INPUT *************************************************************** #
6794
# a0 = pointer to extended precision input #
6795
# d0 = round precision,mode #
6796
# #
6797
# OUTPUT ************************************************************** #
6798
# fp0 = exp(X) or exp(X)-1 #
6799
# #
6800
# ACCURACY and MONOTONICITY ******************************************* #
6801
#	The returned result is within 0.85 ulps in 64 significant bits,	#
6802
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803
# rounded to double precision. The result is provably monotonic #
6804
# in double precision. #
6805
# #
6806
# ALGORITHM and IMPLEMENTATION **************************************** #
6807
# #
6808
# setoxd #
6809
# ------ #
6810
# Step 1. Set ans := 1.0 #
6811
# #
6812
# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6813
# Notes: This will always generate one exception -- inexact. #
6814
# #
6815
# #
6816
# setox #
6817
# ----- #
6818
# #
6819
# Step 1. Filter out extreme cases of input argument. #
6820
# 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6821
# 1.2 Go to Step 7. #
6822
# 1.3 If |X| < 16380 log(2), go to Step 2. #
6823
# 1.4 Go to Step 8. #
6824
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825
# To avoid the use of floating-point comparisons, a #
6826
# compact representation of |X| is used. This format is a #
6827
# 32-bit integer, the upper (more significant) 16 bits #
6828
# are the sign and biased exponent field of |X|; the #
6829
# lower 16 bits are the 16 most significant fraction #
6830
# (including the explicit bit) bits of |X|. Consequently, #
6831
# the comparisons in Steps 1.1 and 1.3 can be performed #
6832
# by integer comparison. Note also that the constant #
6833
# 16380 log(2) used in Step 1.3 is also in the compact #
6834
# form. Thus taking the branch to Step 2 guarantees #
6835
#		|X| < 16380 log(2). There is no harm in having a small	#
6836
# number of cases where |X| is less than, but close to, #
6837
#		16380 log(2) and the branch to Step 8 is taken.		#
6838
# #
6839
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6840
# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6841
# was taken) #
6842
# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6843
# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6844
# or 63. #
6845
# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6846
# 2.5 Calculate the address of the stored value of #
6847
# 2^(J/64). #
6848
# 2.6 Create the value Scale = 2^M. #
6849
# Notes: The calculation in 2.2 is really performed by #
6850
# Z := X * constant #
6851
# N := round-to-nearest-integer(Z) #
6852
# where #
6853
# constant := single-precision( 64/log 2 ). #
6854
# #
6855
# Using a single-precision constant avoids memory #
6856
# access. Another effect of using a single-precision #
6857
# "constant" is that the calculated value Z is #
6858
# #
6859
# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6860
# #
6861
# This error has to be considered later in Steps 3 and 4. #
6862
# #
6863
# Step 3. Calculate X - N*log2/64. #
6864
# 3.1 R := X + N*L1, #
6865
# where L1 := single-precision(-log2/64). #
6866
# 3.2 R := R + N*L2, #
6867
# L2 := extended-precision(-log2/64 - L1).#
6868
# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6869
# approximate the value -log2/64 to 88 bits of accuracy. #
6870
# b) N*L1 is exact because N is no longer than 22 bits #
6871
# and L1 is no longer than 24 bits. #
6872
# c) The calculation X+N*L1 is also exact due to #
6873
# cancellation. Thus, R is practically X+N(L1+L2) to full #
6874
# 64 bits. #
6875
# d) It is important to estimate how large can |R| be #
6876
# after Step 3.2. #
6877
# #
6878
# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6879
# X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6880
# X*64/log2 - N = f - eps*X 64/log2 #
6881
# X - N*log2/64 = f*log2/64 - eps*X #
6882
# #
6883
# #
6884
# Now |X| <= 16446 log2, thus #
6885
# #
6886
# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6887
# <= 0.57 log2/64. #
6888
# This bound will be used in Step 4. #
6889
# #
6890
# Step 4. Approximate exp(R)-1 by a polynomial #
6891
# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6892
# Notes: a) In order to reduce memory access, the coefficients #
6893
# are made as "short" as possible: A1 (which is 1/2), A4 #
6894
# and A5 are single precision; A2 and A3 are double #
6895
# precision. #
6896
# b) Even with the restrictions above, #
6897
# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6898
# Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6899
# c) To fully utilize the pipeline, p is separated into #
6900
# two independent pieces of roughly equal complexities #
6901
# p = [ R + R*S*(A2 + S*A4) ] + #
6902
# [ S*(A1 + S*(A3 + S*A5)) ] #
6903
# where S = R*R. #
6904
# #
6905
# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6906
# ans := T + ( T*p + t) #
6907
# where T and t are the stored values for 2^(J/64). #
6908
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6909
# 2^(J/64) to roughly 85 bits; T is in extended precision #
6910
# and t is in single precision. Note also that T is #
6911
# rounded to 62 bits so that the last two bits of T are #
6912
# zero. The reason for such a special form is that T-1, #
6913
# T-2, and T-8 will all be exact --- a property that will #
6914
# give much more accurate computation of the function #
6915
# EXPM1. #
6916
# #
6917
# Step 6. Reconstruction of exp(X) #
6918
# exp(X) = 2^M * 2^(J/64) * exp(R). #
6919
# 6.1 If AdjFlag = 0, go to 6.3 #
6920
# 6.2 ans := ans * AdjScale #
6921
# 6.3 Restore the user FPCR #
6922
# 6.4 Return ans := ans * Scale. Exit. #
6923
# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6924
# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6925
# neither overflow nor underflow. If AdjFlag = 1, that #
6926
# means that #
6927
# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6928
# Hence, exp(X) may overflow or underflow or neither. #
6929
# When that is the case, AdjScale = 2^(M1) where M1 is #
6930
# approximately M. Thus 6.2 will never cause #
6931
# over/underflow. Possible exception in 6.4 is overflow #
6932
# or underflow. The inexact exception is not generated in #
6933
# 6.4. Although one can argue that the inexact flag #
6934
# should always be raised, to simulate that exception #
6935
#		costs too much for what the flag is worth in practical uses.	#
6936
# #
6937
# Step 7. Return 1 + X. #
6938
# 7.1 ans := X #
6939
# 7.2 Restore user FPCR. #
6940
# 7.3 Return ans := 1 + ans. Exit #
6941
# Notes: For non-zero X, the inexact exception will always be #
6942
# raised by 7.3. That is the only exception raised by 7.3.#
6943
# Note also that we use the FMOVEM instruction to move X #
6944
# in Step 7.1 to avoid unnecessary trapping. (Although #
6945
# the FMOVEM may not seem relevant since X is normalized, #
6946
# the precaution will be useful in the library version of #
6947
# this code where the separate entry for denormalized #
6948
# inputs will be done away with.) #
6949
# #
6950
# Step 8. Handle exp(X) where |X| >= 16380log2. #
6951
# 8.1 If |X| > 16480 log2, go to Step 9. #
6952
# (mimic 2.2 - 2.6) #
6953
# 8.2 N := round-to-integer( X * 64/log2 ) #
6954
# 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6955
# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6956
# AdjFlag := 1. #
6957
# 8.5 Calculate the address of the stored value #
6958
# 2^(J/64). #
6959
# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6960
# 8.7 Go to Step 3. #
6961
# Notes: Refer to notes for 2.2 - 2.6. #
6962
# #
6963
# Step 9. Handle exp(X), |X| > 16480 log2. #
6964
# 9.1 If X < 0, go to 9.3 #
6965
# 9.2 ans := Huge, go to 9.4 #
6966
# 9.3 ans := Tiny. #
6967
# 9.4 Restore user FPCR. #
6968
# 9.5 Return ans := ans * ans. Exit. #
6969
# Notes: Exp(X) will surely overflow or underflow, depending on #
6970
# X's sign. "Huge" and "Tiny" are respectively large/tiny #
6971
# extended-precision numbers whose square over/underflow #
6972
# with an inexact result. Thus, 9.5 always raises the #
6973
# inexact together with either overflow or underflow. #
6974
# #
6975
# setoxm1d #
6976
# -------- #
6977
# #
6978
# Step 1. Set ans := 0 #
6979
# #
6980
# Step 2. Return ans := X + ans. Exit. #
6981
# Notes: This will return X with the appropriate rounding #
6982
# precision prescribed by the user FPCR. #
6983
# #
6984
# setoxm1 #
6985
# ------- #
6986
# #
6987
# Step 1. Check |X| #
6988
# 1.1 If |X| >= 1/4, go to Step 1.3. #
6989
# 1.2 Go to Step 7. #
6990
# 1.3 If |X| < 70 log(2), go to Step 2. #
6991
# 1.4 Go to Step 10. #
6992
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993
# However, it is conceivable |X| can be small very often #
6994
# because EXPM1 is intended to evaluate exp(X)-1 #
6995
# accurately when |X| is small. For further details on #
6996
# the comparisons, see the notes on Step 1 of setox. #
6997
# #
6998
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6999
# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
7000
# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
7001
# or 63. #
7002
# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
7003
# 2.4 Calculate the address of the stored value of #
7004
# 2^(J/64). #
7005
# 2.5 Create the values Sc = 2^M and #
7006
# OnebySc := -2^(-M). #
7007
# Notes: See the notes on Step 2 of setox. #
7008
# #
7009
# Step 3. Calculate X - N*log2/64. #
7010
# 3.1 R := X + N*L1, #
7011
# where L1 := single-precision(-log2/64). #
7012
# 3.2 R := R + N*L2, #
7013
# L2 := extended-precision(-log2/64 - L1).#
7014
# Notes: Applying the analysis of Step 3 of setox in this case #
7015
# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
7016
# this case). #
7017
# #
7018
# Step 4. Approximate exp(R)-1 by a polynomial #
7019
# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7020
# Notes: a) In order to reduce memory access, the coefficients #
7021
# are made as "short" as possible: A1 (which is 1/2), A5 #
7022
# and A6 are single precision; A2, A3 and A4 are double #
7023
# precision. #
7024
# b) Even with the restriction above, #
7025
# |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7026
# for all |R| <= 0.0055. #
7027
# c) To fully utilize the pipeline, p is separated into #
7028
# two independent pieces of roughly equal complexity #
7029
# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
7030
# [ R + S*(A1 + S*(A3 + S*A5)) ] #
7031
# where S = R*R. #
7032
# #
7033
# Step 5. Compute 2^(J/64)*p by #
7034
# p := T*p #
7035
# where T and t are the stored values for 2^(J/64). #
7036
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
7037
# 2^(J/64) to roughly 85 bits; T is in extended precision #
7038
# and t is in single precision. Note also that T is #
7039
# rounded to 62 bits so that the last two bits of T are #
7040
# zero. The reason for such a special form is that T-1, #
7041
# T-2, and T-8 will all be exact --- a property that will #
7042
# be exploited in Step 6 below. The total relative error #
7043
# in p is no bigger than 2^(-67.7) compared to the final #
7044
# result. #
7045
# #
7046
# Step 6. Reconstruction of exp(X)-1 #
7047
# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7048
# 6.1 If M <= 63, go to Step 6.3. #
7049
# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7050
# 6.3 If M >= -3, go to 6.5. #
7051
# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7052
# 6.5 ans := (T + OnebySc) + (p + t). #
7053
# 6.6 Restore user FPCR. #
7054
# 6.7 Return ans := Sc * ans. Exit. #
7055
# Notes: The various arrangements of the expressions give #
7056
# accurate evaluations. #
7057
# #
7058
# Step 7. exp(X)-1 for |X| < 1/4. #
7059
# 7.1 If |X| >= 2^(-65), go to Step 9. #
7060
# 7.2 Go to Step 8. #
7061
# #
7062
# Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7063
# 8.1 If |X| < 2^(-16312), goto 8.3 #
7064
# 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7065
# Exit. #
7066
# 8.3 X := X * 2^(140). #
7067
# 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7068
#			Return ans := ans*2^(-140). Exit		#
7069
# Notes: The idea is to return "X - tiny" under the user #
7070
# precision and rounding modes. To avoid unnecessary #
7071
# inefficiency, we stay away from denormalized numbers #
7072
# the best we can. For |X| >= 2^(-16312), the #
7073
# straightforward 8.2 generates the inexact exception as #
7074
# the case warrants. #
7075
# #
7076
# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7077
# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7078
# Notes: a) In order to reduce memory access, the coefficients #
7079
# are made as "short" as possible: B1 (which is 1/2), B9 #
7080
# to B12 are single precision; B3 to B8 are double #
7081
# precision; and B2 is double extended. #
7082
# b) Even with the restriction above, #
7083
# |p - (exp(X)-1)| < |X| 2^(-70.6) #
7084
# for all |X| <= 0.251. #
7085
# Note that 0.251 is slightly bigger than 1/4. #
7086
# c) To fully preserve accuracy, the polynomial is #
7087
# computed as #
7088
# X + ( S*B1 + Q ) where S = X*X and #
7089
# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7090
# d) To fully utilize the pipeline, Q is separated into #
7091
# two independent pieces of roughly equal complexity #
7092
# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7093
# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7094
# #
7095
# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7096
# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7097
# practical purposes. Therefore, go to Step 1 of setox. #
7098
# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7099
# purposes. #
7100
# ans := -1 #
7101
# Restore user FPCR #
7102
# Return ans := ans + 2^(-126). Exit. #
7103
# Notes: 10.2 will always create an inexact and return -1 + tiny #
7104
# in the user rounding precision and mode. #
7105
# #
7106
#########################################################################
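#
# Worked example of the setox reduction, Steps 2-6 (added illustration,
# not part of the original notes): for X = 1,
#	N = round(1 * 64/log2) = 92, J = 92 mod 64 = 28, M = (92-28)/64 = 1,
#	R = 1 - 92*log2/64 = 0.0036009,
#	exp(1) = 2^1 * 2^(28/64) * exp(R)
#	       = 2 * 1.3542555 * 1.0036074 = 2.718282,
# which matches exp(1) to the displayed precision.
#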
7107
7108
L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7109
7110
EEXPA3: long 0x3FA55555,0x55554CC1
7111
EEXPA2: long 0x3FC55555,0x55554A54
7112
7113
EM1A4: long 0x3F811111,0x11174385
7114
EM1A3: long 0x3FA55555,0x55554F5A
7115
7116
EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000
7117
7118
EM1B8: long 0x3EC71DE3,0xA5774682
7119
EM1B7: long 0x3EFA01A0,0x19D7CB68
7120
7121
EM1B6: long 0x3F2A01A0,0x1A019DF3
7122
EM1B5: long 0x3F56C16C,0x16C170E2
7123
7124
EM1B4: long 0x3F811111,0x11111111
7125
EM1B3: long 0x3FA55555,0x55555555
7126
7127
EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7128
long 0x00000000
7129
7130
TWO140: long 0x48B00000,0x00000000
7131
TWON140:
7132
long 0x37300000,0x00000000
7133
7134
EEXPTBL:
7135
long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7136
long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137
long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138
long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139
long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141
long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142
long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147
long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149
long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150
long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151
long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152
long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153
long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155
long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156
long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157
long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158
long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159
long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160
long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162
long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163
long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164
long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165
long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166
long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168
long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170
long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171
long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172
long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173
long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174
long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175
long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176
long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178
long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180
long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181
long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184
long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185
long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187
long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188
long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191
long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194
long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197
long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198
long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7199
7200
set ADJFLAG,L_SCR2
7201
set SCALE,FP_SCR0
7202
set ADJSCALE,FP_SCR1
7203
set SC,FP_SCR0
7204
set ONEBYSC,FP_SCR1
7205
7206
global setox
7207
setox:
7208
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7209
7210
#--Step 1.
7211
mov.l (%a0),%d1 # load part of input X
7212
and.l &0x7FFF0000,%d1 # biased expo. of X
7213
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7214
bge.b EXPC1 # normal case
7215
bra EXPSM
7216
7217
EXPC1:
7218
#--The case |X| >= 2^(-65)
7219
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7220
cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7221
blt.b EXPMAIN # normal case
7222
bra EEXPBIG
7223
7224
EXPMAIN:
7225
#--Step 2.
7226
#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7227
fmov.x (%a0),%fp0 # load input from (a0)
7228
7229
fmov.x %fp0,%fp1
7230
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7231
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7232
mov.l &0,ADJFLAG(%a6)
7233
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7234
lea EEXPTBL(%pc),%a1
7235
fmov.l %d1,%fp0 # convert to floating-format
7236
7237
mov.l %d1,L_SCR1(%a6) # save N temporarily
7238
and.l &0x3F,%d1 # D0 is J = N mod 64
7239
lsl.l &4,%d1
7240
add.l %d1,%a1 # address of 2^(J/64)
7241
mov.l L_SCR1(%a6),%d1
7242
asr.l &6,%d1 # D0 is M
7243
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7244
mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7245
7246
EXPCONT1:
7247
#--Step 3.
7248
#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7249
#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
7250
fmov.x %fp0,%fp2
7251
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7252
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7253
fadd.x %fp1,%fp0 # X + N*L1
7254
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7255
7256
#--Step 4.
7257
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7261
7262
fmov.x %fp0,%fp1
7263
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7264
7265
fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7266
7267
fmul.x %fp1,%fp2 # fp2 IS S*A5
7268
fmov.x %fp1,%fp3
7269
fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7270
7271
fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7272
fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7273
7274
fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7275
mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7276
mov.l &0x80000000,SCALE+4(%a6)
7277
clr.l SCALE+8(%a6)
7278
7279
fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7280
7281
fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7282
fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7283
7284
fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7285
fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7286
7287
fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7288
fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7289
7290
#--Step 5
7291
#--final reconstruction process
7292
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7293
7294
fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7295
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7296
fadd.s (%a1),%fp0 # accurate 2^(J/64)
7297
7298
fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7299
mov.l ADJFLAG(%a6),%d1
7300
7301
#--Step 6
7302
tst.l %d1
7303
beq.b NORMAL
7304
ADJUST:
7305
fmul.x ADJSCALE(%a6),%fp0
7306
NORMAL:
7307
fmov.l %d0,%fpcr # restore user FPCR
7308
mov.b &FMUL_OP,%d1 # last inst is MUL
7309
fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7310
bra t_catch
7311
7312
EXPSM:
7313
#--Step 7
7314
fmovm.x (%a0),&0x80 # load X
7315
fmov.l %d0,%fpcr
7316
fadd.s &0x3F800000,%fp0 # 1+X in user mode
7317
bra t_pinx2
7318
7319
EEXPBIG:
7320
#--Step 8
7321
cmp.l %d1,&0x400CB27C # 16480 log2
7322
bgt.b EXP2BIG
7323
#--Steps 8.2 -- 8.6
7324
fmov.x (%a0),%fp0 # load input from (a0)
7325
7326
fmov.x %fp0,%fp1
7327
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7328
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7329
mov.l &1,ADJFLAG(%a6)
7330
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7331
lea EEXPTBL(%pc),%a1
7332
fmov.l %d1,%fp0 # convert to floating-format
7333
mov.l %d1,L_SCR1(%a6) # save N temporarily
7334
and.l &0x3F,%d1 # D0 is J = N mod 64
7335
lsl.l &4,%d1
7336
add.l %d1,%a1 # address of 2^(J/64)
7337
mov.l L_SCR1(%a6),%d1
7338
asr.l &6,%d1 # D0 is K
7339
mov.l %d1,L_SCR1(%a6) # save K temporarily
7340
asr.l &1,%d1 # D0 is M1
7341
sub.l %d1,L_SCR1(%a6) # a1 is M
7342
add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7343
mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7344
mov.l &0x80000000,ADJSCALE+4(%a6)
7345
clr.l ADJSCALE+8(%a6)
7346
mov.l L_SCR1(%a6),%d1 # D0 is M
7347
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7348
bra.w EXPCONT1 # go back to Step 3
7349
7350
EXP2BIG:
7351
#--Step 9
7352
tst.b (%a0) # is X positive or negative?
7353
bmi t_unfl2
7354
bra t_ovfl2
7355
7356
global setoxd
7357
setoxd:
7358
#--entry point for EXP(X), X is denormalized
7359
mov.l (%a0),-(%sp)
7360
andi.l &0x80000000,(%sp)
7361
ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7362
7363
fmov.s &0x3F800000,%fp0
7364
7365
fmov.l %d0,%fpcr
7366
fadd.s (%sp)+,%fp0
7367
bra t_pinx2
7368
7369
global setoxm1
7370
setoxm1:
7371
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7372
7373
#--Step 1.
7374
#--Step 1.1
7375
mov.l (%a0),%d1 # load part of input X
7376
and.l &0x7FFF0000,%d1 # biased expo. of X
7377
cmp.l %d1,&0x3FFD0000 # 1/4
7378
bge.b EM1CON1 # |X| >= 1/4
7379
bra EM1SM
7380
7381
EM1CON1:
7382
#--Step 1.3
7383
#--The case |X| >= 1/4
7384
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7385
cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7386
ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7387
bra EM1BIG
7388
7389
EM1MAIN:
7390
#--Step 2.
7391
#--This is the case: 1/4 <= |X| <= 70 log2.
7392
fmov.x (%a0),%fp0 # load input from (a0)
7393
7394
fmov.x %fp0,%fp1
7395
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7396
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7397
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7398
lea EEXPTBL(%pc),%a1
7399
fmov.l %d1,%fp0 # convert to floating-format
7400
7401
mov.l %d1,L_SCR1(%a6) # save N temporarily
7402
and.l &0x3F,%d1 # D0 is J = N mod 64
7403
lsl.l &4,%d1
7404
add.l %d1,%a1 # address of 2^(J/64)
7405
mov.l L_SCR1(%a6),%d1
7406
asr.l &6,%d1 # D0 is M
7407
mov.l %d1,L_SCR1(%a6) # save a copy of M
7408
7409
#--Step 3.
7410
#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7411
#--a0 points to 2^(J/64), D0 and a1 both contain M
7412
fmov.x %fp0,%fp2
7413
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7414
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7415
fadd.x %fp1,%fp0 # X + N*L1
7416
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7417
add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M
7418
7419
#--Step 4.
7420
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7424
7425
fmov.x %fp0,%fp1
7426
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7427
7428
fmov.s &0x3950097B,%fp2 # fp2 IS a6
7429
7430
fmul.x %fp1,%fp2 # fp2 IS S*A6
7431
fmov.x %fp1,%fp3
7432
fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7433
7434
fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7435
fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7436
mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7437
mov.l &0x80000000,SC+4(%a6)
7438
clr.l SC+8(%a6)
7439
7440
fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7441
mov.l L_SCR1(%a6),%d1 # D0 is M
7442
neg.w %d1 # D0 is -M
7443
fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7444
add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7445
fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7446
fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7447
7448
fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7449
or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7450
mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7451
mov.l &0x80000000,ONEBYSC+4(%a6)
7452
clr.l ONEBYSC+8(%a6)
7453
fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7454
7455
fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7456
fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7457
7458
fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7459
7460
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7461
7462
#--Step 5
7463
#--Compute 2^(J/64)*p
7464
7465
fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7466
7467
#--Step 6
7468
#--Step 6.1
7469
mov.l L_SCR1(%a6),%d1 # retrieve M
7470
cmp.l %d1,&63
7471
ble.b MLE63
7472
#--Step 6.2 M >= 64
7473
fmov.s 12(%a1),%fp1 # fp1 is t
7474
fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7475
fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7476
fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7477
bra EM1SCALE
7478
MLE63:
7479
#--Step 6.3 M <= 63
7480
cmp.l %d1,&-3
7481
bge.b MGEN3
7482
MLTN3:
7483
#--Step 6.4 M <= -4
7484
fadd.s 12(%a1),%fp0 # p+t
7485
fadd.x (%a1),%fp0 # T+(p+t)
7486
fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7487
bra EM1SCALE
7488
MGEN3:
7489
#--Step 6.5 -3 <= M <= 63
7490
fmov.x (%a1)+,%fp1 # fp1 is T
7491
fadd.s (%a1),%fp0 # fp0 is p+t
7492
fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7493
fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7494
7495
EM1SCALE:
7496
#--Step 6.6
7497
fmov.l %d0,%fpcr
7498
fmul.x SC(%a6),%fp0
7499
bra t_inx2
7500
7501
EM1SM:
7502
#--Step 7 |X| < 1/4.
7503
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7504
bge.b EM1POLY
7505
7506
EM1TINY:
7507
#--Step 8 |X| < 2^(-65)
7508
cmp.l %d1,&0x00330000 # 2^(-16312)
7509
blt.b EM12TINY
7510
#--Step 8.2
7511
mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7512
mov.l &0x80000000,SC+4(%a6)
7513
clr.l SC+8(%a6)
7514
fmov.x (%a0),%fp0
7515
fmov.l %d0,%fpcr
7516
mov.b &FADD_OP,%d1 # last inst is ADD
7517
fadd.x SC(%a6),%fp0
7518
bra t_catch
7519
7520
EM12TINY:
7521
#--Step 8.3
7522
fmov.x (%a0),%fp0
7523
fmul.d TWO140(%pc),%fp0
7524
mov.l &0x80010000,SC(%a6)
7525
mov.l &0x80000000,SC+4(%a6)
7526
clr.l SC+8(%a6)
7527
fadd.x SC(%a6),%fp0
7528
fmov.l %d0,%fpcr
7529
mov.b &FMUL_OP,%d1 # last inst is MUL
7530
fmul.d TWON140(%pc),%fp0
7531
bra t_catch
7532
7533
EM1POLY:
7534
#--Step 9 exp(X)-1 by a simple polynomial
7535
fmov.x (%a0),%fp0 # fp0 is X
7536
fmul.x %fp0,%fp0 # fp0 is S := X*X
7537
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7538
fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7539
fmul.x %fp0,%fp1 # fp1 is S*B12
7540
fmov.s &0x310F8290,%fp2 # fp2 is B11
7541
fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7542
7543
fmul.x %fp0,%fp2 # fp2 is S*B11
7544
fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7545
7546
fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7547
fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7548
7549
fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7550
fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7551
7552
fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7553
fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7554
7555
fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7556
fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7557
7558
fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7559
fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7560
7561
fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7562
fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7563
7564
fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7565
fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7566
7567
fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7568
fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7569
7570
fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7571
fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7572
7573
fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7574
fadd.x %fp2,%fp1 # fp1 is Q
7575
7576
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7577
7578
fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7579
7580
fmov.l %d0,%fpcr
7581
fadd.x (%a0),%fp0
7582
bra t_inx2
7583
7584
EM1BIG:
7585
#--Step 10 |X| > 70 log2
7586
mov.l (%a0),%d1
7587
cmp.l %d1,&0
7588
bgt.w EXPC1
7589
#--Step 10.2
7590
fmov.s &0xBF800000,%fp0 # fp0 is -1
7591
fmov.l %d0,%fpcr
7592
fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7593
bra t_minx2
7594
7595
global setoxm1d
7596
setoxm1d:
7597
#--entry point for EXPM1(X), here X is denormalized
7598
#--Step 0.
7599
bra t_extdnrm
7600
7601
#########################################################################
7602
# sgetexp(): returns the exponent portion of the input argument. #
7603
# The exponent bias is removed and the exponent value is #
7604
# returned as an extended precision number in fp0. #
7605
# sgetexpd(): handles denormalized numbers. #
7606
# #
7607
# sgetman(): extracts the mantissa of the input argument. The #
7608
# mantissa is converted to an extended precision number w/ #
7609
# an exponent of $3fff and is returned in fp0. The range of #
7610
# the result is [1.0 - 2.0). #
7611
# sgetmand(): handles denormalized numbers. #
7612
# #
7613
# INPUT *************************************************************** #
7614
# a0 = pointer to extended precision input #
7615
# #
7616
# OUTPUT ************************************************************** #
7617
# fp0 = exponent(X) or mantissa(X) #
7618
# #
7619
#########################################################################
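#
# Example (added illustration, not part of the original notes): for the
# input X = 12.5 = 2^3 * 1.5625, sgetexp() returns 3.0 and sgetman()
# returns 1.5625; a negative input yields the same exponent but a
# negated mantissa.
#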
7620
7621
global sgetexp
7622
sgetexp:
7623
mov.w SRC_EX(%a0),%d0 # get the exponent
7624
bclr &0xf,%d0 # clear the sign bit
7625
subi.w &0x3fff,%d0 # subtract off the bias
7626
fmov.w %d0,%fp0 # return exp in fp0
7627
blt.b sgetexpn # it's negative
7628
rts
7629
7630
sgetexpn:
7631
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7632
rts
7633
7634
global sgetexpd
7635
sgetexpd:
7636
bsr.l norm # normalize
7637
neg.w %d0 # new exp = -(shft amt)
7638
subi.w &0x3fff,%d0 # subtract off the bias
7639
fmov.w %d0,%fp0 # return exp in fp0
7640
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7641
rts
7642
7643
global sgetman
7644
sgetman:
7645
mov.w SRC_EX(%a0),%d0 # get the exp
7646
ori.w &0x7fff,%d0 # clear old exp
7647
bclr &0xe,%d0 # make it the new exp +-3fff
7648
7649
# here, we build the result in a tmp location so as not to disturb the input
7650
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7653
fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7654
bmi.b sgetmann # it's negative
7655
rts
7656
7657
sgetmann:
7658
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7659
rts
7660
7661
#
7662
# For denormalized numbers, shift the mantissa until the j-bit = 1,
7663
# then load the exponent with +/- $3fff.
7664
#
7665
global sgetmand
7666
sgetmand:
7667
bsr.l norm # normalize exponent
7668
bra.b sgetman
7669
7670
#########################################################################
7671
# scosh(): computes the hyperbolic cosine of a normalized input #
7672
# scoshd(): computes the hyperbolic cosine of a denormalized input #
7673
# #
7674
# INPUT *************************************************************** #
7675
# a0 = pointer to extended precision input #
7676
# d0 = round precision,mode #
7677
# #
7678
# OUTPUT ************************************************************** #
7679
# fp0 = cosh(X) #
7680
# #
7681
# ACCURACY and MONOTONICITY ******************************************* #
7682
#	The returned result is within 3 ulps in 64 significant bits,	#
7683
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7684
# rounded to double precision. The result is provably monotonic #
7685
# in double precision. #
7686
# #
7687
# ALGORITHM *********************************************************** #
7688
# #
7689
# COSH #
7690
# 1. If |X| > 16380 log2, go to 3. #
7691
# #
7692
# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7693
# y = |X|, z = exp(Y), and #
7694
# cosh(X) = (1/2)*( z + 1/z ). #
7695
# Exit. #
7696
# #
7697
# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7698
# #
7699
# 4. (16380 log2 < |X| <= 16480 log2) #
7700
#		cosh(X) = exp(|X|)/2.					#
7701
# However, invoking exp(|X|) may cause premature #
7702
#		overflow. Thus, we calculate cosh(X) as follows:	#
7703
# Y := |X| #
7704
# Fact := 2**(16380) #
7705
# Y' := Y - 16381 log2 #
7706
# cosh(X) := Fact * exp(Y'). #
7707
# Exit. #
7708
# #
7709
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7710
# Huge*Huge to generate overflow and an infinity with #
7711
# the appropriate sign. Huge is the largest finite number #
7712
# in extended format. Exit. #
7713
# #
7714
#########################################################################
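#
# Note on Step 4 (added illustration, not part of the original notes):
# since exp(Y)/2 = exp(Y - log2) = 2^(16380) * exp(Y - 16381 log2), the
# scaled form cosh(X) = 2^(16380) * exp(|X| - 16381 log2) equals
# exp(|X|)/2 exactly but keeps the intermediate exp() result in range.
#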
7715
7716
TWO16380:
7717
long 0x7FFB0000,0x80000000,0x00000000,0x00000000
7718
7719
global scosh
7720
scosh:
7721
fmov.x (%a0),%fp0 # LOAD INPUT
7722
7723
mov.l (%a0),%d1
7724
mov.w 4(%a0),%d1
7725
and.l &0x7FFFFFFF,%d1
7726
cmp.l %d1,&0x400CB167
7727
bgt.b COSHBIG
7728
7729
#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7730
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7731
7732
fabs.x %fp0 # |X|
7733
7734
mov.l %d0,-(%sp)
7735
clr.l %d0
7736
fmovm.x &0x01,-(%sp) # save |X| to stack
7737
lea (%sp),%a0 # pass ptr to |X|
7738
bsr setox # FP0 IS EXP(|X|)
7739
add.l &0xc,%sp # erase |X| from stack
7740
fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7741
mov.l (%sp)+,%d0
7742
7743
fmov.s &0x3E800000,%fp1 # (1/4)
7744
fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
7745
7746
fmov.l %d0,%fpcr
7747
mov.b &FADD_OP,%d1 # last inst is ADD
7748
fadd.x %fp1,%fp0
7749
bra t_catch
7750
7751
COSHBIG:
7752
cmp.l %d1,&0x400CB2B3
7753
bgt.b COSHHUGE
7754
7755
fabs.x %fp0
7756
fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7757
fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7758
7759
mov.l %d0,-(%sp)
7760
clr.l %d0
7761
fmovm.x &0x01,-(%sp) # save fp0 to stack
7762
lea (%sp),%a0 # pass ptr to fp0
7763
bsr setox
7764
add.l &0xc,%sp # clear fp0 from stack
7765
mov.l (%sp)+,%d0
7766
7767
fmov.l %d0,%fpcr
7768
mov.b &FMUL_OP,%d1 # last inst is MUL
7769
fmul.x TWO16380(%pc),%fp0
7770
bra t_catch
7771
7772
COSHHUGE:
7773
bra t_ovfl2
7774
7775
global scoshd
7776
#--COSH(X) = 1 FOR DENORMALIZED X
7777
scoshd:
7778
fmov.s &0x3F800000,%fp0
7779
7780
fmov.l %d0,%fpcr
7781
fadd.s &0x00800000,%fp0
7782
bra t_pinx2
7783
7784
#########################################################################
7785
# ssinh(): computes the hyperbolic sine of a normalized input #
7786
# ssinhd(): computes the hyperbolic sine of a denormalized input #
7787
# #
7788
# INPUT *************************************************************** #
7789
# a0 = pointer to extended precision input #
7790
# d0 = round precision,mode #
7791
# #
7792
# OUTPUT ************************************************************** #
7793
# fp0 = sinh(X) #
7794
# #
7795
# ACCURACY and MONOTONICITY ******************************************* #
7796
#	The returned result is within 3 ulps in 64 significant bits,	#
7797
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798
# rounded to double precision. The result is provably monotonic #
7799
# in double precision. #
7800
# #
7801
# ALGORITHM *********************************************************** #
7802
# #
7803
# SINH #
7804
# 1. If |X| > 16380 log2, go to 3. #
7805
# #
7806
# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7807
# y = |X|, sgn = sign(X), and z = expm1(Y), #
7808
# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7809
# Exit. #
7810
# #
7811
# 3. If |X| > 16480 log2, go to 5. #
7812
# #
7813
# 4. (16380 log2 < |X| <= 16480 log2) #
7814
# sinh(X) = sign(X) * exp(|X|)/2. #
7815
# However, invoking exp(|X|) may cause premature overflow. #
7816
# Thus, we calculate sinh(X) as follows: #
7817
# Y := |X| #
7818
# sgn := sign(X) #
7819
# sgnFact := sgn * 2**(16380) #
7820
# Y' := Y - 16381 log2 #
7821
# sinh(X) := sgnFact * exp(Y'). #
7822
# Exit. #
7823
# #
7824
# 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7825
# sign(X)*Huge*Huge to generate overflow and an infinity with #
7826
# the appropriate sign. Huge is the largest finite number in #
7827
# extended format. Exit. #
7828
# #
7829
#########################################################################
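#
# A minimal C sketch (not part of the FPSP sources) of the usual-case
# sinh() formula above, with expm1() standing in for setoxm1:
#
#	#include <math.h>
#
#	static double sinh_sketch(double x)
#	{
#	    double y = fabs(x);
#	    double z = expm1(y);                   /* z = exp(y) - 1     */
#	    double s = 0.5 * (z + z / (1.0 + z));  /* (1/2)(z + z/(1+z)) */
#	    return (x < 0.0) ? -s : s;             /* restore sign(X)    */
#	}
#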
7830
7831
global ssinh
7832
ssinh:
7833
fmov.x (%a0),%fp0 # LOAD INPUT
7834
7835
mov.l (%a0),%d1
7836
mov.w 4(%a0),%d1
7837
mov.l %d1,%a1 # save (compacted) operand
7838
and.l &0x7FFFFFFF,%d1
7839
cmp.l %d1,&0x400CB167
7840
bgt.b SINHBIG
7841
7842
#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7843
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7844
7845
fabs.x %fp0 # Y = |X|
7846
7847
movm.l &0x8040,-(%sp) # {a1/d0}
7848
fmovm.x &0x01,-(%sp) # save Y on stack
7849
lea (%sp),%a0 # pass ptr to Y
7850
clr.l %d0
7851
bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7852
add.l &0xc,%sp # clear Y from stack
7853
fmov.l &0,%fpcr
7854
movm.l (%sp)+,&0x0201 # {a1/d0}
7855
7856
fmov.x %fp0,%fp1
7857
fadd.s &0x3F800000,%fp1 # 1+Z
7858
fmov.x %fp0,-(%sp)
7859
fdiv.x %fp1,%fp0 # Z/(1+Z)
7860
mov.l %a1,%d1
7861
and.l &0x80000000,%d1
7862
or.l &0x3F000000,%d1
7863
fadd.x (%sp)+,%fp0
7864
mov.l %d1,-(%sp)
7865
7866
fmov.l %d0,%fpcr
7867
mov.b &FMUL_OP,%d1 # last inst is MUL
7868
fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7869
bra t_catch
7870
7871
SINHBIG:
7872
cmp.l %d1,&0x400CB2B3
7873
bgt t_ovfl
7874
fabs.x %fp0
7875
fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7876
mov.l &0,-(%sp)
7877
mov.l &0x80000000,-(%sp)
7878
mov.l %a1,%d1
7879
and.l &0x80000000,%d1
7880
or.l &0x7FFB0000,%d1
7881
mov.l %d1,-(%sp) # EXTENDED FMT
7882
fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7883
7884
mov.l %d0,-(%sp)
7885
clr.l %d0
7886
fmovm.x &0x01,-(%sp) # save fp0 on stack
7887
lea (%sp),%a0 # pass ptr to fp0
7888
bsr setox
7889
add.l &0xc,%sp # clear fp0 from stack
7890
7891
mov.l (%sp)+,%d0
7892
fmov.l %d0,%fpcr
7893
mov.b &FMUL_OP,%d1 # last inst is MUL
7894
fmul.x (%sp)+,%fp0 # possible exception
7895
bra t_catch
7896
7897
global ssinhd
7898
#--SINH(X) = X FOR DENORMALIZED X
7899
ssinhd:
7900
bra t_extdnrm
7901
7902
#########################################################################
7903
# stanh(): computes the hyperbolic tangent of a normalized input #
7904
# stanhd(): computes the hyperbolic tangent of a denormalized input #
7905
# #
7906
# INPUT *************************************************************** #
7907
# a0 = pointer to extended precision input #
7908
# d0 = round precision,mode #
7909
# #
7910
# OUTPUT ************************************************************** #
7911
# fp0 = tanh(X) #
7912
# #
7913
# ACCURACY and MONOTONICITY ******************************************* #
7914
# The returned result is within 3 ulps in 64 significant bits, #
7915
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916
# rounded to double precision. The result is provably monotonic #
7917
# in double precision. #
7918
# #
7919
# ALGORITHM *********************************************************** #
7920
# #
7921
# TANH #
7922
# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7923
# #
7924
# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7925
# sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7926
# tanh(X) = sgn*( z/(2+z) ). #
7927
# Exit. #
7928
# #
7929
# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7930
# go to 7. #
7931
# #
7932
# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7933
# #
7934
# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7935
# sgn := sign(X), y := 2|X|, z := exp(Y), #
7936
# tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7937
# Exit. #
7938
# #
7939
# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7940
# calculate Tanh(X) by #
7941
# sgn := sign(X), Tiny := 2**(-126), #
7942
# tanh(X) := sgn - sgn*Tiny. #
7943
# Exit. #
7944
# #
7945
# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7946
# #
7947
#########################################################################
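#
# A minimal C sketch (not part of the FPSP sources) of the usual-case
# tanh() formula above (2**(-40) < |X| < (5/2) log2), with expm1()
# standing in for setoxm1:
#
#	#include <math.h>
#
#	static double tanh_sketch(double x)
#	{
#	    double y = 2.0 * fabs(x);
#	    double z = expm1(y);            /* z = exp(2|X|) - 1 */
#	    double t = z / (2.0 + z);       /* = tanh(|X|)       */
#	    return (x < 0.0) ? -t : t;      /* restore sign(X)   */
#	}
#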
7948
7949
set X,FP_SCR0
7950
set XFRAC,X+4
7951
7952
set SGN,L_SCR3
7953
7954
set V,FP_SCR0
7955
7956
global stanh
7957
stanh:
7958
fmov.x (%a0),%fp0 # LOAD INPUT
7959
7960
fmov.x %fp0,X(%a6)
7961
mov.l (%a0),%d1
7962
mov.w 4(%a0),%d1
7963
mov.l %d1,X(%a6)
7964
and.l &0x7FFFFFFF,%d1
7965
cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7966
blt.w TANHBORS # yes
7967
cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7968
bgt.w TANHBORS # yes
7969
7970
#--THIS IS THE USUAL CASE
7971
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7972
7973
mov.l X(%a6),%d1
7974
mov.l %d1,SGN(%a6)
7975
and.l &0x7FFF0000,%d1
7976
add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7977
mov.l %d1,X(%a6)
7978
and.l &0x80000000,SGN(%a6)
7979
fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7980
7981
mov.l %d0,-(%sp)
7982
clr.l %d0
7983
fmovm.x &0x1,-(%sp) # save Y on stack
7984
lea (%sp),%a0 # pass ptr to Y
7985
bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7986
add.l &0xc,%sp # clear Y from stack
7987
mov.l (%sp)+,%d0
7988
7989
fmov.x %fp0,%fp1
7990
fadd.s &0x40000000,%fp1 # Z+2
7991
mov.l SGN(%a6),%d1
7992
fmov.x %fp1,V(%a6)
7993
eor.l %d1,V(%a6)
7994
7995
fmov.l %d0,%fpcr # restore users round prec,mode
7996
fdiv.x V(%a6),%fp0
7997
bra t_inx2
7998
7999
TANHBORS:
8000
cmp.l %d1,&0x3FFF8000
8001
blt.w TANHSM
8002
8003
cmp.l %d1,&0x40048AA1
8004
bgt.w TANHHUGE
8005
8006
#-- (5/2) LOG2 < |X| < 50 LOG2,
8007
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8008
#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8009
8010
mov.l X(%a6),%d1
8011
mov.l %d1,SGN(%a6)
8012
and.l &0x7FFF0000,%d1
8013
add.l &0x00010000,%d1 # EXPO OF 2|X|
8014
mov.l %d1,X(%a6) # Y = 2|X|
8015
and.l &0x80000000,SGN(%a6)
8016
mov.l SGN(%a6),%d1
8017
fmov.x X(%a6),%fp0 # Y = 2|X|
8018
8019
mov.l %d0,-(%sp)
8020
clr.l %d0
8021
fmovm.x &0x01,-(%sp) # save Y on stack
8022
lea (%sp),%a0 # pass ptr to Y
8023
bsr setox # FP0 IS EXP(Y)
8024
add.l &0xc,%sp # clear Y from stack
8025
mov.l (%sp)+,%d0
8026
mov.l SGN(%a6),%d1
8027
fadd.s &0x3F800000,%fp0 # EXP(Y)+1
8028
8029
eor.l &0xC0000000,%d1 # -SIGN(X)*2
8030
fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
8031
fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
8032
8033
mov.l SGN(%a6),%d1
8034
or.l &0x3F800000,%d1 # SGN
8035
fmov.s %d1,%fp0 # SGN IN SGL FMT
8036
8037
fmov.l %d0,%fpcr # restore users round prec,mode
8038
mov.b &FADD_OP,%d1 # last inst is ADD
8039
fadd.x %fp1,%fp0
8040
bra t_inx2
8041
8042
TANHSM:
8043
fmov.l %d0,%fpcr # restore users round prec,mode
8044
mov.b &FMOV_OP,%d1 # last inst is MOVE
8045
fmov.x X(%a6),%fp0 # last inst - possible exception set
8046
bra t_catch
8047
8048
#---RETURN SGN(X) - SGN(X)EPS
8049
TANHHUGE:
8050
mov.l X(%a6),%d1
8051
and.l &0x80000000,%d1
8052
or.l &0x3F800000,%d1
8053
fmov.s %d1,%fp0
8054
and.l &0x80000000,%d1
8055
eor.l &0x80800000,%d1 # -SIGN(X)*EPS
8056
8057
fmov.l %d0,%fpcr # restore users round prec,mode
8058
fadd.s %d1,%fp0
8059
bra t_inx2
8060
8061
global stanhd
8062
#--TANH(X) = X FOR DENORMALIZED X
8063
stanhd:
8064
bra t_extdnrm
8065
8066
#########################################################################
8067
# slogn(): computes the natural logarithm of a normalized input #
8068
# slognd(): computes the natural logarithm of a denormalized input #
8069
# slognp1(): computes the log(1+X) of a normalized input #
8070
# slognp1d(): computes the log(1+X) of a denormalized input #
8071
# #
8072
# INPUT *************************************************************** #
8073
# a0 = pointer to extended precision input #
8074
# d0 = round precision,mode #
8075
# #
8076
# OUTPUT ************************************************************** #
8077
# fp0 = log(X) or log(1+X) #
8078
# #
8079
# ACCURACY and MONOTONICITY ******************************************* #
8080
# The returned result is within 2 ulps in 64 significant bits, #
8081
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8082
# rounded to double precision. The result is provably monotonic #
8083
# in double precision. #
8084
# #
8085
# ALGORITHM *********************************************************** #
8086
# LOGN: #
8087
# Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8088
# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8089
# move on to Step 2. #
8090
# #
8091
# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8092
# seven significant bits of Y plus 2**(-7), i.e. #
8093
# F = 1.xxxxxx1 in base 2 where the six "x" match those #
8094
# of Y. Note that |Y-F| <= 2**(-7). #
8095
# #
8096
# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8097
# polynomial in u, log(1+u) = poly. #
8098
# #
8099
# Step 4. Reconstruct #
8100
# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8101
# by k*log(2) + (log(F) + poly). The values of log(F) are #
8102
# calculated beforehand and stored in the program. #
8103
# #
8104
# lognp1: #
8105
# Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8106
# polynomial in u where u = 2X/(2+X). Otherwise, move on #
8107
# to Step 2. #
8108
# #
8109
# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8110
# in Step 2 of the algorithm for LOGN and compute #
8111
# log(1+X) as k*log(2) + log(F) + poly where poly #
8112
# approximates log(1+u), u = (Y-F)/F. #
8113
# #
8114
# Implementation Notes: #
8115
# Note 1. There are 64 different possible values for F, thus 64 #
8116
# log(F)'s need to be tabulated. Moreover, the values of #
8117
# 1/F are also tabulated so that the division in (Y-F)/F #
8118
# can be performed by a multiplication. #
8119
# #
8120
# Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8121
# the value Y-F has to be calculated carefully when #
8122
# 1/2 <= X < 3/2. #
8123
# #
8124
# Note 3. To fully exploit the pipeline, polynomials are usually #
8125
# separated into two parts evaluated independently before #
8126
# being added up. #
8127
# #
8128
#########################################################################
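#
# A minimal C sketch (not part of the FPSP sources) of the LOGN
# reconstruction above: log(X) = k*log2 + log(F) + log(1+u) with
# u = (Y-F)/F.  Here log(F) is computed directly instead of being
# read from LOGTBL, and the polynomial is replaced by log1p():
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double logn_sketch(double x)     /* assumes x > 0 */
#	{
#	    int k;
#	    double y = 2.0 * frexp(x, &k);      /* x = y * 2^(k-1), 1 <= y < 2 */
#	    k -= 1;
#	    /* F = first 7 significant bits of Y plus 2^(-7): 1.xxxxxx1 */
#	    double f = (floor(y * 64.0) + 0.5) / 64.0;
#	    double u = (y - f) / f;
#	    return k * LN2 + log(f) + log1p(u);
#	}
#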
8129
LOGOF2:
8130
long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8131
8132
one:
8133
long 0x3F800000
8134
zero:
8135
long 0x00000000
8136
infty:
8137
long 0x7F800000
8138
negone:
8139
long 0xBF800000
8140
8141
LOGA6:
8142
long 0x3FC2499A,0xB5E4040B
8143
LOGA5:
8144
long 0xBFC555B5,0x848CB7DB
8145
8146
LOGA4:
8147
long 0x3FC99999,0x987D8730
8148
LOGA3:
8149
long 0xBFCFFFFF,0xFF6F7E97
8150
8151
LOGA2:
8152
long 0x3FD55555,0x555555A4
8153
LOGA1:
8154
long 0xBFE00000,0x00000008
8155
8156
LOGB5:
8157
long 0x3F175496,0xADD7DAD6
8158
LOGB4:
8159
long 0x3F3C71C2,0xFE80C7E0
8160
8161
LOGB3:
8162
long 0x3F624924,0x928BCCFF
8163
LOGB2:
8164
long 0x3F899999,0x999995EC
8165
8166
LOGB1:
8167
long 0x3FB55555,0x55555555
8168
TWO:
8169
long 0x40000000,0x00000000
8170
8171
LTHOLD:
8172
long 0x3f990000,0x80000000,0x00000000,0x00000000
8173
8174
LOGTBL:
8175
long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176
long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177
long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178
long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179
long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180
long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181
long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182
long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183
long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184
long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185
long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186
long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187
long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188
long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189
long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190
long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191
long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192
long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193
long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194
long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195
long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196
long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197
long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198
long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199
long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200
long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201
long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202
long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203
long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204
long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205
long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206
long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207
long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208
long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209
long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210
long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211
long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212
long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213
long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214
long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215
long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216
long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217
long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218
long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219
long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220
long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221
long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222
long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223
long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224
long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225
long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226
long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227
long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228
long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229
long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230
long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231
long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232
long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233
long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234
long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235
long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236
long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237
long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238
long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239
long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240
long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241
long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242
long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243
long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244
long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245
long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246
long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247
long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248
long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249
long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250
long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251
long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252
long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253
long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254
long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255
long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256
long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257
long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258
long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259
long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260
long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261
long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262
long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263
long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264
long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265
long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266
long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267
long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8268
long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269
long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270
long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271
long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272
long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273
long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274
long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275
long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276
long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277
long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278
long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279
long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280
long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281
long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282
long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283
long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284
long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285
long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286
long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287
long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288
long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289
long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290
long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291
long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292
long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293
long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294
long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295
long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296
long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297
long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298
long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299
long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300
long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301
long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8302
long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8303
8304
set ADJK,L_SCR1
8305
8306
set X,FP_SCR0
8307
set XDCARE,X+2
8308
set XFRAC,X+4
8309
8310
set F,FP_SCR1
8311
set FFRAC,F+4
8312
8313
set KLOG2,FP_SCR0
8314
8315
set SAVEU,FP_SCR0
8316
8317
global slogn
8318
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8319
slogn:
8320
fmov.x (%a0),%fp0 # LOAD INPUT
8321
mov.l &0x00000000,ADJK(%a6)
8322
8323
LOGBGN:
8324
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8325
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8326
8327
mov.l (%a0),%d1
8328
mov.w 4(%a0),%d1
8329
8330
mov.l (%a0),X(%a6)
8331
mov.l 4(%a0),X+4(%a6)
8332
mov.l 8(%a0),X+8(%a6)
8333
8334
cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8335
blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8336
# X IS POSITIVE, CHECK IF X IS NEAR 1
8337
cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8338
blt.b LOGMAIN # YES
8339
cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8340
ble.w LOGNEAR1 # NO
8341
8342
LOGMAIN:
8343
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8344
8345
#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8346
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8347
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8348
#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8349
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8350
#--LOG(1+U) CAN BE VERY EFFICIENT.
8351
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8352
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8353
8354
#--GET K, Y, F, AND ADDRESS OF 1/F.
8355
asr.l &8,%d1
8356
asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8357
sub.l &0x3FFF,%d1 # THIS IS K
8358
add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8359
lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8360
fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8361
8362
#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8363
mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8364
mov.l XFRAC(%a6),FFRAC(%a6)
8365
and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8366
or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8367
mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8368
and.l &0x7E000000,%d1
8369
asr.l &8,%d1
8370
asr.l &8,%d1
8371
asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT
8372
add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8373
8374
fmov.x X(%a6),%fp0
8375
mov.l &0x3fff0000,F(%a6)
8376
clr.l F+8(%a6)
8377
fsub.x F(%a6),%fp0 # Y-F
8378
fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8379
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8380
#--REGISTERS SAVED: FPCR, FP1, FP2
8381
8382
LP1CONT1:
8383
#--A RE-ENTRY POINT FOR LOGNP1
8384
fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8385
fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8386
fmov.x %fp0,%fp2
8387
fmul.x %fp2,%fp2 # FP2 IS V=U*U
8388
fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8389
8390
#--LOG(1+U) IS APPROXIMATED BY
8391
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8392
#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8393
8394
fmov.x %fp2,%fp3
8395
fmov.x %fp2,%fp1
8396
8397
fmul.d LOGA6(%pc),%fp1 # V*A6
8398
fmul.d LOGA5(%pc),%fp2 # V*A5
8399
8400
fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8401
fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8402
8403
fmul.x %fp3,%fp1 # V*(A4+V*A6)
8404
fmul.x %fp3,%fp2 # V*(A3+V*A5)
8405
8406
fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8407
fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8408
8409
fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8410
add.l &16,%a0 # ADDRESS OF LOG(F)
8411
fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8412
8413
fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8414
fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8415
8416
fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8417
fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8418
fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8419
8420
fmov.l %d0,%fpcr
8421
fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8422
bra t_inx2
8423
8424
8425
LOGNEAR1:
8426
8427
# if the input is exactly equal to one, then exit through ld_pzero.
8428
# if these 2 lines weren't here, the correct answer would be returned
8429
# but the INEX2 bit would be set.
8430
fcmp.b %fp0,&0x1 # is it equal to one?
8431
fbeq.l ld_pzero # yes
8432
8433
#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8434
fmov.x %fp0,%fp1
8435
fsub.s one(%pc),%fp1 # FP1 IS X-1
8436
fadd.s one(%pc),%fp0 # FP0 IS X+1
8437
fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8438
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8440
8441
LP1CONT2:
8442
#--THIS IS A RE-ENTRY POINT FOR LOGNP1
8443
fdiv.x %fp0,%fp1 # FP1 IS U
8444
fmovm.x &0xc,-(%sp) # SAVE FP2-3
8445
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446
#--LET V=U*U, W=V*V, CALCULATE
8447
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448
#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8449
fmov.x %fp1,%fp0
8450
fmul.x %fp0,%fp0 # FP0 IS V
8451
fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8452
fmov.x %fp0,%fp1
8453
fmul.x %fp1,%fp1 # FP1 IS W
8454
8455
fmov.d LOGB5(%pc),%fp3
8456
fmov.d LOGB4(%pc),%fp2
8457
8458
fmul.x %fp1,%fp3 # W*B5
8459
fmul.x %fp1,%fp2 # W*B4
8460
8461
fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8462
fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8463
8464
fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8465
8466
fmul.x %fp0,%fp2 # V*(B2+W*B4)
8467
8468
fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8469
fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8470
8471
fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472
fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8473
8474
fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8475
8476
fmov.l %d0,%fpcr
8477
fadd.x SAVEU(%a6),%fp0
8478
bra t_inx2
8479
8480
#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8481
LOGNEG:
8482
bra t_operr
8483
8484
global slognd
8485
slognd:
8486
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8487
8488
mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8489
8490
#----normalize the input value by left shifting k bits (k to be determined
8491
#----below), adjusting exponent and storing -k to ADJK
8492
#----the value TWOTO100 is no longer needed.
8493
#----Note that this code assumes the denormalized input is NON-ZERO.
8494
8495
movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8496
mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8497
mov.l 4(%a0),%d4
8498
mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8499
clr.l %d2 # D2 used for holding K
8500
8501
tst.l %d4
8502
bne.b Hi_not0
8503
8504
Hi_0:
8505
mov.l %d5,%d4
8506
clr.l %d5
8507
mov.l &32,%d2
8508
clr.l %d6
8509
bfffo %d4{&0:&32},%d6
8510
lsl.l %d6,%d4
8511
add.l %d6,%d2 # (D3,D4,D5) is normalized
8512
8513
mov.l %d3,X(%a6)
8514
mov.l %d4,XFRAC(%a6)
8515
mov.l %d5,XFRAC+4(%a6)
8516
neg.l %d2
8517
mov.l %d2,ADJK(%a6)
8518
fmov.x X(%a6),%fp0
8519
movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8520
lea X(%a6),%a0
8521
bra.w LOGBGN # begin regular log(X)
8522
8523
Hi_not0:
8524
clr.l %d6
8525
bfffo %d4{&0:&32},%d6 # find first 1
8526
mov.l %d6,%d2 # get k
8527
lsl.l %d6,%d4
8528
mov.l %d5,%d7 # a copy of D5
8529
lsl.l %d6,%d5
8530
neg.l %d6
8531
add.l &32,%d6
8532
lsr.l %d6,%d7
8533
or.l %d7,%d4 # (D3,D4,D5) normalized
8534
8535
mov.l %d3,X(%a6)
8536
mov.l %d4,XFRAC(%a6)
8537
mov.l %d5,XFRAC+4(%a6)
8538
neg.l %d2
8539
mov.l %d2,ADJK(%a6)
8540
fmov.x X(%a6),%fp0
8541
movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8542
lea X(%a6),%a0
8543
bra.w LOGBGN # begin regular log(X)
8544
8545
global slognp1
8546
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8547
slognp1:
8548
fmov.x (%a0),%fp0 # LOAD INPUT
8549
fabs.x %fp0 # test magnitude
8550
fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8551
fbgt.w LP1REAL # if greater, continue
8552
fmov.l %d0,%fpcr
8553
mov.b &FMOV_OP,%d1 # last inst is MOVE
8554
fmov.x (%a0),%fp0 # return signed argument
8555
bra t_catch
8556
8557
LP1REAL:
8558
fmov.x (%a0),%fp0 # LOAD INPUT
8559
mov.l &0x00000000,ADJK(%a6)
8560
fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8561
fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8562
fmov.x %fp0,X(%a6)
8563
mov.w XFRAC(%a6),XDCARE(%a6)
8564
mov.l X(%a6),%d1
8565
cmp.l %d1,&0
8566
ble.w LP1NEG0 # LOG OF ZERO OR -VE
8567
cmp.l %d1,&0x3ffe8000 # IS 1+Z IN BOUNDS [1/2,3/2]?
8568
blt.w LOGMAIN
8569
cmp.l %d1,&0x3fffc000
8570
bgt.w LOGMAIN
8571
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8574
8575
LP1NEAR1:
8576
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577
cmp.l %d1,&0x3ffef07d
8578
blt.w LP1CARE
8579
cmp.l %d1,&0x3fff8841
8580
bgt.w LP1CARE
8581
8582
LP1ONE16:
8583
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585
fadd.x %fp1,%fp1 # FP1 IS 2Z
8586
fadd.s one(%pc),%fp0 # FP0 IS 1+X
8587
#--U = FP1/FP0
8588
bra.w LP1CONT2
8589
8590
LP1CARE:
8591
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594
#--THERE ARE ONLY TWO CASES.
8595
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596
#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8597
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8599
8600
mov.l XFRAC(%a6),FFRAC(%a6)
8601
and.l &0xFE000000,FFRAC(%a6)
8602
or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8603
cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8604
bge.b KISZERO
8605
8606
KISNEG1:
8607
fmov.s TWO(%pc),%fp0
8608
mov.l &0x3fff0000,F(%a6)
8609
clr.l F+8(%a6)
8610
fsub.x F(%a6),%fp0 # 2-F
8611
mov.l FFRAC(%a6),%d1
8612
and.l &0x7E000000,%d1
8613
asr.l &8,%d1
8614
asr.l &8,%d1
8615
asr.l &4,%d1 # D0 CONTAINS DISPLACEMENT FOR 1/F
8616
fadd.x %fp1,%fp1 # GET 2Z
8617
fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8618
fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8619
lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8620
add.l %d1,%a0
8621
fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8622
bra.w LP1CONT1
8623
8624
KISZERO:
8625
fmov.s one(%pc),%fp0
8626
mov.l &0x3fff0000,F(%a6)
8627
clr.l F+8(%a6)
8628
fsub.x F(%a6),%fp0 # 1-F
8629
mov.l FFRAC(%a6),%d1
8630
and.l &0x7E000000,%d1
8631
asr.l &8,%d1
8632
asr.l &8,%d1
8633
asr.l &4,%d1
8634
fadd.x %fp1,%fp0 # FP0 IS Y-F
8635
fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8636
lea LOGTBL(%pc),%a0
8637
add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8638
fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8639
bra.w LP1CONT1
8640
8641
LP1NEG0:
8642
#--FPCR SAVED. D0 IS X IN COMPACT FORM.
8643
cmp.l %d1,&0
8644
blt.b LP1NEG
8645
LP1ZERO:
8646
fmov.s negone(%pc),%fp0
8647
8648
fmov.l %d0,%fpcr
8649
bra t_dz
8650
8651
LP1NEG:
8652
fmov.s zero(%pc),%fp0
8653
8654
fmov.l %d0,%fpcr
8655
bra t_operr
8656
8657
global slognp1d
8658
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8659
# Simply return the denorm
8660
slognp1d:
8661
bra t_extdnrm
8662
8663
#########################################################################
8664
# satanh(): computes the inverse hyperbolic tangent of a norm input #
8665
# satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8666
# #
8667
# INPUT *************************************************************** #
8668
# a0 = pointer to extended precision input #
8669
# d0 = round precision,mode #
8670
# #
8671
# OUTPUT ************************************************************** #
8672
# fp0 = arctanh(X) #
8673
# #
8674
# ACCURACY and MONOTONICITY ******************************************* #
8675
# The returned result is within 3 ulps in 64 significant bits, #
8676
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8677
# rounded to double precision. The result is provably monotonic #
8678
# in double precision. #
8679
# #
8680
# ALGORITHM *********************************************************** #
8681
# #
8682
# ATANH #
8683
# 1. If |X| >= 1, go to 3. #
8684
# #
8685
# 2. (|X| < 1) Calculate atanh(X) by #
8686
# sgn := sign(X) #
8687
# y := |X| #
8688
# z := 2y/(1-y) #
8689
# atanh(X) := sgn * (1/2) * logp1(z) #
8690
# Exit. #
8691
# #
8692
# 3. If |X| > 1, go to 5. #
8693
# #
8694
# 4. (|X| = 1) Generate infinity with an appropriate sign and #
8695
# divide-by-zero by #
8696
# sgn := sign(X) #
8697
# atan(X) := sgn / (+0). #
8698
# Exit. #
8699
# #
8700
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8701
# Exit. #
8702
# #
8703
#########################################################################
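#
# A minimal C sketch (not part of the FPSP sources) of the |X| < 1
# case above, with log1p() standing in for the call to slognp1:
#
#	#include <math.h>
#
#	static double atanh_sketch(double x)    /* assumes |x| < 1 */
#	{
#	    double y = fabs(x);
#	    double z = 2.0 * y / (1.0 - y);     /* z = 2y/(1-y)     */
#	    double a = 0.5 * log1p(z);          /* (1/2) * logp1(z) */
#	    return (x < 0.0) ? -a : a;          /* restore sign(X)  */
#	}
#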
8704
8705
global satanh
8706
satanh:
8707
mov.l (%a0),%d1
8708
mov.w 4(%a0),%d1
8709
and.l &0x7FFFFFFF,%d1
8710
cmp.l %d1,&0x3FFF8000
8711
bge.b ATANHBIG
8712
8713
#--THIS IS THE USUAL CASE, |X| < 1
8714
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8715
8716
fabs.x (%a0),%fp0 # Y = |X|
8717
fmov.x %fp0,%fp1
8718
fneg.x %fp1 # -Y
8719
fadd.x %fp0,%fp0 # 2Y
8720
fadd.s &0x3F800000,%fp1 # 1-Y
8721
fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8722
mov.l (%a0),%d1
8723
and.l &0x80000000,%d1
8724
or.l &0x3F000000,%d1 # SIGN(X)*HALF
8725
mov.l %d1,-(%sp)
8726
8727
mov.l %d0,-(%sp) # save rnd prec,mode
8728
clr.l %d0 # pass ext prec,RN
8729
fmovm.x &0x01,-(%sp) # save Z on stack
8730
lea (%sp),%a0 # pass ptr to Z
8731
bsr slognp1 # LOG1P(Z)
8732
add.l &0xc,%sp # clear Z from stack
8733
8734
mov.l (%sp)+,%d0 # fetch old prec,mode
8735
fmov.l %d0,%fpcr # load it
8736
mov.b &FMUL_OP,%d1 # last inst is MUL
8737
fmul.s (%sp)+,%fp0
8738
bra t_catch
8739
8740
ATANHBIG:
8741
fabs.x (%a0),%fp0 # |X|
8742
fcmp.s %fp0,&0x3F800000
8743
fbgt t_operr
8744
bra t_dz
8745
8746
global satanhd
8747
#--ATANH(X) = X FOR DENORMALIZED X
8748
satanhd:
8749
bra t_extdnrm
8750
8751
#########################################################################
8752
# slog10(): computes the base-10 logarithm of a normalized input #
8753
# slog10d(): computes the base-10 logarithm of a denormalized input #
8754
# slog2(): computes the base-2 logarithm of a normalized input #
8755
# slog2d(): computes the base-2 logarithm of a denormalized input #
8756
# #
8757
# INPUT *************************************************************** #
8758
# a0 = pointer to extended precision input #
8759
# d0 = round precision,mode #
8760
# #
8761
# OUTPUT ************************************************************** #
8762
# fp0 = log_10(X) or log_2(X) #
8763
# #
8764
# ACCURACY and MONOTONICITY ******************************************* #
8765
# The returned result is within 1.7 ulps in 64 significant bits, #
8766
# i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8767
# rounded to double precision. The result is provably monotonic #
8768
# in double precision. #
8769
# #
8770
# ALGORITHM *********************************************************** #
8771
# #
8772
# slog10d: #
8773
# #
8774
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8775
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8776
# Notes: Default means round-to-nearest mode, no floating-point #
8777
# traps, and precision control = double extended. #
8778
# #
8779
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8780
# Notes: Even if X is denormalized, log(X) is always normalized. #
8781
# #
8782
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8783
# 2.1 Restore the user FPCR #
8784
# 2.2 Return ans := Y * INV_L10. #
8785
# #
8786
# slog10: #
8787
# #
8788
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8789
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8790
# Notes: Default means round-to-nearest mode, no floating-point #
8791
# traps, and precision control = double extended. #
8792
# #
8793
# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8794
# #
8795
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8796
# 2.1 Restore the user FPCR #
8797
# 2.2 Return ans := Y * INV_L10. #
8798
# #
8799
# sLog2d: #
8800
# #
8801
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8802
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8803
# Notes: Default means round-to-nearest mode, no floating-point #
8804
# traps, and precision control = double extended. #
8805
# #
8806
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8807
# Notes: Even if X is denormalized, log(X) is always normalized. #
8808
# #
8809
# Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8810
# 2.1 Restore the user FPCR #
8811
# 2.2 Return ans := Y * INV_L2. #
8812
# #
8813
# sLog2: #
8814
# #
8815
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8816
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8817
# Notes: Default means round-to-nearest mode, no floating-point #
8818
# traps, and precision control = double extended. #
8819
# #
8820
# Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8821
# go to Step 3. #
8822
# #
8823
# Step 2. Return k. #
8824
# 2.1 Get integer k, X = 2^k. #
8825
# 2.2 Restore the user FPCR. #
8826
# 2.3 Return ans := convert-to-double-extended(k). #
8827
# #
8828
# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8829
# #
8830
# Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8831
# 4.1 Restore the user FPCR #
8832
# 4.2 Return ans := Y * INV_L2. #
8833
# #
8834
#########################################################################
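#
# A minimal C sketch (not part of the FPSP sources) of the slog2
# strategy above: exact powers of two return the exponent exactly,
# everything else is log(X) scaled by INV_L2 (slog10 is identical
# with INV_L10 = 1/log(10)):
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double log2_sketch(double x)     /* assumes x > 0 */
#	{
#	    int k;
#	    if (frexp(x, &k) == 0.5)            /* X == 2^(k-1) exactly */
#	        return (double)(k - 1);
#	    return log(x) * (1.0 / LN2);        /* Y * INV_L2 */
#	}
#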
8835
8836
INV_L10:
8837
long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8838
8839
INV_L2:
8840
long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8841
8842
global slog10
8843
#--entry point for Log10(X), X is normalized
8844
slog10:
8845
fmov.b &0x1,%fp0
8846
fcmp.x %fp0,(%a0) # if operand == 1,
8847
fbeq.l ld_pzero # return an EXACT zero
8848
8849
mov.l (%a0),%d1
8850
blt.w invalid
8851
mov.l %d0,-(%sp)
8852
clr.l %d0
8853
bsr slogn # log(X), X normal.
8854
fmov.l (%sp)+,%fpcr
8855
fmul.x INV_L10(%pc),%fp0
8856
bra t_inx2
8857
8858
global slog10d
8859
#--entry point for Log10(X), X is denormalized
8860
slog10d:
8861
mov.l (%a0),%d1
8862
blt.w invalid
8863
mov.l %d0,-(%sp)
8864
clr.l %d0
8865
bsr slognd # log(X), X denorm.
8866
fmov.l (%sp)+,%fpcr
8867
fmul.x INV_L10(%pc),%fp0
8868
bra t_minx2
8869
8870
global slog2
8871
#--entry point for Log2(X), X is normalized
8872
slog2:
8873
mov.l (%a0),%d1
8874
blt.w invalid
8875
8876
mov.l 8(%a0),%d1
8877
bne.b continue # X is not 2^k
8878
8879
mov.l 4(%a0),%d1
8880
and.l &0x7FFFFFFF,%d1
8881
bne.b continue
8882
8883
#--X = 2^k.
8884
mov.w (%a0),%d1
8885
and.l &0x00007FFF,%d1
8886
sub.l &0x3FFF,%d1
8887
beq.l ld_pzero
8888
fmov.l %d0,%fpcr
8889
fmov.l %d1,%fp0
8890
bra t_inx2
8891
8892
continue:
8893
mov.l %d0,-(%sp)
8894
clr.l %d0
8895
bsr slogn # log(X), X normal.
8896
fmov.l (%sp)+,%fpcr
8897
fmul.x INV_L2(%pc),%fp0
8898
bra t_inx2
8899
8900
invalid:
8901
bra t_operr
8902
8903
global slog2d
8904
#--entry point for Log2(X), X is denormalized
8905
slog2d:
8906
mov.l (%a0),%d1
8907
blt.w invalid
8908
mov.l %d0,-(%sp)
8909
clr.l %d0
8910
bsr slognd # log(X), X denorm.
8911
fmov.l (%sp)+,%fpcr
8912
fmul.x INV_L2(%pc),%fp0
8913
bra t_minx2
8914
8915
#########################################################################
8916
# stwotox(): computes 2**X for a normalized input #
8917
# stwotoxd(): computes 2**X for a denormalized input #
8918
# stentox(): computes 10**X for a normalized input #
8919
# stentoxd(): computes 10**X for a denormalized input #
8920
# #
8921
# INPUT *************************************************************** #
8922
# a0 = pointer to extended precision input #
8923
# d0 = round precision,mode #
8924
# #
8925
# OUTPUT ************************************************************** #
8926
# fp0 = 2**X or 10**X #
8927
# #
8928
# ACCURACY and MONOTONICITY ******************************************* #
8929
# The returned result is within 2 ulps in 64 significant bits, #
8930
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8931
# rounded to double precision. The result is provably monotonic #
8932
# in double precision. #
8933
# #
8934
# ALGORITHM *********************************************************** #
8935
# #
8936
# twotox #
8937
# 1. If |X| > 16480, go to ExpBig. #
8938
# #
8939
# 2. If |X| < 2**(-70), go to ExpSm. #
8940
# #
8941
# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8942
# decompose N as #
8943
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8944
# #
8945
# 4. Overwrite r := r * log2. Then #
8946
# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8947
# Go to expr to compute that expression. #
8948
# #
8949
# tentox #
8950
# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8951
# #
8952
# 2. If |X| < 2**(-70), go to ExpSm. #
8953
# #
8954
# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8955
# N := round-to-int(y). Decompose N as #
8956
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8957
# #
8958
# 4. Define r as #
8959
# r := ((X - N*L1)-N*L2) * L10 #
8960
# where L1, L2 are the leading and trailing parts of #
8961
# log_10(2)/64 and L10 is the natural log of 10. Then #
8962
# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8963
# Go to expr to compute that expression. #
8964
# #
8965
# expr #
8966
# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8967
# #
8968
# 2. Overwrite Fact1 and Fact2 by #
8969
# Fact1 := 2**(M) * Fact1 #
8970
# Fact2 := 2**(M) * Fact2 #
8971
# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8972
# #
8973
# 3. Calculate P where 1 + P approximates exp(r): #
8974
# P = r + r*r*(A1+r*(A2+...+r*A5)). #
8975
# #
8976
# 4. Let AdjFact := 2**(M'). Return #
8977
# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8978
# Exit. #
8979
# #
8980
# ExpBig #
8981
# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8982
# generate underflow by Tiny * Tiny. #
8983
# #
8984
# ExpSm #
8985
# 1. Return 1 + X. #
8986
# #
8987
#########################################################################
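#
# A minimal C sketch (not part of the FPSP sources) of the twotox
# decomposition above: X = N/64 + r, N = 64*M + j, so that
# 2**X = 2**M * 2**(j/64) * exp(r*log2).  The M/M' split and the
# TEXPTBL/polynomial details are omitted:
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double twotox_sketch(double x)
#	{
#	    long   n = lround(64.0 * x);        /* N = round-to-int(64*X) */
#	    long   j = ((n % 64) + 64) % 64;    /* j in 0..63             */
#	    long   m = (n - j) / 64;            /* N = 64*M + j           */
#	    double r = (x - n / 64.0) * LN2;    /* r := (X - N/64) * log2 */
#	    return ldexp(exp2(j / 64.0) * exp(r), (int)m);
#	}
#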
8988
8989
L2TEN64:
8990
long 0x406A934F,0x0979A371 # 64LOG10/LOG2
8991
L10TWO1:
8992
long 0x3F734413,0x509F8000 # LOG2/64LOG10
8993
8994
L10TWO2:
8995
long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8996
8997
LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
8998
8999
LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9000
9001
EXPA5: long 0x3F56C16D,0x6F7BD0B2
9002
EXPA4: long 0x3F811112,0x302C712C
9003
EXPA3: long 0x3FA55555,0x55554CC1
9004
EXPA2: long 0x3FC55555,0x55554A54
9005
EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
9006
9007
TEXPTBL:
9008
long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
9009
long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9010
long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9011
long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9012
long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9013
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9014
long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9015
long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9016
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9017
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9018
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9019
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9020
long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9021
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9022
long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9023
long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9024
long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9025
long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9026
long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9027
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9028
long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9029
long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9030
long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9031
long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9032
long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9033
long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9034
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9035
long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9036
long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9037
long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9038
long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9039
long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9040
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9041
long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9042
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9043
long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9044
long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9045
long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9046
long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9047
long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9048
long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9049
long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9050
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9051
long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9052
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9053
long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9054
long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9055
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9056
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9057
long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9058
long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9059
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9060
long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9061
long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9062
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9063
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9064
long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9065
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9066
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9067
long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9068
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9069
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9070
long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9071
long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9072
9073
set INT,L_SCR1
9074
9075
set X,FP_SCR0
9076
set XDCARE,X+2
9077
set XFRAC,X+4
9078
9079
set ADJFACT,FP_SCR0
9080
9081
set FACT1,FP_SCR0
9082
set FACT1HI,FACT1+4
9083
set FACT1LOW,FACT1+8
9084
9085
set FACT2,FP_SCR1
9086
set FACT2HI,FACT2+4
9087
set FACT2LOW,FACT2+8
9088
9089
global stwotox
9090
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9091
stwotox:
9092
fmovm.x (%a0),&0x80 # LOAD INPUT
9093
9094
mov.l (%a0),%d1
9095
mov.w 4(%a0),%d1
9096
fmov.x %fp0,X(%a6)
9097
and.l &0x7FFFFFFF,%d1
9098
9099
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9100
bge.b TWOOK1
9101
bra.w EXPBORS
9102
9103
TWOOK1:
9104
cmp.l %d1,&0x400D80C0 # |X| > 16480?
9105
ble.b TWOMAIN
9106
bra.w EXPBORS
9107
9108
TWOMAIN:
9109
#--USUAL CASE, 2^(-70) <= |X| <= 16480
9110
9111
fmov.x %fp0,%fp1
9112
fmul.s &0x42800000,%fp1 # 64 * X
9113
fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9114
mov.l %d2,-(%sp)
9115
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9116
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9117
mov.l INT(%a6),%d1
9118
mov.l %d1,%d2
9119
and.l &0x3F,%d1 # D0 IS J
9120
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9121
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9122
asr.l &6,%d2 # d2 IS L, N = 64L + J
9123
mov.l %d2,%d1
9124
asr.l &1,%d1 # D0 IS M
9125
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9126
add.l &0x3FFF,%d2
9127
9128
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129
#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130
#--ADJFACT = 2^(M').
9131
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9132
9133
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9134
9135
fmul.s &0x3C800000,%fp1 # (1/64)*N
9136
mov.l (%a1)+,FACT1(%a6)
9137
mov.l (%a1)+,FACT1HI(%a6)
9138
mov.l (%a1)+,FACT1LOW(%a6)
9139
mov.w (%a1)+,FACT2(%a6)
9140
9141
fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9142
9143
mov.w (%a1)+,FACT2HI(%a6)
9144
clr.w FACT2HI+2(%a6)
9145
clr.l FACT2LOW(%a6)
9146
add.w %d1,FACT1(%a6)
9147
fmul.x LOG2(%pc),%fp0 # FP0 IS R
9148
add.w %d1,FACT2(%a6)
9149
9150
bra.w expr
9151
9152
EXPBORS:
9153
#--FPCR, D0 SAVED
9154
cmp.l %d1,&0x3FFF8000
9155
bgt.b TEXPBIG
9156
9157
#--|X| IS SMALL, RETURN 1 + X
9158
9159
fmov.l %d0,%fpcr # restore users round prec,mode
9160
fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9161
bra t_pinx2
9162
9163
TEXPBIG:
9164
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165
#--REGISTERS SAVE SO FAR ARE FPCR AND D0
9166
mov.l X(%a6),%d1
9167
cmp.l %d1,&0
9168
blt.b EXPNEG
9169
9170
bra t_ovfl2 # t_ovfl expects positive value
9171
9172
EXPNEG:
9173
bra t_unfl2 # t_unfl expects positive value
9174
9175
global stwotoxd
9176
stwotoxd:
9177
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9178
9179
fmov.l %d0,%fpcr # set user's rounding mode/precision
9180
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9181
mov.l (%a0),%d1
9182
or.l &0x00800001,%d1
9183
fadd.s %d1,%fp0
9184
bra t_pinx2
9185
9186
global stentox
9187
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9188
stentox:
9189
fmovm.x (%a0),&0x80 # LOAD INPUT
9190
9191
mov.l (%a0),%d1
9192
mov.w 4(%a0),%d1
9193
fmov.x %fp0,X(%a6)
9194
and.l &0x7FFFFFFF,%d1
9195
9196
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9197
bge.b TENOK1
9198
bra.w EXPBORS
9199
9200
TENOK1:
9201
cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9202
ble.b TENMAIN
9203
bra.w EXPBORS
9204
9205
TENMAIN:
9206
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9207
9208
fmov.x %fp0,%fp1
9209
fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9210
fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9211
mov.l %d2,-(%sp)
9212
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9213
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9214
mov.l INT(%a6),%d1
9215
mov.l %d1,%d2
9216
and.l &0x3F,%d1 # D0 IS J
9217
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9218
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9219
asr.l &6,%d2 # d2 IS L, N = 64L + J
9220
mov.l %d2,%d1
9221
asr.l &1,%d1 # D0 IS M
9222
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9223
add.l &0x3FFF,%d2
9224
9225
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226
#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227
#--ADJFACT = 2^(M').
9228
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9229
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9230
9231
fmov.x %fp1,%fp2
9232
9233
fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9234
mov.l (%a1)+,FACT1(%a6)
9235
9236
fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9237
9238
mov.l (%a1)+,FACT1HI(%a6)
9239
mov.l (%a1)+,FACT1LOW(%a6)
9240
fsub.x %fp1,%fp0 # X - N L_LEAD
9241
mov.w (%a1)+,FACT2(%a6)
9242
9243
fsub.x %fp2,%fp0 # X - N L_TRAIL
9244
9245
mov.w (%a1)+,FACT2HI(%a6)
9246
clr.w FACT2HI+2(%a6)
9247
clr.l FACT2LOW(%a6)
9248
9249
fmul.x LOG10(%pc),%fp0 # FP0 IS R
9250
add.w %d1,FACT1(%a6)
9251
add.w %d1,FACT2(%a6)
9252
9253
expr:
9254
#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9255
#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256
#--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257
#-- 2**(M'+M) * 2**(J/64) * EXP(R)
9258
9259
fmov.x %fp0,%fp1
9260
fmul.x %fp1,%fp1 # FP1 IS S = R*R
9261
9262
fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9263
fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9264
9265
fmul.x %fp1,%fp2 # FP2 IS S*A5
9266
fmul.x %fp1,%fp3 # FP3 IS S*A4
9267
9268
fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9269
fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9270
9271
fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9272
fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9273
9274
fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9275
fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9276
9277
fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9278
fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9279
fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9280
9281
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9282
9283
#--FINAL RECONSTRUCTION PROCESS
9284
#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9285
9286
fmul.x FACT1(%a6),%fp0
9287
fadd.x FACT2(%a6),%fp0
9288
fadd.x FACT1(%a6),%fp0
9289
9290
fmov.l %d0,%fpcr # restore users round prec,mode
9291
mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9292
mov.l (%sp)+,%d2
9293
mov.l &0x80000000,ADJFACT+4(%a6)
9294
clr.l ADJFACT+8(%a6)
9295
mov.b &FMUL_OP,%d1 # last inst is MUL
9296
fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9297
bra t_catch
9298
9299
global stentoxd
9300
stentoxd:
9301
#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9302
9303
fmov.l %d0,%fpcr # set user's rounding mode/precision
9304
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9305
mov.l (%a0),%d1
9306
or.l &0x00800001,%d1
9307
fadd.s %d1,%fp0
9308
bra t_pinx2
9309
9310
#########################################################################
9311
# smovcr(): returns the ROM constant at the offset specified in d1 #
9312
# rounded to the mode and precision specified in d0. #
9313
# #
9314
# INPUT *************************************************************** #
9315
# d0 = rnd prec,mode #
9316
# d1 = ROM offset #
9317
# #
9318
# OUTPUT ************************************************************** #
9319
# fp0 = the ROM constant rounded to the user's rounding mode,prec #
9320
# #
9321
#########################################################################
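#
# A hedged C sketch (not part of the FPSP sources) of the offset
# dispatch performed below; the constants themselves live in the
# PIRN/SMALRN/BIGRN table families further down:
#
#	typedef enum { TBL_PI, TBL_SMALL, TBL_BIG, TBL_ZERO } cr_tbl_t;
#
#	static cr_tbl_t movcr_table(unsigned offset)
#	{
#	    if (offset == 0x00)                   return TBL_PI;
#	    if (offset >= 0x0b && offset <= 0x0e) return TBL_SMALL;
#	    if (offset >= 0x30 && offset <= 0x3f) return TBL_BIG;
#	    return TBL_ZERO;      /* all other offsets read back as +0.0 */
#	}
#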
9322
9323
global smovcr
9324
smovcr:
9325
mov.l %d1,-(%sp) # save rom offset for a sec
9326
9327
lsr.b &0x4,%d0 # shift ctrl bits to lo
9328
mov.l %d0,%d1 # make a copy
9329
andi.w &0x3,%d1 # extract rnd mode
9330
andi.w &0xc,%d0 # extract rnd prec
9331
swap %d0 # put rnd prec in hi
9332
mov.w %d1,%d0 # put rnd mode in lo
9333
9334
mov.l (%sp)+,%d1 # get rom offset
9335
9336
#
9337
# check range of offset
9338
#
9339
tst.b %d1 # if zero, offset is to pi
9340
beq.b pi_tbl # it is pi
9341
cmpi.b %d1,&0x0a # check range $01 - $0a
9342
ble.b z_val # if in this range, return zero
9343
cmpi.b %d1,&0x0e # check range $0b - $0e
9344
ble.b sm_tbl # valid constants in this range
9345
cmpi.b %d1,&0x2f # check range $10 - $2f
9346
ble.b z_val # if in this range, return zero
9347
cmpi.b %d1,&0x3f # check range $30 - $3f
9348
ble.b bg_tbl # valid constants in this range
9349
9350
z_val:
9351
bra.l ld_pzero # return a zero
9352
9353
#
9354
# the answer is PI rounded to the proper precision.
9355
#
9356
# fetch a pointer to the answer table relating to the proper rounding
9357
# precision.
9358
#
9359
pi_tbl:
9360
tst.b %d0 # is rmode RN?
9361
bne.b pi_not_rn # no
9362
pi_rn:
9363
lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9364
bra.w set_finx
9365
pi_not_rn:
9366
cmpi.b %d0,&rp_mode # is rmode RP?
9367
beq.b pi_rp # yes
9368
pi_rzrm:
9369
lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9370
bra.b set_finx
9371
pi_rp:
9372
lea.l PIRP(%pc),%a0 # load PI RP table addr
9373
bra.b set_finx
9374
9375
#
9376
# the answer is one of:
9377
# $0B log10(2) (inexact)
9378
# $0C e (inexact)
9379
# $0D log2(e) (inexact)
9380
# $0E log10(e) (exact)
9381
#
9382
# fetch a pointer to the answer table relating to the proper rounding
9383
# precision.
9384
#
9385
sm_tbl:
9386
subi.b &0xb,%d1 # make offset in 0-4 range
9387
tst.b %d0 # is rmode RN?
9388
bne.b sm_not_rn # no
9389
sm_rn:
9390
lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9391
sm_tbl_cont:
9392
cmpi.b %d1,&0x2 # is result log10(e)?
9393
ble.b set_finx # no; answer is inexact
9394
bra.b no_finx # yes; answer is exact
9395
sm_not_rn:
9396
cmpi.b %d0,&rp_mode # is rmode RP?
9397
beq.b sm_rp # yes
9398
sm_rzrm:
9399
lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9400
bra.b sm_tbl_cont
9401
sm_rp:
9402
lea.l SMALRP(%pc),%a0 # load RP table addr
9403
bra.b sm_tbl_cont
9404
9405
#
9406
# the answer is one of:
9407
# $30 ln(2) (inexact)
9408
# $31 ln(10) (inexact)
9409
# $32 10^0 (exact)
9410
# $33 10^1 (exact)
9411
# $34 10^2 (exact)
9412
# $35 10^4 (exact)
9413
# $36 10^8 (exact)
9414
# $37 10^16 (exact)
9415
# $38 10^32 (inexact)
9416
# $39 10^64 (inexact)
9417
# $3A 10^128 (inexact)
9418
# $3B 10^256 (inexact)
9419
# $3C 10^512 (inexact)
9420
# $3D 10^1024 (inexact)
9421
# $3E 10^2048 (inexact)
9422
# $3F 10^4096 (inexact)
9423
#
9424
# fetch a pointer to the answer table relating to the proper rounding
9425
# precision.
9426
#
9427
bg_tbl:
9428
subi.b &0x30,%d1 # make offset in 0-f range
9429
tst.b %d0 # is rmode RN?
9430
bne.b bg_not_rn # no
9431
bg_rn:
9432
lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9433
bg_tbl_cont:
9434
cmpi.b %d1,&0x1 # is offset <= $31?
9435
ble.b set_finx # yes; answer is inexact
9436
cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9437
ble.b no_finx # yes; answer is exact
9438
bra.b set_finx # no; answer is inexact
9439
bg_not_rn:
9440
cmpi.b %d0,&rp_mode # is rmode RP?
9441
beq.b bg_rp # yes
9442
bg_rzrm:
9443
lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9444
bra.b bg_tbl_cont
9445
bg_rp:
9446
lea.l BIGRP(%pc),%a0 # load RP table addr
9447
bra.b bg_tbl_cont
9448
9449
# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9450
set_finx:
9451
ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9452
no_finx:
9453
mulu.w &0xc,%d1 # offset points into tables
9454
swap %d0 # put rnd prec in lo word
9455
tst.b %d0 # is precision extended?
9456
9457
bne.b not_ext # if xprec, do not call round
9458
9459
# Precision is extended
9460
fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9461
rts
9462
9463
# Precision is single or double
9464
not_ext:
9465
swap %d0 # rnd prec in upper word
9466
9467
# call round() to round the answer to the proper precision.
9468
# exponents out of range for single or double DO NOT cause underflow
9469
# or overflow.
9470
mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9471
mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9472
mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9473
mov.l %d0,%d1
9474
clr.l %d0 # clear g,r,s
9475
lea FP_SCR1(%a6),%a0 # pass ptr to answer
9476
clr.w LOCAL_SGN(%a0) # sign always positive
9477
bsr.l _round # round the mantissa
9478
9479
fmovm.x (%a0),&0x80 # return rounded result in fp0
9480
rts
9481
9482
align 0x4
9483
9484
PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9485
PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9486
PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9487
9488
SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9489
long 0x40000000,0xadf85458,0xa2bb4a9a # e
9490
long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9491
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9492
long 0x00000000,0x00000000,0x00000000 # 0.0
9493
9494
SMALRZRM:
9495
long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9496
long 0x40000000,0xadf85458,0xa2bb4a9a # e
9497
long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9498
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9499
long 0x00000000,0x00000000,0x00000000 # 0.0
9500
9501
SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9502
long 0x40000000,0xadf85458,0xa2bb4a9b # e
9503
long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9504
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9505
long 0x00000000,0x00000000,0x00000000 # 0.0
9506
9507
BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9508
long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9509
9510
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9511
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9512
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9513
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9514
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9515
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9516
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9517
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9518
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9519
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9520
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9521
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9522
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9523
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9524
9525
BIGRZRM:
9526
long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9527
long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9528
9529
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9530
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9531
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9532
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9533
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9534
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9535
long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9536
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9537
long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9538
long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9539
long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9540
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9541
long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9542
long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9543
9544
BIGRP:
9545
long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9546
long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9547
9548
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9549
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9550
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9551
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9552
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9553
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9554
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9555
long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9556
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9557
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9558
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9559
long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9560
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9561
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9562
9563
#########################################################################
9564
# sscale(): computes the destination operand scaled by the source #
9565
# operand. If the absolute value of the source operand is #
9566
# >= 2^14, an overflow or underflow is returned. #
9567
# #
9568
# INPUT *************************************************************** #
9569
# a0 = pointer to double-extended source operand X #
9570
# a1 = pointer to double-extended destination operand Y #
9571
# #
9572
# OUTPUT ************************************************************** #
9573
# fp0 = scale(X,Y) #
9574
# #
9575
#########################################################################
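#
# In rough C terms the operation is the following (using <math.h> ldexpl()
# only to state the definition; the routine below builds the power of two by
# hand instead):
#
#	/* scale(X,Y) = Y * 2^(integer part of X) */
#	long double scale(long double x, long double y)
#	{
#		int n = (int)x;		/* truncate toward zero, like fintrz     */
#		return ldexpl(y, n);	/* over/underflows once |n| reaches 2^14 */
#	}
#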
9576
9577
set SIGN, L_SCR1
9578
9579
global sscale
9580
sscale:
9581
mov.l %d0,-(%sp) # store off ctrl bits for now
9582
9583
mov.w DST_EX(%a1),%d1 # get dst exponent
9584
smi.b SIGN(%a6) # use SIGN to hold dst sign
9585
andi.l &0x00007fff,%d1 # strip sign from dst exp
9586
9587
mov.w SRC_EX(%a0),%d0 # check src bounds
9588
andi.w &0x7fff,%d0 # clr src sign bit
9589
cmpi.w %d0,&0x3fff # is |src| < 1?
9590
blt.w src_small # yes
9591
cmpi.w %d0,&0x400c # no; is src too big?
9592
bgt.w src_out # yes
9593
9594
#
9595
# Source is within 2^14 range.
9596
#
9597
src_ok:
9598
fintrz.x SRC(%a0),%fp0 # calc int of src
9599
fmov.l %fp0,%d0 # int src to d0
9600
# don't want any accrued bits from the fintrz showing up later since
9601
# we may need to read the fpsr for the last fp op in t_catch2().
9602
fmov.l &0x0,%fpsr
9603
9604
tst.b DST_HI(%a1) # is dst denormalized?
9605
bmi.b sok_norm
9606
9607
# the dst is a DENORM. normalize the DENORM and add the adjustment to
9608
# the src value. then, jump to the norm part of the routine.
9609
sok_dnrm:
9610
mov.l %d0,-(%sp) # save src for now
9611
9612
mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9613
mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9614
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9615
9616
lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9617
bsr.l norm # normalize the DENORM
9618
neg.l %d0
9619
add.l (%sp)+,%d0 # add adjustment to src
9620
9621
fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9622
9623
cmpi.w %d0,&-0x3fff # is the shft amt really low?
9624
bge.b sok_norm2 # thank goodness no
9625
9626
# the multiply factor that we're trying to create should be a denorm
9627
# for the multiply to work. Therefore, we're going to actually do a
9628
# multiply with a denorm which will cause an unimplemented data type
9629
# exception to be put into the machine which will be caught and corrected
9630
# later. we don't do this with the DENORMs above because this method
9631
# is slower. but, don't fret, I don't see it being used much either.
9632
fmov.l (%sp)+,%fpcr # restore user fpcr
9633
mov.l &0x80000000,%d1 # load normalized mantissa
9634
subi.l &-0x3fff,%d0 # how many should we shift?
9635
neg.l %d0 # make it positive
9636
cmpi.b %d0,&0x20 # is it > 32?
9637
bge.b sok_dnrm_32 # yes
9638
lsr.l %d0,%d1 # no; bit stays in upper lw
9639
clr.l -(%sp) # insert zero low mantissa
9640
mov.l %d1,-(%sp) # insert new high mantissa
9641
clr.l -(%sp) # make zero exponent
9642
bra.b sok_norm_cont
9643
sok_dnrm_32:
9644
subi.b &0x20,%d0 # get shift count
9645
lsr.l %d0,%d1 # make low mantissa longword
9646
mov.l %d1,-(%sp) # insert new low mantissa
9647
clr.l -(%sp) # insert zero high mantissa
9648
clr.l -(%sp) # make zero exponent
9649
bra.b sok_norm_cont
9650
9651
# the src will force the dst to a DENORM value or worse. so, let's
9652
# create an fp multiply that will create the result.
9653
sok_norm:
9654
fmovm.x DST(%a1),&0x80 # load fp0 with normalized src
9655
sok_norm2:
9656
fmov.l (%sp)+,%fpcr # restore user fpcr
9657
9658
addi.w &0x3fff,%d0 # turn src amt into exp value
9659
swap %d0 # put exponent in high word
9660
clr.l -(%sp) # insert new exponent
9661
mov.l &0x80000000,-(%sp) # insert new high mantissa
9662
mov.l %d0,-(%sp) # insert new lo mantissa
9663
9664
sok_norm_cont:
9665
fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9666
mov.b &FMUL_OP,%d1 # last inst is MUL
9667
fmul.x (%sp)+,%fp0 # do the multiply
9668
bra t_catch2 # catch any exceptions
9669
9670
#
9671
# Source is outside of 2^14 range. Test the sign and branch
9672
# to the appropriate exception handler.
9673
#
9674
src_out:
9675
mov.l (%sp)+,%d0 # restore ctrl bits
9676
exg %a0,%a1 # swap src,dst ptrs
9677
tst.b SRC_EX(%a1) # is src negative?
9678
bmi t_unfl # yes; underflow
9679
bra t_ovfl_sc # no; overflow
9680
9681
#
9682
# The source input is below 1, so we check for denormalized numbers
9683
# and set unfl.
9684
#
9685
src_small:
9686
tst.b DST_HI(%a1) # is dst denormalized?
9687
bpl.b ssmall_done # yes
9688
9689
mov.l (%sp)+,%d0
9690
fmov.l %d0,%fpcr # no; load control bits
9691
mov.b &FMOV_OP,%d1 # last inst is MOVE
9692
fmov.x DST(%a1),%fp0 # simply return dest
9693
bra t_catch2
9694
ssmall_done:
9695
mov.l (%sp)+,%d0 # load control bits into d0
9696
mov.l %a1,%a0 # pass ptr to dst
9697
bra t_resdnrm
9698
9699
#########################################################################
9700
# smod(): computes the fp MOD of the input values X,Y. #
9701
# srem(): computes the fp (IEEE) REM of the input values X,Y. #
9702
# #
9703
# INPUT *************************************************************** #
9704
# a0 = pointer to extended precision input X #
9705
# a1 = pointer to extended precision input Y #
9706
# d0 = round precision,mode #
9707
# #
9708
# The input operands X and Y can be either normalized or #
9709
# denormalized. #
9710
# #
9711
# OUTPUT ************************************************************** #
9712
# fp0 = FREM(X,Y) or FMOD(X,Y) #
9713
# #
9714
# ALGORITHM *********************************************************** #
9715
# #
9716
# Step 1. Save and strip signs of X and Y: signX := sign(X), #
9717
# signY := sign(Y), X := |X|, Y := |Y|, #
9718
# signQ := signX EOR signY. Record whether MOD or REM #
9719
# is requested. #
9720
# #
9721
# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9722
# If (L < 0) then #
9723
# R := X, go to Step 4. #
9724
# else #
9725
# R := 2^(-L)X, j := L. #
9726
# endif #
9727
# #
9728
# Step 3. Perform MOD(X,Y) #
9729
# 3.1 If R = Y, go to Step 9. #
9730
# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9731
# 3.3 If j = 0, go to Step 4. #
9732
# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9733
# Step 3.1. #
9734
# #
9735
# Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9736
# Last_Subtract := false (used in Step 7 below). If #
9737
# MOD is requested, go to Step 6. #
9738
# #
9739
# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9740
# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9741
# Step 6. #
9742
# 5.2 If R > Y/2, then { set Last_Subtract := true, #
9743
# Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9744
# 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9745
# then { Q := Q + 1, signX := -signX }. #
9746
# #
9747
# Step 6. R := signX*R. #
9748
# #
9749
# Step 7. If Last_Subtract = true, R := R - Y. #
9750
# #
9751
# Step 8. Return signQ, last 7 bits of Q, and R as required. #
9752
# #
9753
# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9754
# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9755
# R := 0. Return signQ, last 7 bits of Q, and R. #
9756
# #
9757
#########################################################################
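#
# A compact C model of the loop in Steps 2-4 and 9, using <math.h> ilogbl()
# and ldexpl() for the exponent arithmetic. x and y are assumed already
# stripped of their signs (Step 1); frem additionally applies Steps 5-8 to
# round the quotient to nearest, ties going to the even quotient. Illustrative
# only, not the code path used below:
#
#	/* R = |X| - Q*|Y| with 0 <= R < |Y|; low bits of Q returned in *q */
#	long double mod_loop(long double x, long double y, unsigned int *q)
#	{
#		int l = ilogbl(x) - ilogbl(y);		/* Step 2: L          */
#		long double r = x;
#		unsigned int quo = 0;
#
#		if (l >= 0) {
#			r = ldexpl(x, -l);		/* R := 2^(-L) X      */
#			for (int j = l; ; j--) {	/* Step 3             */
#				if (r >= y) {		/* 3.1 / 3.2          */
#					r -= y;		/* exact: y <= r < 2y */
#					quo++;
#				}
#				if (j == 0)		/* 3.3                */
#					break;
#				quo *= 2;		/* 3.4                */
#				r *= 2;
#			}
#		}
#		*q = quo;				/* Step 8 keeps 7 bits */
#		return r;				/* Step 4: R = X - QY  */
#	}
#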
9758
9759
set Mod_Flag,L_SCR3
9760
set Sc_Flag,L_SCR3+1
9761
9762
set SignY,L_SCR2
9763
set SignX,L_SCR2+2
9764
set SignQ,L_SCR3+2
9765
9766
set Y,FP_SCR0
9767
set Y_Hi,Y+4
9768
set Y_Lo,Y+8
9769
9770
set R,FP_SCR1
9771
set R_Hi,R+4
9772
set R_Lo,R+8
9773
9774
Scale:
9775
long 0x00010000,0x80000000,0x00000000,0x00000000
9776
9777
global smod
9778
smod:
9779
clr.b FPSR_QBYTE(%a6)
9780
mov.l %d0,-(%sp) # save ctrl bits
9781
clr.b Mod_Flag(%a6)
9782
bra.b Mod_Rem
9783
9784
global srem
9785
srem:
9786
clr.b FPSR_QBYTE(%a6)
9787
mov.l %d0,-(%sp) # save ctrl bits
9788
mov.b &0x1,Mod_Flag(%a6)
9789
9790
Mod_Rem:
9791
#..Save sign of X and Y
9792
movm.l &0x3f00,-(%sp) # save data registers
9793
mov.w SRC_EX(%a0),%d3
9794
mov.w %d3,SignY(%a6)
9795
and.l &0x00007FFF,%d3 # Y := |Y|
9796
9797
#
9798
mov.l SRC_HI(%a0),%d4
9799
mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9800
9801
tst.l %d3
9802
bne.b Y_Normal
9803
9804
mov.l &0x00003FFE,%d3 # $3FFD + 1
9805
tst.l %d4
9806
bne.b HiY_not0
9807
9808
HiY_0:
9809
mov.l %d5,%d4
9810
clr.l %d5
9811
sub.l &32,%d3
9812
clr.l %d6
9813
bfffo %d4{&0:&32},%d6
9814
lsl.l %d6,%d4
9815
sub.l %d6,%d3 # (D3,D4,D5) is normalized
9816
# ...with bias $7FFD
9817
bra.b Chk_X
9818
9819
HiY_not0:
9820
clr.l %d6
9821
bfffo %d4{&0:&32},%d6
9822
sub.l %d6,%d3
9823
lsl.l %d6,%d4
9824
mov.l %d5,%d7 # a copy of D5
9825
lsl.l %d6,%d5
9826
neg.l %d6
9827
add.l &32,%d6
9828
lsr.l %d6,%d7
9829
or.l %d7,%d4 # (D3,D4,D5) normalized
9830
# ...with bias $7FFD
9831
bra.b Chk_X
9832
9833
Y_Normal:
9834
add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9835
# ...with bias $7FFD
9836
9837
Chk_X:
9838
mov.w DST_EX(%a1),%d0
9839
mov.w %d0,SignX(%a6)
9840
mov.w SignY(%a6),%d1
9841
eor.l %d0,%d1
9842
and.l &0x00008000,%d1
9843
mov.w %d1,SignQ(%a6) # sign(Q) obtained
9844
and.l &0x00007FFF,%d0
9845
mov.l DST_HI(%a1),%d1
9846
mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9847
tst.l %d0
9848
bne.b X_Normal
9849
mov.l &0x00003FFE,%d0
9850
tst.l %d1
9851
bne.b HiX_not0
9852
9853
HiX_0:
9854
mov.l %d2,%d1
9855
clr.l %d2
9856
sub.l &32,%d0
9857
clr.l %d6
9858
bfffo %d1{&0:&32},%d6
9859
lsl.l %d6,%d1
9860
sub.l %d6,%d0 # (D0,D1,D2) is normalized
9861
# ...with bias $7FFD
9862
bra.b Init
9863
9864
HiX_not0:
9865
clr.l %d6
9866
bfffo %d1{&0:&32},%d6
9867
sub.l %d6,%d0
9868
lsl.l %d6,%d1
9869
mov.l %d2,%d7 # a copy of D2
9870
lsl.l %d6,%d2
9871
neg.l %d6
9872
add.l &32,%d6
9873
lsr.l %d6,%d7
9874
or.l %d7,%d1 # (D0,D1,D2) normalized
9875
# ...with bias $7FFD
9876
bra.b Init
9877
9878
X_Normal:
9879
add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9880
# ...with bias $7FFD
9881
9882
Init:
9883
#
9884
mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9885
mov.l %d0,-(%sp) # save biased exp(X)
9886
sub.l %d3,%d0 # L := expo(X)-expo(Y)
9887
9888
clr.l %d6 # D6 := carry <- 0
9889
clr.l %d3 # D3 is Q
9890
mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9891
9892
#..(Carry,D1,D2) is R
9893
tst.l %d0
9894
bge.b Mod_Loop_pre
9895
9896
#..expo(X) < expo(Y). Thus X = mod(X,Y)
9897
#
9898
mov.l (%sp)+,%d0 # restore d0
9899
bra.w Get_Mod
9900
9901
Mod_Loop_pre:
9902
addq.l &0x4,%sp # erase exp(X)
9903
#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9904
Mod_Loop:
9905
tst.l %d6 # test carry bit
9906
bgt.b R_GT_Y
9907
9908
#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9909
cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9910
bne.b R_NE_Y
9911
cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9912
bne.b R_NE_Y
9913
9914
#..At this point, R = Y
9915
bra.w Rem_is_0
9916
9917
R_NE_Y:
9918
#..use the borrow of the previous compare
9919
bcs.b R_LT_Y # borrow is set iff R < Y
9920
9921
R_GT_Y:
9922
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9923
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9924
sub.l %d5,%d2 # lo(R) - lo(Y)
9925
subx.l %d4,%d1 # hi(R) - hi(Y)
9926
clr.l %d6 # clear carry
9927
addq.l &1,%d3 # Q := Q + 1
9928
9929
R_LT_Y:
9930
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9931
tst.l %d0 # see if j = 0.
9932
beq.b PostLoop
9933
9934
add.l %d3,%d3 # Q := 2Q
9935
add.l %d2,%d2 # lo(R) = 2lo(R)
9936
roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9937
scs %d6 # set Carry if 2(R) overflows
9938
addq.l &1,%a1 # k := k+1
9939
subq.l &1,%d0 # j := j - 1
9940
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9941
9942
bra.b Mod_Loop
9943
9944
PostLoop:
9945
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9946
9947
#..normalize R.
9948
mov.l L_SCR1(%a6),%d0 # new biased expo of R
9949
tst.l %d1
9950
bne.b HiR_not0
9951
9952
HiR_0:
9953
mov.l %d2,%d1
9954
clr.l %d2
9955
sub.l &32,%d0
9956
clr.l %d6
9957
bfffo %d1{&0:&32},%d6
9958
lsl.l %d6,%d1
9959
sub.l %d6,%d0 # (D0,D1,D2) is normalized
9960
# ...with bias $7FFD
9961
bra.b Get_Mod
9962
9963
HiR_not0:
9964
clr.l %d6
9965
bfffo %d1{&0:&32},%d6
9966
bmi.b Get_Mod # already normalized
9967
sub.l %d6,%d0
9968
lsl.l %d6,%d1
9969
mov.l %d2,%d7 # a copy of D2
9970
lsl.l %d6,%d2
9971
neg.l %d6
9972
add.l &32,%d6
9973
lsr.l %d6,%d7
9974
or.l %d7,%d1 # (D0,D1,D2) normalized
9975
9976
#
9977
Get_Mod:
9978
cmp.l %d0,&0x000041FE
9979
bge.b No_Scale
9980
Do_Scale:
9981
mov.w %d0,R(%a6)
9982
mov.l %d1,R_Hi(%a6)
9983
mov.l %d2,R_Lo(%a6)
9984
mov.l L_SCR1(%a6),%d6
9985
mov.w %d6,Y(%a6)
9986
mov.l %d4,Y_Hi(%a6)
9987
mov.l %d5,Y_Lo(%a6)
9988
fmov.x R(%a6),%fp0 # no exception
9989
mov.b &1,Sc_Flag(%a6)
9990
bra.b ModOrRem
9991
No_Scale:
9992
mov.l %d1,R_Hi(%a6)
9993
mov.l %d2,R_Lo(%a6)
9994
sub.l &0x3FFE,%d0
9995
mov.w %d0,R(%a6)
9996
mov.l L_SCR1(%a6),%d6
9997
sub.l &0x3FFE,%d6
9998
mov.l %d6,L_SCR1(%a6)
9999
fmov.x R(%a6),%fp0
10000
mov.w %d6,Y(%a6)
10001
mov.l %d4,Y_Hi(%a6)
10002
mov.l %d5,Y_Lo(%a6)
10003
clr.b Sc_Flag(%a6)
10004
10005
#
10006
ModOrRem:
10007
tst.b Mod_Flag(%a6)
10008
beq.b Fix_Sign
10009
10010
mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10011
subq.l &1,%d6 # biased expo(Y/2)
10012
cmp.l %d0,%d6
10013
blt.b Fix_Sign
10014
bgt.b Last_Sub
10015
10016
cmp.l %d1,%d4
10017
bne.b Not_EQ
10018
cmp.l %d2,%d5
10019
bne.b Not_EQ
10020
bra.w Tie_Case
10021
10022
Not_EQ:
10023
bcs.b Fix_Sign
10024
10025
Last_Sub:
10026
#
10027
fsub.x Y(%a6),%fp0 # no exceptions
10028
addq.l &1,%d3 # Q := Q + 1
10029
10030
#
10031
Fix_Sign:
10032
#..Get sign of X
10033
mov.w SignX(%a6),%d6
10034
bge.b Get_Q
10035
fneg.x %fp0
10036
10037
#..Get Q
10038
#
10039
Get_Q:
10040
clr.l %d6
10041
mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10042
mov.l &8,%d7
10043
lsr.l %d7,%d6
10044
and.l &0x0000007F,%d3 # 7 bits of Q
10045
or.l %d6,%d3 # sign and bits of Q
10046
# swap %d3
10047
# fmov.l %fpsr,%d6
10048
# and.l &0xFF00FFFF,%d6
10049
# or.l %d3,%d6
10050
# fmov.l %d6,%fpsr # put Q in fpsr
10051
mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
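#
# I.e. the quotient byte handed back in the FPSR is laid out as
# bit 7 = sign(Q) = sign(X) eor sign(Y), bits 6-0 = low seven bits of |Q|;
# in C (names hypothetical):
#
#	fpsr_qbyte = (sign_q ? 0x80 : 0x00) | (quo & 0x7f);
#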
10052
10053
#
10054
Restore:
10055
movm.l (%sp)+,&0xfc # {%d2-%d7}
10056
mov.l (%sp)+,%d0
10057
fmov.l %d0,%fpcr
10058
tst.b Sc_Flag(%a6)
10059
beq.b Finish
10060
mov.b &FMUL_OP,%d1 # last inst is MUL
10061
fmul.x Scale(%pc),%fp0 # may cause underflow
10062
bra t_catch2
10063
# the '040 package did this apparently to see if the dst operand for the
10064
# preceding fmul was a denorm. but, it better not have been since the
10065
# algorithm just got done playing with fp0 and expected no exceptions
10066
# as a result. trust me...
10067
# bra t_avoid_unsupp # check for denorm as a
10068
# ;result of the scaling
10069
10070
Finish:
10071
mov.b &FMOV_OP,%d1 # last inst is MOVE
10072
fmov.x %fp0,%fp0 # capture exceptions & round
10073
bra t_catch2
10074
10075
Rem_is_0:
10076
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10077
addq.l &1,%d3
10078
cmp.l %d0,&8 # D0 is j
10079
bge.b Q_Big
10080
10081
lsl.l %d0,%d3
10082
bra.b Set_R_0
10083
10084
Q_Big:
10085
clr.l %d3
10086
10087
Set_R_0:
10088
fmov.s &0x00000000,%fp0
10089
clr.b Sc_Flag(%a6)
10090
bra.w Fix_Sign
10091
10092
Tie_Case:
10093
#..Check parity of Q
10094
mov.l %d3,%d6
10095
and.l &0x00000001,%d6
10096
tst.l %d6
10097
beq.w Fix_Sign # Q is even
10098
10099
#..Q is odd, Q := Q + 1, signX := -signX
10100
addq.l &1,%d3
10101
mov.w SignX(%a6),%d6
10102
eor.l &0x00008000,%d6
10103
mov.w %d6,SignX(%a6)
10104
bra.w Fix_Sign
10105
10106
qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10107
10108
#########################################################################
10109
# XDEF **************************************************************** #
10110
# t_dz(): Handle DZ exception during transcendental emulation. #
10111
# Sets N bit according to sign of source operand. #
10112
# t_dz2(): Handle DZ exception during transcendental emulation. #
10113
# Sets N bit always. #
10114
# #
10115
# XREF **************************************************************** #
10116
# None #
10117
# #
10118
# INPUT *************************************************************** #
10119
# a0 = pointer to source operand #
10120
# #
10121
# OUTPUT ************************************************************** #
10122
# fp0 = default result #
10123
# #
10124
# ALGORITHM *********************************************************** #
10125
# - Store properly signed INF into fp0. #
10126
# - Set FPSR exception status dz bit, ccode inf bit, and #
10127
# accrued dz bit. #
10128
# #
10129
#########################################################################
10130
10131
global t_dz
10132
t_dz:
10133
tst.b SRC_EX(%a0) # is src negative?
10134
bmi.b t_dz2 # yes
10135
10136
dz_pinf:
10137
fmov.s &0x7f800000,%fp0 # return +INF in fp0
10138
ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10139
rts
10140
10141
global t_dz2
10142
t_dz2:
10143
fmov.s &0xff800000,%fp0 # return -INF in fp0
10144
ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10145
rts
10146
10147
#################################################################
10148
# OPERR exception: #
10149
# - set FPSR exception status operr bit, condition code #
10150
# nan bit; Store default NAN into fp0 #
10151
#################################################################
10152
global t_operr
10153
t_operr:
10154
ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155
fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
10156
rts
10157
10158
#################################################################
10159
# Extended DENORM: #
10160
# - For all functions that have a denormalized input and #
10161
# satisfy f(x)=x, this is the entry point. #
10162
# - we only return the EXOP here if either underflow or #
10163
# inexact is enabled. #
10164
#################################################################
10165
10166
# Entry point for scale w/ extended denorm. The function does
10167
# NOT set INEX2/AUNFL/AINEX.
10168
global t_resdnrm
10169
t_resdnrm:
10170
ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
10171
bra.b xdnrm_con
10172
10173
global t_extdnrm
10174
t_extdnrm:
10175
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10176
10177
xdnrm_con:
10178
mov.l %a0,%a1 # make copy of src ptr
10179
mov.l %d0,%d1 # make copy of rnd prec,mode
10180
andi.b &0xc0,%d1 # extended precision?
10181
bne.b xdnrm_sd # no
10182
10183
# result precision is extended.
10184
tst.b LOCAL_EX(%a0) # is denorm negative?
10185
bpl.b xdnrm_exit # no
10186
10187
bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
10188
bra.b xdnrm_exit
10189
10190
# result precision is single or double
10191
xdnrm_sd:
10192
mov.l %a1,-(%sp)
10193
tst.b LOCAL_EX(%a0) # is denorm pos or neg?
10194
smi.b %d1 # set d0 accordingly
10195
bsr.l unf_sub
10196
mov.l (%sp)+,%a1
10197
xdnrm_exit:
10198
fmovm.x (%a0),&0x80 # return default result in fp0
10199
10200
mov.b FPCR_ENABLE(%a6),%d0
10201
andi.b &0x0a,%d0 # is UNFL or INEX enabled?
10202
bne.b xdnrm_ena # yes
10203
rts
10204
10205
################
10206
# unfl enabled #
10207
################
10208
# we have a DENORM that needs to be converted into an EXOP.
10209
# so, normalize the mantissa, add 0x6000 to the new exponent,
10210
# and return the result in fp1.
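#
# In C-style shorthand, with new_exp standing for whatever exponent the
# normalization below produces (a sketch, not extra code):
#
#	exp_field = (exp_field & 0x8000)	/* keep the result's sign    */
#		  | (new_exp + 0x6000);		/* re-bias the tiny exponent */
#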
10211
xdnrm_ena:
10212
mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6)
10213
mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10214
mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10215
10216
lea FP_SCR0(%a6),%a0
10217
bsr.l norm # normalize mantissa
10218
addi.l &0x6000,%d0 # add extra bias
10219
andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
10220
or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
10221
10222
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10223
rts
10224
10225
#################################################################
10226
# UNFL exception: #
10227
# - This routine is for cases where even an EXOP isn't #
10228
# large enough to hold the range of this result. #
10229
# In such a case, the EXOP equals zero. #
10230
# - Return the default result to the proper precision #
10231
# with the sign of this result being the same as that #
10232
# of the src operand. #
10233
# - t_unfl2() is provided to force the result sign to #
10234
# positive which is the desired result for fetox(). #
10235
#################################################################
10236
global t_unfl
10237
t_unfl:
10238
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10239
10240
tst.b (%a0) # is result pos or neg?
10241
smi.b %d1 # set d1 accordingly
10242
bsr.l unf_sub # calc default unfl result
10243
fmovm.x (%a0),&0x80 # return default result in fp0
10244
10245
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10246
rts
10247
10248
# t_unfl2 ALWAYS tells unf_sub to create a positive result
10249
global t_unfl2
10250
t_unfl2:
10251
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10252
10253
sf.b %d1 # clear d1 to represent positive
10254
bsr.l unf_sub # calc default unfl result
10255
fmovm.x (%a0),&0x80 # return default result in fp0
10256
10257
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10258
rts
10259
10260
#################################################################
10261
# OVFL exception: #
10262
# - This routine is for cases where even an EXOP isn't #
10263
# large enough to hold the range of this result. #
10264
# - Return the default result to the proper precision #
10265
# with the sign of this result being the same as that #
10266
# of the src operand. #
10267
# - t_ovfl2() is provided to force the result sign to #
10268
# positive which is the desired result for fcosh(). #
10269
# - t_ovfl_sc() is provided for scale() which only sets #
10270
# the inexact bits if the number is inexact for the #
10271
# precision indicated. #
10272
#################################################################
10273
10274
global t_ovfl_sc
10275
t_ovfl_sc:
10276
ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10277
10278
mov.b %d0,%d1 # fetch rnd mode/prec
10279
andi.b &0xc0,%d1 # extract rnd prec
10280
beq.b ovfl_work # prec is extended
10281
10282
tst.b LOCAL_HI(%a0) # is dst a DENORM?
10283
bmi.b ovfl_sc_norm # no
10284
10285
# dst op is a DENORM. we have to normalize the mantissa to see if the
10286
# result would be inexact for the given precision. make a copy of the
10287
# dst so we don't screw up the version passed to us.
10288
mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289
mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290
mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291
lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10292
movm.l &0xc080,-(%sp) # save d0-d1/a0
10293
bsr.l norm # normalize mantissa
10294
movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10295
10296
ovfl_sc_norm:
10297
cmpi.b %d1,&0x40 # is prec sgl?
10298
bne.b ovfl_sc_dbl # no; dbl
10299
ovfl_sc_sgl:
10300
tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10301
bne.b ovfl_sc_inx # yes
10302
tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10303
bne.b ovfl_sc_inx # yes
10304
bra.b ovfl_work # don't set INEX2
10305
ovfl_sc_dbl:
10306
mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10307
andi.l &0x7ff,%d1 # dbl mantissa set?
10308
beq.b ovfl_work # no; don't set INEX2
10309
ovfl_sc_inx:
10310
ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10311
bra.b ovfl_work # continue
10312
10313
global t_ovfl
10314
t_ovfl:
10315
ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10316
10317
ovfl_work:
10318
tst.b LOCAL_EX(%a0) # what is the sign?
10319
smi.b %d1 # set d1 accordingly
10320
bsr.l ovf_res # calc default ovfl result
10321
mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10322
fmovm.x (%a0),&0x80 # return default result in fp0
10323
10324
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10325
rts
10326
10327
# t_ovfl2 ALWAYS tells ovf_res to create a positive result
10328
global t_ovfl2
10329
t_ovfl2:
10330
ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10331
10332
sf.b %d1 # clear sign flag for positive
10333
bsr.l ovf_res # calc default ovfl result
10334
mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10335
fmovm.x (%a0),&0x80 # return default result in fp0
10336
10337
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10338
rts
10339
10340
#################################################################
10341
# t_catch(): #
10342
# - the last operation of a transcendental emulation #
10343
# routine may have caused an underflow or overflow. #
10344
# we find out if this occurred by doing an fsave and #
10345
# checking the exception bit. if one did occur, then we #
10346
# jump to fgen_except() which creates the default #
10347
# result and EXOP for us. #
10348
#################################################################
10349
global t_catch
10350
t_catch:
10351
10352
fsave -(%sp)
10353
tst.b 0x2(%sp)
10354
bmi.b catch
10355
add.l &0xc,%sp
10356
10357
#################################################################
10358
# INEX2 exception: #
10359
# - The inex2 and ainex bits are set. #
10360
#################################################################
10361
global t_inx2
10362
t_inx2:
10363
fblt.w t_minx2
10364
fbeq.w inx2_zero
10365
10366
global t_pinx2
10367
t_pinx2:
10368
ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10369
rts
10370
10371
global t_minx2
10372
t_minx2:
10373
ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10374
rts
10375
10376
inx2_zero:
10377
mov.b &z_bmask,FPSR_CC(%a6)
10378
ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10379
rts
10380
10381
# an underflow or overflow exception occurred.
10382
# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10383
catch:
10384
ori.w &inx2a_mask,FPSR_EXCEPT(%a6)
10385
catch2:
10386
bsr.l fgen_except
10387
add.l &0xc,%sp
10388
rts
10389
10390
global t_catch2
10391
t_catch2:
10392
10393
fsave -(%sp)
10394
10395
tst.b 0x2(%sp)
10396
bmi.b catch2
10397
add.l &0xc,%sp
10398
10399
fmov.l %fpsr,%d0
10400
or.l %d0,USER_FPSR(%a6)
10401
10402
rts
10403
10404
#########################################################################
10405
10406
#########################################################################
10407
# unf_sub(): underflow default result calculation for transcendentals #
10408
# #
10409
# INPUT: #
10410
# d0 : rnd mode,precision #
10411
# d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
10412
# OUTPUT: #
10413
# a0 : points to result (in instruction memory) #
10414
#########################################################################
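#
# The index math below packs {sign, rounding precision, rounding mode} into
# five bits and uses the result to pick both the condition codes and the
# default value. In C (the two table names are the ones defined below; the
# rest is hypothetical):
#
#	unsigned idx = (sign ? 0x10 : 0)	/* bit 4: sign of result   */
#		     | ((prec & 0x3) << 2)	/* bits 3-2: rounding prec */
#		     | (mode & 0x3);		/* bits 1-0: rounding mode */
#
#	fpsr_cc = tbl_unf_cc[idx];		/* one byte per entry      */
#	result  = &tbl_unf_result[idx];		/* one 16-byte entry each  */
#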
10415
unf_sub:
10416
ori.l &unfinx_mask,USER_FPSR(%a6)
10417
10418
andi.w &0x10,%d1 # keep sign bit in 4th spot
10419
10420
lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10421
andi.b &0xf,%d0 # strip hi rnd mode bit
10422
or.b %d1,%d0 # concat {sgn,mode,prec}
10423
10424
mov.l %d0,%d1 # make a copy
10425
lsl.b &0x1,%d1 # mult index by 2
10426
10427
mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428
lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10429
rts
10430
10431
tbl_unf_cc:
10432
byte 0x4, 0x4, 0x4, 0x0
10433
byte 0x4, 0x4, 0x4, 0x0
10434
byte 0x4, 0x4, 0x4, 0x0
10435
byte 0x0, 0x0, 0x0, 0x0
10436
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10437
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10438
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10439
10440
tbl_unf_result:
10441
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444
long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10445
10446
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449
long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10450
10451
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454
long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10455
10456
long 0x0,0x0,0x0,0x0
10457
long 0x0,0x0,0x0,0x0
10458
long 0x0,0x0,0x0,0x0
10459
long 0x0,0x0,0x0,0x0
10460
10461
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10462
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10463
long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10464
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10465
10466
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10467
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10468
long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10469
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10470
10471
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10472
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10473
long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10474
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
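# (Within each group of four rows above, the rows correspond to the rounding
# modes RN, RZ, RM, RP in that order: the default underflow result is zero
# except when rounding away from zero in the direction of the result's sign,
# i.e. RP for positive and RM for negative results, which yields the minimum
# denormalized magnitude.)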
10475
10476
############################################################
10477
10478
#########################################################################
10479
# src_zero(): Return signed zero according to sign of src operand. #
10480
#########################################################################
10481
global src_zero
10482
src_zero:
10483
tst.b SRC_EX(%a0) # get sign of src operand
10484
bmi.b ld_mzero # if neg, load neg zero
10485
10486
#
10487
# ld_pzero(): return a positive zero.
10488
#
10489
global ld_pzero
10490
ld_pzero:
10491
fmov.s &0x00000000,%fp0 # load +0
10492
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10493
rts
10494
10495
# ld_mzero(): return a negative zero.
10496
global ld_mzero
10497
ld_mzero:
10498
fmov.s &0x80000000,%fp0 # load -0
10499
mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10500
rts
10501
10502
#########################################################################
10503
# dst_zero(): Return signed zero according to sign of dst operand. #
10504
#########################################################################
10505
global dst_zero
10506
dst_zero:
10507
tst.b DST_EX(%a1) # get sign of dst operand
10508
bmi.b ld_mzero # if neg, load neg zero
10509
bra.b ld_pzero # load positive zero
10510
10511
#########################################################################
10512
# src_inf(): Return signed inf according to sign of src operand. #
10513
#########################################################################
10514
global src_inf
10515
src_inf:
10516
tst.b SRC_EX(%a0) # get sign of src operand
10517
bmi.b ld_minf # if negative branch
10518
10519
#
10520
# ld_pinf(): return a positive infinity.
10521
#
10522
global ld_pinf
10523
ld_pinf:
10524
fmov.s &0x7f800000,%fp0 # load +INF
10525
mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10526
rts
10527
10528
#
10529
# ld_minf():return a negative infinity.
10530
#
10531
global ld_minf
10532
ld_minf:
10533
fmov.s &0xff800000,%fp0 # load -INF
10534
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10535
rts
10536
10537
#########################################################################
10538
# dst_inf(): Return signed inf according to sign of dst operand. #
10539
#########################################################################
10540
global dst_inf
10541
dst_inf:
10542
tst.b DST_EX(%a1) # get sign of dst operand
10543
bmi.b ld_minf # if negative branch
10544
bra.b ld_pinf
10545
10546
global szr_inf
10547
#################################################################
10548
# szr_inf(): Return +ZERO for a negative src operand or #
10549
# +INF for a positive src operand. #
10550
# Routine used for fetox, ftwotox, and ftentox. #
10551
#################################################################
10552
szr_inf:
10553
tst.b SRC_EX(%a0) # check sign of source
10554
bmi.b ld_pzero
10555
bra.b ld_pinf
10556
10557
#########################################################################
10558
# sopr_inf(): Return +INF for a positive src operand or #
10559
# jump to operand error routine for a negative src operand. #
10560
# Routine used for flogn, flognp1, flog10, and flog2. #
10561
#########################################################################
10562
global sopr_inf
10563
sopr_inf:
10564
tst.b SRC_EX(%a0) # check sign of source
10565
bmi.w t_operr
10566
bra.b ld_pinf
10567
10568
#################################################################
10569
# setoxm1i(): Return minus one for a negative src operand or #
10570
# positive infinity for a positive src operand. #
10571
# Routine used for fetoxm1. #
10572
#################################################################
10573
global setoxm1i
10574
setoxm1i:
10575
tst.b SRC_EX(%a0) # check sign of source
10576
bmi.b ld_mone
10577
bra.b ld_pinf
10578
10579
#########################################################################
10580
# src_one(): Return signed one according to sign of src operand. #
10581
#########################################################################
10582
global src_one
10583
src_one:
10584
tst.b SRC_EX(%a0) # check sign of source
10585
bmi.b ld_mone
10586
10587
#
10588
# ld_pone(): return positive one.
10589
#
10590
global ld_pone
10591
ld_pone:
10592
fmov.s &0x3f800000,%fp0 # load +1
10593
clr.b FPSR_CC(%a6)
10594
rts
10595
10596
#
10597
# ld_mone(): return negative one.
10598
#
10599
global ld_mone
10600
ld_mone:
10601
fmov.s &0xbf800000,%fp0 # load -1
10602
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10603
rts
10604
10605
ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10606
mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10607
10608
#################################################################
10609
# spi_2(): Return signed PI/2 according to sign of src operand. #
10610
#################################################################
10611
global spi_2
10612
spi_2:
10613
tst.b SRC_EX(%a0) # check sign of source
10614
bmi.b ld_mpi2
10615
10616
#
10617
# ld_ppi2(): return positive PI/2.
10618
#
10619
global ld_ppi2
10620
ld_ppi2:
10621
fmov.l %d0,%fpcr
10622
fmov.x ppiby2(%pc),%fp0 # load +pi/2
10623
bra.w t_pinx2 # set INEX2
10624
10625
#
10626
# ld_mpi2(): return negative PI/2.
10627
#
10628
global ld_mpi2
10629
ld_mpi2:
10630
fmov.l %d0,%fpcr
10631
fmov.x mpiby2(%pc),%fp0 # load -pi/2
10632
bra.w t_minx2 # set INEX2
10633
10634
####################################################
10635
# The following routines give support for fsincos. #
10636
####################################################
10637
10638
#
10639
# ssincosz(): When the src operand is ZERO, store a one in the
10640
# cosine register and return a ZERO in fp0 w/ the same sign
10641
# as the src operand.
10642
#
10643
global ssincosz
10644
ssincosz:
10645
fmov.s &0x3f800000,%fp1
10646
tst.b SRC_EX(%a0) # test sign
10647
bpl.b sincoszp
10648
fmov.s &0x80000000,%fp0 # return sin result in fp0
10649
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
10650
bra.b sto_cos # store cosine result
10651
sincoszp:
10652
fmov.s &0x00000000,%fp0 # return sin result in fp0
10653
mov.b &z_bmask,FPSR_CC(%a6)
10654
bra.b sto_cos # store cosine result
10655
10656
#
10657
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658
# register and jump to the operand error routine.
10660
#
10661
global ssincosi
10662
ssincosi:
10663
fmov.x qnan(%pc),%fp1 # load NAN
10664
bsr.l sto_cos # store cosine result
10665
bra.w t_operr
10666
10667
#
10668
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669
# register and branch to the src QNAN routine.
10670
#
10671
global ssincosqnan
10672
ssincosqnan:
10673
fmov.x LOCAL_EX(%a0),%fp1
10674
bsr.l sto_cos
10675
bra.w src_qnan
10676
10677
#
10678
# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679
# in the cosine register and branch to the src SNAN routine.
10680
#
10681
global ssincossnan
10682
ssincossnan:
10683
fmov.x LOCAL_EX(%a0),%fp1
10684
bsr.l sto_cos
10685
bra.w src_snan
10686
10687
########################################################################
10688
10689
#########################################################################
10690
# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10691
# fp1 holds the result of the cosine portion of ssincos(). #
10692
# the value in fp1 will not take any exceptions when moved. #
10693
# INPUT: #
10694
# fp1 : fp value to store #
10695
# MODIFIED: #
10696
# d0 #
10697
#########################################################################
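#
# The dispatch below is, in effect (register names stand for the user-visible
# fp registers; fp0/fp1 live in the saved-register images in the exception
# frame, so those two cases store to memory instead):
#
#	switch (cmdreg_byte & 0x7) {	/* dst register field of the opcode */
#	case 0: saved_fp0 = fp1; break;
#	case 1: saved_fp1 = fp1; break;
#	case 2: fp2 = fp1; break;
#	/* ... */
#	case 7: fp7 = fp1; break;
#	}
#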
10698
global sto_cos
10699
sto_cos:
10700
mov.b 1+EXC_CMDREG(%a6),%d0
10701
andi.w &0x7,%d0
10702
mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0
10703
jmp (tbl_sto_cos.b,%pc,%d0.w*1)
10704
10705
tbl_sto_cos:
10706
short sto_cos_0 - tbl_sto_cos
10707
short sto_cos_1 - tbl_sto_cos
10708
short sto_cos_2 - tbl_sto_cos
10709
short sto_cos_3 - tbl_sto_cos
10710
short sto_cos_4 - tbl_sto_cos
10711
short sto_cos_5 - tbl_sto_cos
10712
short sto_cos_6 - tbl_sto_cos
10713
short sto_cos_7 - tbl_sto_cos
10714
10715
sto_cos_0:
10716
fmovm.x &0x40,EXC_FP0(%a6)
10717
rts
10718
sto_cos_1:
10719
fmovm.x &0x40,EXC_FP1(%a6)
10720
rts
10721
sto_cos_2:
10722
fmov.x %fp1,%fp2
10723
rts
10724
sto_cos_3:
10725
fmov.x %fp1,%fp3
10726
rts
10727
sto_cos_4:
10728
fmov.x %fp1,%fp4
10729
rts
10730
sto_cos_5:
10731
fmov.x %fp1,%fp5
10732
rts
10733
sto_cos_6:
10734
fmov.x %fp1,%fp6
10735
rts
10736
sto_cos_7:
10737
fmov.x %fp1,%fp7
10738
rts
10739
10740
##################################################################
10741
global smod_sdnrm
10742
global smod_snorm
10743
smod_sdnrm:
10744
smod_snorm:
10745
mov.b DTAG(%a6),%d1
10746
beq.l smod
10747
cmpi.b %d1,&ZERO
10748
beq.w smod_zro
10749
cmpi.b %d1,&INF
10750
beq.l t_operr
10751
cmpi.b %d1,&DENORM
10752
beq.l smod
10753
cmpi.b %d1,&SNAN
10754
beq.l dst_snan
10755
bra.l dst_qnan
10756
10757
global smod_szero
10758
smod_szero:
10759
mov.b DTAG(%a6),%d1
10760
beq.l t_operr
10761
cmpi.b %d1,&ZERO
10762
beq.l t_operr
10763
cmpi.b %d1,&INF
10764
beq.l t_operr
10765
cmpi.b %d1,&DENORM
10766
beq.l t_operr
10767
cmpi.b %d1,&QNAN
10768
beq.l dst_qnan
10769
bra.l dst_snan
10770
10771
global smod_sinf
10772
smod_sinf:
10773
mov.b DTAG(%a6),%d1
10774
beq.l smod_fpn
10775
cmpi.b %d1,&ZERO
10776
beq.l smod_zro
10777
cmpi.b %d1,&INF
10778
beq.l t_operr
10779
cmpi.b %d1,&DENORM
10780
beq.l smod_fpn
10781
cmpi.b %d1,&QNAN
10782
beq.l dst_qnan
10783
bra.l dst_snan
10784
10785
smod_zro:
10786
srem_zro:
10787
mov.b SRC_EX(%a0),%d1 # get src sign
10788
mov.b DST_EX(%a1),%d0 # get dst sign
10789
eor.b %d0,%d1 # get qbyte sign
10790
andi.b &0x80,%d1
10791
mov.b %d1,FPSR_QBYTE(%a6)
10792
tst.b %d0
10793
bpl.w ld_pzero
10794
bra.w ld_mzero
10795
10796
smod_fpn:
10797
srem_fpn:
10798
clr.b FPSR_QBYTE(%a6)
10799
mov.l %d0,-(%sp)
10800
mov.b SRC_EX(%a0),%d1 # get src sign
10801
mov.b DST_EX(%a1),%d0 # get dst sign
10802
eor.b %d0,%d1 # get qbyte sign
10803
andi.b &0x80,%d1
10804
mov.b %d1,FPSR_QBYTE(%a6)
10805
cmpi.b DTAG(%a6),&DENORM
10806
bne.b smod_nrm
10807
lea DST(%a1),%a0
10808
mov.l (%sp)+,%d0
10809
bra t_resdnrm
10810
smod_nrm:
10811
fmov.l (%sp)+,%fpcr
10812
fmov.x DST(%a1),%fp0
10813
tst.b DST_EX(%a1)
10814
bmi.b smod_nrm_neg
10815
rts
10816
10817
smod_nrm_neg:
10818
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10819
rts
10820
10821
#########################################################################
10822
global srem_snorm
10823
global srem_sdnrm
10824
srem_sdnrm:
10825
srem_snorm:
10826
mov.b DTAG(%a6),%d1
10827
beq.l srem
10828
cmpi.b %d1,&ZERO
10829
beq.w srem_zro
10830
cmpi.b %d1,&INF
10831
beq.l t_operr
10832
cmpi.b %d1,&DENORM
10833
beq.l srem
10834
cmpi.b %d1,&QNAN
10835
beq.l dst_qnan
10836
bra.l dst_snan
10837
10838
global srem_szero
10839
srem_szero:
10840
mov.b DTAG(%a6),%d1
10841
beq.l t_operr
10842
cmpi.b %d1,&ZERO
10843
beq.l t_operr
10844
cmpi.b %d1,&INF
10845
beq.l t_operr
10846
cmpi.b %d1,&DENORM
10847
beq.l t_operr
10848
cmpi.b %d1,&QNAN
10849
beq.l dst_qnan
10850
bra.l dst_snan
10851
10852
global srem_sinf
10853
srem_sinf:
10854
mov.b DTAG(%a6),%d1
10855
beq.w srem_fpn
10856
cmpi.b %d1,&ZERO
10857
beq.w srem_zro
10858
cmpi.b %d1,&INF
10859
beq.l t_operr
10860
cmpi.b %d1,&DENORM
10861
beq.l srem_fpn
10862
cmpi.b %d1,&QNAN
10863
beq.l dst_qnan
10864
bra.l dst_snan
10865
10866
#########################################################################
10867
global sscale_snorm
10868
global sscale_sdnrm
10869
sscale_snorm:
10870
sscale_sdnrm:
10871
mov.b DTAG(%a6),%d1
10872
beq.l sscale
10873
cmpi.b %d1,&ZERO
10874
beq.l dst_zero
10875
cmpi.b %d1,&INF
10876
beq.l dst_inf
10877
cmpi.b %d1,&DENORM
10878
beq.l sscale
10879
cmpi.b %d1,&QNAN
10880
beq.l dst_qnan
10881
bra.l dst_snan
10882
10883
global sscale_szero
10884
sscale_szero:
10885
mov.b DTAG(%a6),%d1
10886
beq.l sscale
10887
cmpi.b %d1,&ZERO
10888
beq.l dst_zero
10889
cmpi.b %d1,&INF
10890
beq.l dst_inf
10891
cmpi.b %d1,&DENORM
10892
beq.l sscale
10893
cmpi.b %d1,&QNAN
10894
beq.l dst_qnan
10895
bra.l dst_snan
10896
10897
global sscale_sinf
10898
sscale_sinf:
10899
mov.b DTAG(%a6),%d1
10900
beq.l t_operr
10901
cmpi.b %d1,&QNAN
10902
beq.l dst_qnan
10903
cmpi.b %d1,&SNAN
10904
beq.l dst_snan
10905
bra.l t_operr
10906
10907
########################################################################
10908
10909
#
10910
# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10911
#
10912
global sop_sqnan
10913
sop_sqnan:
10914
mov.b DTAG(%a6),%d1
10915
cmpi.b %d1,&QNAN
10916
beq.b dst_qnan
10917
cmpi.b %d1,&SNAN
10918
beq.b dst_snan
10919
bra.b src_qnan
10920
10921
#
10922
# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10923
#
10924
global sop_ssnan
10925
sop_ssnan:
10926
mov.b DTAG(%a6),%d1
10927
cmpi.b %d1,&QNAN
10928
beq.b dst_qnan_src_snan
10929
cmpi.b %d1,&SNAN
10930
beq.b dst_snan
10931
bra.b src_snan
10932
10933
dst_qnan_src_snan:
10934
ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10935
bra.b dst_qnan
10936
10937
#
10938
# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10939
#
10940
global dst_snan
10941
dst_snan:
10942
fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
10943
fmov.l %fpsr,%d0 # catch resulting status
10944
or.l %d0,USER_FPSR(%a6) # store status
10945
rts
10946
10947
#
10948
# dst_qnan(): Return the dst QNAN.
10949
#
10950
global dst_qnan
10951
dst_qnan:
10952
fmov.x DST(%a1),%fp0 # return the non-signalling nan
10953
tst.b DST_EX(%a1) # set ccodes according to QNAN sign
10954
bmi.b dst_qnan_m
10955
dst_qnan_p:
10956
mov.b &nan_bmask,FPSR_CC(%a6)
10957
rts
10958
dst_qnan_m:
10959
mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10960
rts
10961
10962
#
10963
# src_snan(): Return the src SNAN w/ the SNAN bit set.
10964
#
10965
global src_snan
10966
src_snan:
10967
fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
10968
fmov.l %fpsr,%d0 # catch resulting status
10969
or.l %d0,USER_FPSR(%a6) # store status
10970
rts
10971
10972
#
10973
# src_qnan(): Return the src QNAN.
10974
#
10975
global src_qnan
10976
src_qnan:
10977
fmov.x SRC(%a0),%fp0 # return the non-signalling nan
10978
tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
10979
bmi.b dst_qnan_m
10980
src_qnan_p:
10981
mov.b &nan_bmask,FPSR_CC(%a6)
10982
rts
10983
src_qnan_m:
10984
mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10985
rts
10986
10987
#
10988
# fkern2.s:
10989
# These entry points are used by the exception handler
10990
# routines where an instruction is selected by an index into
10991
# a large jump table corresponding to a given instruction which
10992
# has been decoded. Flow continues here where we now decode
10993
# further according to the source operand type.
10994
#
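#
# Every monadic entry point below follows the same shape: dispatch on the
# source tag (STAG) the handler set up earlier. Using fsinh as the example,
# in C-like form (each entry point substitutes its own special-case results):
#
#	switch (stag) {
#	case NORM:   return ssinh(x);		/* normalized: do the work */
#	case ZERO:   return src_zero(x);	/* +-0 in, +-0 out         */
#	case INF:    return src_inf(x);		/* +-inf in, +-inf out     */
#	case DENORM: return ssinhd(x);		/* denormalized input      */
#	case QNAN:   return src_qnan(x);	/* propagate the quiet NaN */
#	default:     return src_snan(x);	/* signalling NaN          */
#	}
#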
10995
10996
global fsinh
10997
fsinh:
10998
mov.b STAG(%a6),%d1
10999
beq.l ssinh
11000
cmpi.b %d1,&ZERO
11001
beq.l src_zero
11002
cmpi.b %d1,&INF
11003
beq.l src_inf
11004
cmpi.b %d1,&DENORM
11005
beq.l ssinhd
11006
cmpi.b %d1,&QNAN
11007
beq.l src_qnan
11008
bra.l src_snan
11009
11010
global flognp1
11011
flognp1:
11012
mov.b STAG(%a6),%d1
11013
beq.l slognp1
11014
cmpi.b %d1,&ZERO
11015
beq.l src_zero
11016
cmpi.b %d1,&INF
11017
beq.l sopr_inf
11018
cmpi.b %d1,&DENORM
11019
beq.l slognp1d
11020
cmpi.b %d1,&QNAN
11021
beq.l src_qnan
11022
bra.l src_snan
11023
11024
global fetoxm1
11025
fetoxm1:
11026
mov.b STAG(%a6),%d1
11027
beq.l setoxm1
11028
cmpi.b %d1,&ZERO
11029
beq.l src_zero
11030
cmpi.b %d1,&INF
11031
beq.l setoxm1i
11032
cmpi.b %d1,&DENORM
11033
beq.l setoxm1d
11034
cmpi.b %d1,&QNAN
11035
beq.l src_qnan
11036
bra.l src_snan
11037
11038
global ftanh
11039
ftanh:
11040
mov.b STAG(%a6),%d1
11041
beq.l stanh
11042
cmpi.b %d1,&ZERO
11043
beq.l src_zero
11044
cmpi.b %d1,&INF
11045
beq.l src_one
11046
cmpi.b %d1,&DENORM
11047
beq.l stanhd
11048
cmpi.b %d1,&QNAN
11049
beq.l src_qnan
11050
bra.l src_snan
11051
11052
global fatan
11053
fatan:
11054
mov.b STAG(%a6),%d1
11055
beq.l satan
11056
cmpi.b %d1,&ZERO
11057
beq.l src_zero
11058
cmpi.b %d1,&INF
11059
beq.l spi_2
11060
cmpi.b %d1,&DENORM
11061
beq.l satand
11062
cmpi.b %d1,&QNAN
11063
beq.l src_qnan
11064
bra.l src_snan
11065
11066
global fasin
11067
fasin:
11068
mov.b STAG(%a6),%d1
11069
beq.l sasin
11070
cmpi.b %d1,&ZERO
11071
beq.l src_zero
11072
cmpi.b %d1,&INF
11073
beq.l t_operr
11074
cmpi.b %d1,&DENORM
11075
beq.l sasind
11076
cmpi.b %d1,&QNAN
11077
beq.l src_qnan
11078
bra.l src_snan
11079
11080
global fatanh
11081
fatanh:
11082
mov.b STAG(%a6),%d1
11083
beq.l satanh
11084
cmpi.b %d1,&ZERO
11085
beq.l src_zero
11086
cmpi.b %d1,&INF
11087
beq.l t_operr
11088
cmpi.b %d1,&DENORM
11089
beq.l satanhd
11090
cmpi.b %d1,&QNAN
11091
beq.l src_qnan
11092
bra.l src_snan
11093
11094
global fsine
11095
fsine:
11096
mov.b STAG(%a6),%d1
11097
beq.l ssin
11098
cmpi.b %d1,&ZERO
11099
beq.l src_zero
11100
cmpi.b %d1,&INF
11101
beq.l t_operr
11102
cmpi.b %d1,&DENORM
11103
beq.l ssind
11104
cmpi.b %d1,&QNAN
11105
beq.l src_qnan
11106
bra.l src_snan
11107
11108
global ftan
11109
ftan:
11110
mov.b STAG(%a6),%d1
11111
beq.l stan
11112
cmpi.b %d1,&ZERO
11113
beq.l src_zero
11114
cmpi.b %d1,&INF
11115
beq.l t_operr
11116
cmpi.b %d1,&DENORM
11117
beq.l stand
11118
cmpi.b %d1,&QNAN
11119
beq.l src_qnan
11120
bra.l src_snan
11121
11122
global fetox
11123
fetox:
11124
mov.b STAG(%a6),%d1
11125
beq.l setox
11126
cmpi.b %d1,&ZERO
11127
beq.l ld_pone
11128
cmpi.b %d1,&INF
11129
beq.l szr_inf
11130
cmpi.b %d1,&DENORM
11131
beq.l setoxd
11132
cmpi.b %d1,&QNAN
11133
beq.l src_qnan
11134
bra.l src_snan
11135
11136
global ftwotox
11137
ftwotox:
11138
mov.b STAG(%a6),%d1
11139
beq.l stwotox
11140
cmpi.b %d1,&ZERO
11141
beq.l ld_pone
11142
cmpi.b %d1,&INF
11143
beq.l szr_inf
11144
cmpi.b %d1,&DENORM
11145
beq.l stwotoxd
11146
cmpi.b %d1,&QNAN
11147
beq.l src_qnan
11148
bra.l src_snan
11149
11150
global ftentox
11151
ftentox:
11152
mov.b STAG(%a6),%d1
11153
beq.l stentox
11154
cmpi.b %d1,&ZERO
11155
beq.l ld_pone
11156
cmpi.b %d1,&INF
11157
beq.l szr_inf
11158
cmpi.b %d1,&DENORM
11159
beq.l stentoxd
11160
cmpi.b %d1,&QNAN
11161
beq.l src_qnan
11162
bra.l src_snan
11163
11164
global flogn
11165
flogn:
11166
mov.b STAG(%a6),%d1
11167
beq.l slogn
11168
cmpi.b %d1,&ZERO
11169
beq.l t_dz2
11170
cmpi.b %d1,&INF
11171
beq.l sopr_inf
11172
cmpi.b %d1,&DENORM
11173
beq.l slognd
11174
cmpi.b %d1,&QNAN
11175
beq.l src_qnan
11176
bra.l src_snan
11177
11178
global flog10
11179
flog10:
11180
mov.b STAG(%a6),%d1
11181
beq.l slog10
11182
cmpi.b %d1,&ZERO
11183
beq.l t_dz2
11184
cmpi.b %d1,&INF
11185
beq.l sopr_inf
11186
cmpi.b %d1,&DENORM
11187
beq.l slog10d
11188
cmpi.b %d1,&QNAN
11189
beq.l src_qnan
11190
bra.l src_snan
11191
11192
global flog2
11193
flog2:
11194
mov.b STAG(%a6),%d1
11195
beq.l slog2
11196
cmpi.b %d1,&ZERO
11197
beq.l t_dz2
11198
cmpi.b %d1,&INF
11199
beq.l sopr_inf
11200
cmpi.b %d1,&DENORM
11201
beq.l slog2d
11202
cmpi.b %d1,&QNAN
11203
beq.l src_qnan
11204
bra.l src_snan
11205
11206
global fcosh
11207
fcosh:
11208
mov.b STAG(%a6),%d1
11209
beq.l scosh
11210
cmpi.b %d1,&ZERO
11211
beq.l ld_pone
11212
cmpi.b %d1,&INF
11213
beq.l ld_pinf
11214
cmpi.b %d1,&DENORM
11215
beq.l scoshd
11216
cmpi.b %d1,&QNAN
11217
beq.l src_qnan
11218
bra.l src_snan
11219
11220
global facos
11221
facos:
11222
mov.b STAG(%a6),%d1
11223
beq.l sacos
11224
cmpi.b %d1,&ZERO
11225
beq.l ld_ppi2
11226
cmpi.b %d1,&INF
11227
beq.l t_operr
11228
cmpi.b %d1,&DENORM
11229
beq.l sacosd
11230
cmpi.b %d1,&QNAN
11231
beq.l src_qnan
11232
bra.l src_snan
11233
11234
global fcos
11235
fcos:
11236
mov.b STAG(%a6),%d1
11237
beq.l scos
11238
cmpi.b %d1,&ZERO
11239
beq.l ld_pone
11240
cmpi.b %d1,&INF
11241
beq.l t_operr
11242
cmpi.b %d1,&DENORM
11243
beq.l scosd
11244
cmpi.b %d1,&QNAN
11245
beq.l src_qnan
11246
bra.l src_snan
11247
11248
global fgetexp
11249
fgetexp:
11250
mov.b STAG(%a6),%d1
11251
beq.l sgetexp
11252
cmpi.b %d1,&ZERO
11253
beq.l src_zero
11254
cmpi.b %d1,&INF
11255
beq.l t_operr
11256
cmpi.b %d1,&DENORM
11257
beq.l sgetexpd
11258
cmpi.b %d1,&QNAN
11259
beq.l src_qnan
11260
bra.l src_snan
11261
11262
global fgetman
11263
fgetman:
11264
mov.b STAG(%a6),%d1
11265
beq.l sgetman
11266
cmpi.b %d1,&ZERO
11267
beq.l src_zero
11268
cmpi.b %d1,&INF
11269
beq.l t_operr
11270
cmpi.b %d1,&DENORM
11271
beq.l sgetmand
11272
cmpi.b %d1,&QNAN
11273
beq.l src_qnan
11274
bra.l src_snan
11275
11276
global fsincos
11277
fsincos:
11278
mov.b STAG(%a6),%d1
11279
beq.l ssincos
11280
cmpi.b %d1,&ZERO
11281
beq.l ssincosz
11282
cmpi.b %d1,&INF
11283
beq.l ssincosi
11284
cmpi.b %d1,&DENORM
11285
beq.l ssincosd
11286
cmpi.b %d1,&QNAN
11287
beq.l ssincosqnan
11288
bra.l ssincossnan
11289
11290
global fmod
11291
fmod:
11292
mov.b STAG(%a6),%d1
11293
beq.l smod_snorm
11294
cmpi.b %d1,&ZERO
11295
beq.l smod_szero
11296
cmpi.b %d1,&INF
11297
beq.l smod_sinf
11298
cmpi.b %d1,&DENORM
11299
beq.l smod_sdnrm
11300
cmpi.b %d1,&QNAN
11301
beq.l sop_sqnan
11302
bra.l sop_ssnan
11303
11304
global frem
11305
frem:
11306
mov.b STAG(%a6),%d1
11307
beq.l srem_snorm
11308
cmpi.b %d1,&ZERO
11309
beq.l srem_szero
11310
cmpi.b %d1,&INF
11311
beq.l srem_sinf
11312
cmpi.b %d1,&DENORM
11313
beq.l srem_sdnrm
11314
cmpi.b %d1,&QNAN
11315
beq.l sop_sqnan
11316
bra.l sop_ssnan
11317
11318
global fscale
11319
fscale:
11320
mov.b STAG(%a6),%d1
11321
beq.l sscale_snorm
11322
cmpi.b %d1,&ZERO
11323
beq.l sscale_szero
11324
cmpi.b %d1,&INF
11325
beq.l sscale_sinf
11326
cmpi.b %d1,&DENORM
11327
beq.l sscale_sdnrm
11328
cmpi.b %d1,&QNAN
11329
beq.l sop_sqnan
11330
bra.l sop_ssnan
11331
11332
#########################################################################
11333
# XDEF **************************************************************** #
11334
# fgen_except(): catch an exception during transcendental #
11335
# emulation #
11336
# #
11337
# XREF **************************************************************** #
11338
# fmul() - emulate a multiply instruction #
11339
# fadd() - emulate an add instruction #
11340
# fin() - emulate an fmove instruction #
11341
# #
11342
# INPUT *************************************************************** #
11343
# fp0 = destination operand #
11344
# d0 = type of instruction that took exception #
11345
# fsave frame = source operand #
11346
# #
11347
# OUTPUT ************************************************************** #
11348
# fp0 = result #
11349
# fp1 = EXOP #
11350
# #
11351
# ALGORITHM *********************************************************** #
11352
# An exception occurred on the last instruction of the #
11353
# transcendental emulation. hopefully, this won't be happening much #
11354
# because it will be VERY slow. #
11355
# The only exceptions capable of passing through here are #
11356
# Overflow, Underflow, and Unsupported Data Type. #
11357
# #
11358
#########################################################################
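#
# The recovery below amounts to re-running the last instruction through the
# ordinary arithmetic emulation routines, with the fsave frame supplying the
# source operand. Roughly, in C (operand setup omitted):
#
#	if (last_op == FMOV_OP)
#		fin(src, dst);		/* re-emulate the fmove    */
#	else if (last_op == FADD_OP)
#		fadd(src, dst);		/* re-emulate the add      */
#	else
#		fmul(src, dst);		/* re-emulate the multiply */
#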
11359
11360
global fgen_except
11361
fgen_except:
11362
cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
11363
beq.b fge_unsupp # yes
11364
11365
mov.b &NORM,STAG(%a6)
11366
11367
fge_cont:
11368
mov.b &NORM,DTAG(%a6)
11369
11370
# ok, I have a problem with putting the dst op at FP_DST. the emulation
11371
# routines aren't supposed to alter the operands but we've just squashed
11372
# FP_DST here...
11373
11374
# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375
# then a potential bug. to begin with, only the dyadic functions
11376
# frem,fmod, and fscale would get the dst trashed here. But, for
11377
# the 060SP, the FP_DST is never used again anyways.
11378
fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
11379
11380
lea 0x4(%sp),%a0 # pass: ptr to src op
11381
lea FP_DST(%a6),%a1 # pass: ptr to dst op
11382
11383
cmpi.b %d1,&FMOV_OP
11384
beq.b fge_fin # it was an "fmov"
11385
cmpi.b %d1,&FADD_OP
11386
beq.b fge_fadd # it was an "fadd"
11387
fge_fmul:
11388
bsr.l fmul
11389
rts
11390
fge_fadd:
11391
bsr.l fadd
11392
rts
11393
fge_fin:
11394
bsr.l fin
11395
rts
11396
11397
fge_unsupp:
11398
mov.b &DENORM,STAG(%a6)
11399
bra.b fge_cont
11400
11401
#
11402
# This table holds the offsets of the emulation routines for each individual
11403
# math operation relative to the address of this table. Included are
11404
# routines like fadd/fmul/fabs as well as the transcendentals.
11405
# The location within the table is determined by the extension bits of the
11406
# operation longword.
11407
#
11408
11409
swbeg &109
11410
tbl_unsupp:
11411
long fin - tbl_unsupp # 00: fmove
11412
long fint - tbl_unsupp # 01: fint
11413
long fsinh - tbl_unsupp # 02: fsinh
11414
long fintrz - tbl_unsupp # 03: fintrz
11415
long fsqrt - tbl_unsupp # 04: fsqrt
11416
long tbl_unsupp - tbl_unsupp
11417
long flognp1 - tbl_unsupp # 06: flognp1
11418
long tbl_unsupp - tbl_unsupp
11419
long fetoxm1 - tbl_unsupp # 08: fetoxm1
11420
long ftanh - tbl_unsupp # 09: ftanh
11421
long fatan - tbl_unsupp # 0a: fatan
11422
long tbl_unsupp - tbl_unsupp
11423
long fasin - tbl_unsupp # 0c: fasin
11424
long fatanh - tbl_unsupp # 0d: fatanh
11425
long fsine - tbl_unsupp # 0e: fsin
11426
long ftan - tbl_unsupp # 0f: ftan
11427
long fetox - tbl_unsupp # 10: fetox
11428
long ftwotox - tbl_unsupp # 11: ftwotox
11429
long ftentox - tbl_unsupp # 12: ftentox
11430
long tbl_unsupp - tbl_unsupp
11431
long flogn - tbl_unsupp # 14: flogn
11432
long flog10 - tbl_unsupp # 15: flog10
11433
long flog2 - tbl_unsupp # 16: flog2
11434
long tbl_unsupp - tbl_unsupp
11435
long fabs - tbl_unsupp # 18: fabs
11436
long fcosh - tbl_unsupp # 19: fcosh
11437
long fneg - tbl_unsupp # 1a: fneg
11438
long tbl_unsupp - tbl_unsupp
11439
long facos - tbl_unsupp # 1c: facos
11440
long fcos - tbl_unsupp # 1d: fcos
11441
long fgetexp - tbl_unsupp # 1e: fgetexp
11442
long fgetman - tbl_unsupp # 1f: fgetman
11443
long fdiv - tbl_unsupp # 20: fdiv
11444
long fmod - tbl_unsupp # 21: fmod
11445
long fadd - tbl_unsupp # 22: fadd
11446
long fmul - tbl_unsupp # 23: fmul
11447
long fsgldiv - tbl_unsupp # 24: fsgldiv
11448
long frem - tbl_unsupp # 25: frem
11449
long fscale - tbl_unsupp # 26: fscale
11450
long fsglmul - tbl_unsupp # 27: fsglmul
11451
long fsub - tbl_unsupp # 28: fsub
11452
long tbl_unsupp - tbl_unsupp
11453
long tbl_unsupp - tbl_unsupp
11454
long tbl_unsupp - tbl_unsupp
11455
long tbl_unsupp - tbl_unsupp
11456
long tbl_unsupp - tbl_unsupp
11457
long tbl_unsupp - tbl_unsupp
11458
long tbl_unsupp - tbl_unsupp
11459
long fsincos - tbl_unsupp # 30: fsincos
11460
long fsincos - tbl_unsupp # 31: fsincos
11461
long fsincos - tbl_unsupp # 32: fsincos
11462
long fsincos - tbl_unsupp # 33: fsincos
11463
long fsincos - tbl_unsupp # 34: fsincos
11464
long fsincos - tbl_unsupp # 35: fsincos
11465
long fsincos - tbl_unsupp # 36: fsincos
11466
long fsincos - tbl_unsupp # 37: fsincos
11467
long fcmp - tbl_unsupp # 38: fcmp
11468
long tbl_unsupp - tbl_unsupp
11469
long ftst - tbl_unsupp # 3a: ftst
11470
long tbl_unsupp - tbl_unsupp
11471
long tbl_unsupp - tbl_unsupp
11472
long tbl_unsupp - tbl_unsupp
11473
long tbl_unsupp - tbl_unsupp
11474
long tbl_unsupp - tbl_unsupp
11475
long fsin - tbl_unsupp # 40: fsmove
11476
long fssqrt - tbl_unsupp # 41: fssqrt
11477
long tbl_unsupp - tbl_unsupp
11478
long tbl_unsupp - tbl_unsupp
11479
long fdin - tbl_unsupp # 44: fdmove
11480
long fdsqrt - tbl_unsupp # 45: fdsqrt
11481
long tbl_unsupp - tbl_unsupp
11482
long tbl_unsupp - tbl_unsupp
11483
long tbl_unsupp - tbl_unsupp
11484
long tbl_unsupp - tbl_unsupp
11485
long tbl_unsupp - tbl_unsupp
11486
long tbl_unsupp - tbl_unsupp
11487
long tbl_unsupp - tbl_unsupp
11488
long tbl_unsupp - tbl_unsupp
11489
long tbl_unsupp - tbl_unsupp
11490
long tbl_unsupp - tbl_unsupp
11491
long tbl_unsupp - tbl_unsupp
11492
long tbl_unsupp - tbl_unsupp
11493
long tbl_unsupp - tbl_unsupp
11494
long tbl_unsupp - tbl_unsupp
11495
long tbl_unsupp - tbl_unsupp
11496
long tbl_unsupp - tbl_unsupp
11497
long tbl_unsupp - tbl_unsupp
11498
long tbl_unsupp - tbl_unsupp
11499
long fsabs - tbl_unsupp # 58: fsabs
11500
long tbl_unsupp - tbl_unsupp
11501
long fsneg - tbl_unsupp # 5a: fsneg
11502
long tbl_unsupp - tbl_unsupp
11503
long fdabs - tbl_unsupp # 5c: fdabs
11504
long tbl_unsupp - tbl_unsupp
11505
long fdneg - tbl_unsupp # 5e: fdneg
11506
long tbl_unsupp - tbl_unsupp
11507
long fsdiv - tbl_unsupp # 60: fsdiv
11508
long tbl_unsupp - tbl_unsupp
11509
long fsadd - tbl_unsupp # 62: fsadd
11510
long fsmul - tbl_unsupp # 63: fsmul
11511
long fddiv - tbl_unsupp # 64: fddiv
11512
long tbl_unsupp - tbl_unsupp
11513
long fdadd - tbl_unsupp # 66: fdadd
11514
long fdmul - tbl_unsupp # 67: fdmul
11515
long fssub - tbl_unsupp # 68: fssub
11516
long tbl_unsupp - tbl_unsupp
11517
long tbl_unsupp - tbl_unsupp
11518
long tbl_unsupp - tbl_unsupp
11519
long fdsub - tbl_unsupp # 6c: fdsub
11520
11521
#########################################################################
11522
# XDEF **************************************************************** #
11523
# fmul(): emulates the fmul instruction #
11524
# fsmul(): emulates the fsmul instruction #
11525
# fdmul(): emulates the fdmul instruction #
11526
# #
11527
# XREF **************************************************************** #
11528
# scale_to_zero_src() - scale src exponent to zero #
11529
# scale_to_zero_dst() - scale dst exponent to zero #
11530
# unf_res() - return default underflow result #
11531
# ovf_res() - return default overflow result #
11532
# res_qnan() - return QNAN result #
11533
# res_snan() - return SNAN result #
11534
# #
11535
# INPUT *************************************************************** #
11536
# a0 = pointer to extended precision source operand #
11537
# a1 = pointer to extended precision destination operand #
11538
# d0 rnd prec,mode #
11539
# #
11540
# OUTPUT ************************************************************** #
11541
# fp0 = result #
11542
# fp1 = EXOP (if exception occurred) #
11543
# #
11544
# ALGORITHM *********************************************************** #
11545
# Handle NANs, infinities, and zeroes as special cases. Divide #
11546
# norms/denorms into ext/sgl/dbl precision. #
11547
# For norms/denorms, scale the exponents such that a multiply #
11548
# instruction won't cause an exception. Use the regular fmul to #
11549
# compute a result. Check if the regular operands would have taken #
11550
# an exception. If so, return the default overflow/underflow result #
11551
# and return the EXOP if exceptions are enabled. Else, scale the #
11552
# result operand to the proper exponent. #
11553
# #
11554
#########################################################################
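#
# A worked reading of the pre-check below (all exponents are extended-format
# biased values; bias = 0x3fff). Both operands are scaled so their exponents
# sit at the bias, and the true result exponent is later recovered as
# (scaled result exponent - SCALE_FACTOR). For extended precision that gives,
# roughly:
#
#	overflow   if SCALE_FACTOR < 0x3fff - 0x7ffe	(0x7ffe = max finite exp)
#	underflow  if SCALE_FACTOR > 0x3fff + 0x0001
#
# the "may" cases on equality are settled by actually performing the scaled
# multiply and comparing |result| against 2.0.
#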
11555
11556
align 0x10
11557
tbl_fmul_ovfl:
11558
long 0x3fff - 0x7ffe # ext_max
11559
long 0x3fff - 0x407e # sgl_max
11560
long 0x3fff - 0x43fe # dbl_max
11561
tbl_fmul_unfl:
11562
long 0x3fff + 0x0001 # ext_unfl
11563
long 0x3fff - 0x3f80 # sgl_unfl
11564
long 0x3fff - 0x3c00 # dbl_unfl
11565
11566
global fsmul
11567
fsmul:
11568
andi.b &0x30,%d0 # clear rnd prec
11569
ori.b &s_mode*0x10,%d0 # insert sgl prec
11570
bra.b fmul
11571
11572
global fdmul
11573
fdmul:
11574
andi.b &0x30,%d0
11575
ori.b &d_mode*0x10,%d0 # insert dbl prec
11576
11577
global fmul
11578
fmul:
11579
mov.l %d0,L_SCR3(%a6) # store rnd info
11580
11581
clr.w %d1
11582
mov.b DTAG(%a6),%d1
11583
lsl.b &0x3,%d1
11584
or.b STAG(%a6),%d1 # combine src tags
11585
bne.w fmul_not_norm # optimize on non-norm input
11586
11587
fmul_norm:
11588
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11589
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11590
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11591
11592
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11593
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11594
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11595
11596
bsr.l scale_to_zero_src # scale src exponent
11597
mov.l %d0,-(%sp) # save scale factor 1
11598
11599
bsr.l scale_to_zero_dst # scale dst exponent
11600
11601
add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
11602
11603
mov.w 2+L_SCR3(%a6),%d1 # fetch precision
11604
lsr.b &0x6,%d1 # shift to lo bits
11605
mov.l (%sp)+,%d0 # load S.F.
11606
cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607
beq.w fmul_may_ovfl # result may rnd to overflow
11608
blt.w fmul_ovfl # result will overflow
11609
11610
cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611
beq.w fmul_may_unfl # result may rnd to no unfl
11612
bgt.w fmul_unfl # result will underflow
11613
11614
#
11615
# NORMAL:
11616
# - the result of the multiply operation will neither overflow nor underflow.
11617
# - do the multiply to the proper precision and rounding mode.
11618
# - scale the result exponent using the scale factor. if both operands were
11619
# normalized then we really don't need to go through this scaling. but for now,
11620
# this will do.
11621
#
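#
# The unscaling done in fmul_normal_exit below is simply:
#
#	true_exp = (scaled result exponent & 0x7fff) - SCALE_FACTOR
#	result   = {original sign, true_exp, scaled result mantissa}
#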
11622
fmul_normal:
11623
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11624
11625
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11626
fmov.l &0x0,%fpsr # clear FPSR
11627
11628
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11629
11630
fmov.l %fpsr,%d1 # save status
11631
fmov.l &0x0,%fpcr # clear FPCR
11632
11633
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11634
11635
fmul_normal_exit:
11636
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11637
mov.l %d2,-(%sp) # save d2
11638
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
11639
mov.l %d1,%d2 # make a copy
11640
andi.l &0x7fff,%d1 # strip sign
11641
andi.w &0x8000,%d2 # keep old sign
11642
sub.l %d0,%d1 # add scale factor
11643
or.w %d2,%d1 # concat old sign,new exp
11644
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11645
mov.l (%sp)+,%d2 # restore d2
11646
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11647
rts
11648
11649
#
11650
# OVERFLOW:
11651
# - the result of the multiply operation is an overflow.
11652
# - do the multiply to the proper precision and rounding mode in order to
11653
# set the inexact bits.
11654
# - calculate the default result and return it in fp0.
11655
# - if overflow or inexact is enabled, we need a multiply result rounded to
11656
# extended precision. if the original operation was extended, then we have this
11657
# result. if the original operation was single or double, we have to do another
11658
# multiply using extended precision and the correct rounding mode. the result
11659
# of this operation then has its exponent scaled by -0x6000 to create the
11660
# exceptional operand.
11661
#
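#
# Spelled out, the EXOP built in fmul_ovfl_ena_cont below is:
#
#	exop_exp = ((result_exp - SCALE_FACTOR) - 0x6000) & 0x7fff
#	EXOP     = {original sign, exop_exp, result mantissa}
#
# the matching underflow path (fmul_unfl_ena) adds 0x6000 instead of
# subtracting it.
#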
11662
fmul_ovfl:
11663
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11664
11665
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11666
fmov.l &0x0,%fpsr # clear FPSR
11667
11668
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11669
11670
fmov.l %fpsr,%d1 # save status
11671
fmov.l &0x0,%fpcr # clear FPCR
11672
11673
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11674
11675
# save setting this until now because this is where fmul_may_ovfl may jump in
11676
fmul_ovfl_tst:
11677
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11678
11679
mov.b FPCR_ENABLE(%a6),%d1
11680
andi.b &0x13,%d1 # is OVFL or INEX enabled?
11681
bne.b fmul_ovfl_ena # yes
11682
11683
# calculate the default result
11684
fmul_ovfl_dis:
11685
btst &neg_bit,FPSR_CC(%a6) # is result negative?
11686
sne %d1 # set sign param accordingly
11687
mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
11688
bsr.l ovf_res # calculate default result
11689
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11690
fmovm.x (%a0),&0x80 # return default result in fp0
11691
rts
11692
11693
#
11694
# OVFL is enabled; Create EXOP:
11695
# - if precision is extended, then we have the EXOP. simply bias the exponent
11696
# with an extra -0x6000. if the precision is single or double, we need to
11697
# calculate a result rounded to extended precision.
11698
#
11699
fmul_ovfl_ena:
11700
mov.l L_SCR3(%a6),%d1
11701
andi.b &0xc0,%d1 # test the rnd prec
11702
bne.b fmul_ovfl_ena_sd # it's sgl or dbl
11703
11704
fmul_ovfl_ena_cont:
11705
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
11706
11707
mov.l %d2,-(%sp) # save d2
11708
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11709
mov.w %d1,%d2 # make a copy
11710
andi.l &0x7fff,%d1 # strip sign
11711
sub.l %d0,%d1 # add scale factor
11712
subi.l &0x6000,%d1 # subtract bias
11713
andi.w &0x7fff,%d1 # clear sign bit
11714
andi.w &0x8000,%d2 # keep old sign
11715
or.w %d2,%d1 # concat old sign,new exp
11716
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11717
mov.l (%sp)+,%d2 # restore d2
11718
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11719
bra.b fmul_ovfl_dis
11720
11721
fmul_ovfl_ena_sd:
11722
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11723
11724
mov.l L_SCR3(%a6),%d1
11725
andi.b &0x30,%d1 # keep rnd mode only
11726
fmov.l %d1,%fpcr # set FPCR
11727
11728
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11729
11730
fmov.l &0x0,%fpcr # clear FPCR
11731
bra.b fmul_ovfl_ena_cont
11732
11733
#
11734
# may OVERFLOW:
11735
# - the result of the multiply operation MAY overflow.
11736
# - do the multiply to the proper precision and rounding mode in order to
11737
# set the inexact bits.
11738
# - calculate the default result and return it in fp0.
11739
#
11740
fmul_may_ovfl:
11741
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11742
11743
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11744
fmov.l &0x0,%fpsr # clear FPSR
11745
11746
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11747
11748
fmov.l %fpsr,%d1 # save status
11749
fmov.l &0x0,%fpcr # clear FPCR
11750
11751
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11752
11753
fabs.x %fp0,%fp1 # make a copy of result
11754
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
11755
fbge.w fmul_ovfl_tst # yes; overflow has occurred
11756
11757
# no, it didn't overflow; we have correct result
11758
bra.w fmul_normal_exit
11759
11760
#
11761
# UNDERFLOW:
11762
# - the result of the multiply operation is an underflow.
11763
# - do the multiply to the proper precision and rounding mode in order to
11764
# set the inexact bits.
11765
# - calculate the default result and return it in fp0.
11766
# - if overflow or inexact is enabled, we need a multiply result rounded to
11767
# extended precision. if the original operation was extended, then we have this
11768
# result. if the original operation was single or double, we have to do another
11769
# multiply using extended precision and the correct rounding mode. the result
11770
# of this operation then has its exponent scaled by -0x6000 to create the
11771
# exceptional operand.
11772
#
11773
fmul_unfl:
11774
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11775
11776
# for fun, let's use only extended precision, round to zero. then, let
11777
# the unf_res() routine figure out all the rest.
11778
# will we get the correct answer?
11779
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11780
11781
fmov.l &rz_mode*0x10,%fpcr # set FPCR
11782
fmov.l &0x0,%fpsr # clear FPSR
11783
11784
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11785
11786
fmov.l %fpsr,%d1 # save status
11787
fmov.l &0x0,%fpcr # clear FPCR
11788
11789
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11790
11791
mov.b FPCR_ENABLE(%a6),%d1
11792
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11793
bne.b fmul_unfl_ena # yes
11794
11795
fmul_unfl_dis:
11796
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11797
11798
lea FP_SCR0(%a6),%a0 # pass: result addr
11799
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11800
bsr.l unf_res # calculate default result
11801
or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
11802
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11803
rts
11804
11805
#
11806
# UNFL is enabled.
11807
#
11808
fmul_unfl_ena:
11809
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11810
11811
mov.l L_SCR3(%a6),%d1
11812
andi.b &0xc0,%d1 # is precision extended?
11813
bne.b fmul_unfl_ena_sd # no, sgl or dbl
11814
11815
# if the rnd mode is anything but RZ, then we have to re-do the above
11816
# multiplication because we used RZ for all.
11817
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11818
11819
fmul_unfl_ena_cont:
11820
fmov.l &0x0,%fpsr # clear FPSR
11821
11822
fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11823
11824
fmov.l &0x0,%fpcr # clear FPCR
11825
11826
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11827
mov.l %d2,-(%sp) # save d2
11828
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11829
mov.l %d1,%d2 # make a copy
11830
andi.l &0x7fff,%d1 # strip sign
11831
andi.w &0x8000,%d2 # keep old sign
11832
sub.l %d0,%d1 # add scale factor
11833
addi.l &0x6000,%d1 # add bias
11834
andi.w &0x7fff,%d1
11835
or.w %d2,%d1 # concat old sign,new exp
11836
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11837
mov.l (%sp)+,%d2 # restore d2
11838
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11839
bra.w fmul_unfl_dis
11840
11841
fmul_unfl_ena_sd:
11842
mov.l L_SCR3(%a6),%d1
11843
andi.b &0x30,%d1 # use only rnd mode
11844
fmov.l %d1,%fpcr # set FPCR
11845
11846
bra.b fmul_unfl_ena_cont
11847
11848
# MAY UNDERFLOW:
11849
# -use the correct rounding mode and precision. this code favors operations
11850
# that do not underflow.
11851
fmul_may_unfl:
11852
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11853
11854
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11855
fmov.l &0x0,%fpsr # clear FPSR
11856
11857
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11858
11859
fmov.l %fpsr,%d1 # save status
11860
fmov.l &0x0,%fpcr # clear FPCR
11861
11862
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11863
11864
fabs.x %fp0,%fp1 # make a copy of result
11865
fcmp.b %fp1,&0x2 # is |result| > 2.b?
11866
fbgt.w fmul_normal_exit # no; no underflow occurred
11867
fblt.w fmul_unfl # yes; underflow occurred
11868
11869
#
11870
# we still don't know if underflow occurred. result is ~ equal to 2. but,
11871
# we don't know if the result was an underflow that rounded up to a 2 or
11872
# a normalized number that rounded down to a 2. so, redo the entire operation
11873
# using RZ as the rounding mode to see what the pre-rounded result is.
11874
# this case should be relatively rare.
11875
#
11876
fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
11877
11878
mov.l L_SCR3(%a6),%d1
11879
andi.b &0xc0,%d1 # keep rnd prec
11880
ori.b &rz_mode*0x10,%d1 # insert RZ
11881
11882
fmov.l %d1,%fpcr # set FPCR
11883
fmov.l &0x0,%fpsr # clear FPSR
11884
11885
fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11886
11887
fmov.l &0x0,%fpcr # clear FPCR
11888
fabs.x %fp1 # make absolute value
11889
fcmp.b %fp1,&0x2 # is |result| < 2.b?
11890
fbge.w fmul_normal_exit # no; no underflow occurred
11891
bra.w fmul_unfl # yes, underflow occurred
11892
11893
################################################################################
11894
11895
#
11896
# Multiply: inputs are not both normalized; what are they?
11897
#
11898
fmul_not_norm:
11899
mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
11900
jmp (tbl_fmul_op.b,%pc,%d1.w)
11901
11902
swbeg &48
11903
tbl_fmul_op:
11904
short fmul_norm - tbl_fmul_op # NORM x NORM
11905
short fmul_zero - tbl_fmul_op # NORM x ZERO
11906
short fmul_inf_src - tbl_fmul_op # NORM x INF
11907
short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
11908
short fmul_norm - tbl_fmul_op # NORM x DENORM
11909
short fmul_res_snan - tbl_fmul_op # NORM x SNAN
11910
short tbl_fmul_op - tbl_fmul_op #
11911
short tbl_fmul_op - tbl_fmul_op #
11912
11913
short fmul_zero - tbl_fmul_op # ZERO x NORM
11914
short fmul_zero - tbl_fmul_op # ZERO x ZERO
11915
short fmul_res_operr - tbl_fmul_op # ZERO x INF
11916
short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
11917
short fmul_zero - tbl_fmul_op # ZERO x DENORM
11918
short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
11919
short tbl_fmul_op - tbl_fmul_op #
11920
short tbl_fmul_op - tbl_fmul_op #
11921
11922
short fmul_inf_dst - tbl_fmul_op # INF x NORM
11923
short fmul_res_operr - tbl_fmul_op # INF x ZERO
11924
short fmul_inf_dst - tbl_fmul_op # INF x INF
11925
short fmul_res_qnan - tbl_fmul_op # INF x QNAN
11926
short fmul_inf_dst - tbl_fmul_op # INF x DENORM
11927
short fmul_res_snan - tbl_fmul_op # INF x SNAN
11928
short tbl_fmul_op - tbl_fmul_op #
11929
short tbl_fmul_op - tbl_fmul_op #
11930
11931
short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
11932
short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
11933
short fmul_res_qnan - tbl_fmul_op # QNAN x INF
11934
short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
11935
short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
11936
short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
11937
short tbl_fmul_op - tbl_fmul_op #
11938
short tbl_fmul_op - tbl_fmul_op #
11939
11940
short fmul_norm - tbl_fmul_op # DENORM x NORM
11941
short fmul_zero - tbl_fmul_op # DENORM x ZERO
11942
short fmul_inf_src - tbl_fmul_op # DENORM x INF
11943
short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
11944
short fmul_norm - tbl_fmul_op # DENORM x DENORM
11945
short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
11946
short tbl_fmul_op - tbl_fmul_op #
11947
short tbl_fmul_op - tbl_fmul_op #
11948
11949
short fmul_res_snan - tbl_fmul_op # SNAN x NORM
11950
short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
11951
short fmul_res_snan - tbl_fmul_op # SNAN x INF
11952
short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
11953
short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
11954
short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
11955
short tbl_fmul_op - tbl_fmul_op #
11956
short tbl_fmul_op - tbl_fmul_op #
11957
11958
fmul_res_operr:
11959
bra.l res_operr
11960
fmul_res_snan:
11961
bra.l res_snan
11962
fmul_res_qnan:
11963
bra.l res_qnan
11964
11965
#
11966
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11967
#
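#
# The sign of a zero result is just the exclusive-OR of the two operand
# signs; a minimal C sketch of the test done below (variable names are
# illustrative):
#
#	int neg = (src_ex ^ dst_ex) & 0x80;	/* sign bits of the EX bytes */
#	return neg ? -0.0 : +0.0;
#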
11968
global fmul_zero # global for fsglmul
11969
fmul_zero:
11970
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11971
mov.b DST_EX(%a1),%d1
11972
eor.b %d0,%d1
11973
bpl.b fmul_zero_p # result ZERO is pos.
11974
fmul_zero_n:
11975
fmov.s &0x80000000,%fp0 # load -ZERO
11976
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11977
rts
11978
fmul_zero_p:
11979
fmov.s &0x00000000,%fp0 # load +ZERO
11980
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11981
rts
11982
11983
#
11984
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11985
#
11986
# Note: The j-bit for an infinity is a don't-care. However, to be
11987
# strictly compatible w/ the 68881/882, we make sure to return an
11988
# INF w/ the j-bit set if the input INF j-bit was set. Destination
11989
# INFs take priority.
11990
#
11991
global fmul_inf_dst # global for fsglmul
11992
fmul_inf_dst:
11993
fmovm.x DST(%a1),&0x80 # return INF result in fp0
11994
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11995
mov.b DST_EX(%a1),%d1
11996
eor.b %d0,%d1
11997
bpl.b fmul_inf_dst_p # result INF is pos.
11998
fmul_inf_dst_n:
11999
fabs.x %fp0 # clear result sign
12000
fneg.x %fp0 # set result sign
12001
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12002
rts
12003
fmul_inf_dst_p:
12004
fabs.x %fp0 # clear result sign
12005
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12006
rts
12007
12008
global fmul_inf_src # global for fsglmul
12009
fmul_inf_src:
12010
fmovm.x SRC(%a0),&0x80 # return INF result in fp0
12011
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12012
mov.b DST_EX(%a1),%d1
12013
eor.b %d0,%d1
12014
bpl.b fmul_inf_dst_p # result INF is pos.
12015
bra.b fmul_inf_dst_n
12016
12017
#########################################################################
12018
# XDEF **************************************************************** #
12019
# fin(): emulates the fmove instruction #
12020
# fsin(): emulates the fsmove instruction #
12021
# fdin(): emulates the fdmove instruction #
12022
# #
12023
# XREF **************************************************************** #
12024
# norm() - normalize mantissa for EXOP on denorm #
12025
# scale_to_zero_src() - scale src exponent to zero #
12026
# ovf_res() - return default overflow result #
12027
# unf_res() - return default underflow result #
12028
# res_qnan_1op() - return QNAN result #
12029
# res_snan_1op() - return SNAN result #
12030
# #
12031
# INPUT *************************************************************** #
12032
# a0 = pointer to extended precision source operand #
12033
# d0 = round prec/mode #
12034
# #
12035
# OUTPUT ************************************************************** #
12036
# fp0 = result #
12037
# fp1 = EXOP (if exception occurred) #
12038
# #
12039
# ALGORITHM *********************************************************** #
12040
# Handle NANs, infinities, and zeroes as special cases. Divide #
12041
# norms into extended, single, and double precision. #
12042
# Norms can be emulated w/ a regular fmove instruction. For #
12043
# sgl/dbl, must scale exponent and perform an "fmove". Check to see #
12044
# if the result would have overflowed/underflowed. If so, use unf_res() #
12045
# or ovf_res() to return the default result. Also return EXOP if #
12046
# exception is enabled. If no exception, return the default result. #
12047
# Unnorms don't pass through here. #
12048
# #
12049
#########################################################################
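#
# For reference, the single-precision limits used below, written out as
# extended-format biased exponents (bias = 0x3fff), assuming a normalized
# input so that scale_to_zero_src() returns d0 = 0x3fff - (input exponent):
#
#	largest finite sgl exponent :  +127  ->  0x407e
#	smallest normal sgl exponent:  -126  ->  0x3f81
#
#	d0 >= 0x3fff - 0x3f80  =>  input exponent <= 0x3f80  =>  will underflow
#	d0 <  0x3fff - 0x407e  =>  input exponent  > 0x407e  =>  will overflow
#	d0 == 0x3fff - 0x407e  =>  on the boundary; may overflow after rounding
#
# the double-precision path (fin_dbl) is analogous with 0x43fe and 0x3c00.
#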
12050
12051
global fsin
12052
fsin:
12053
andi.b &0x30,%d0 # clear rnd prec
12054
ori.b &s_mode*0x10,%d0 # insert sgl precision
12055
bra.b fin
12056
12057
global fdin
12058
fdin:
12059
andi.b &0x30,%d0 # clear rnd prec
12060
ori.b &d_mode*0x10,%d0 # insert dbl precision
12061
12062
global fin
12063
fin:
12064
mov.l %d0,L_SCR3(%a6) # store rnd info
12065
12066
mov.b STAG(%a6),%d1 # fetch src optype tag
12067
bne.w fin_not_norm # optimize on non-norm input
12068
12069
#
12070
# FP MOVE IN: NORMs and DENORMs ONLY!
12071
#
12072
fin_norm:
12073
andi.b &0xc0,%d0 # is precision extended?
12074
bne.w fin_not_ext # no, so go handle dbl or sgl
12075
12076
#
12077
# precision selected is extended. so...we cannot get an underflow
12078
# or overflow because of rounding to the correct precision. so...
12079
# skip the scaling and unscaling...
12080
#
12081
tst.b SRC_EX(%a0) # is the operand negative?
12082
bpl.b fin_norm_done # no
12083
bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12084
fin_norm_done:
12085
fmovm.x SRC(%a0),&0x80 # return result in fp0
12086
rts
12087
12088
#
12089
# for an extended precision DENORM, the UNFL exception bit is set
12090
# the accrued bit is NOT set in this instance(no inexactness!)
12091
#
12092
fin_denorm:
12093
andi.b &0xc0,%d0 # is precision extended?
12094
bne.w fin_not_ext # no, so go handle dbl or sgl
12095
12096
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097
tst.b SRC_EX(%a0) # is the operand negative?
12098
bpl.b fin_denorm_done # no
12099
bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12100
fin_denorm_done:
12101
fmovm.x SRC(%a0),&0x80 # return result in fp0
12102
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103
bne.b fin_denorm_unfl_ena # yes
12104
rts
12105
12106
#
12107
# the input is an extended DENORM and underflow is enabled in the FPCR.
12108
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109
# exponent and insert back into the operand.
12110
#
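#
# In short, the EXOP produced below is:
#
#	exop_exp = (0x6000 - (shift count returned by norm())) & 0x7fff
#	EXOP     = {original sign, exop_exp, normalized mantissa}
#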
12111
fin_denorm_unfl_ena:
12112
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12113
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12114
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12115
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12116
bsr.l norm # normalize result
12117
neg.w %d0 # new exponent = -(shft val)
12118
addi.w &0x6000,%d0 # add new bias to exponent
12119
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12120
andi.w &0x8000,%d1 # keep old sign
12121
andi.w &0x7fff,%d0 # clear sign position
12122
or.w %d1,%d0 # concat new exp,old sign
12123
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12124
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12125
rts
12126
12127
#
12128
# operand is to be rounded to single or double precision
12129
#
12130
fin_not_ext:
12131
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12132
bne.b fin_dbl
12133
12134
#
12135
# operand is to be rounded to single precision
12136
#
12137
fin_sgl:
12138
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12139
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12140
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12141
bsr.l scale_to_zero_src # calculate scale factor
12142
12143
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12144
bge.w fin_sd_unfl # yes; go handle underflow
12145
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12146
beq.w fin_sd_may_ovfl # maybe; go check
12147
blt.w fin_sd_ovfl # yes; go handle overflow
12148
12149
#
12150
# operand will NOT overflow or underflow when moved into the fp reg file
12151
#
12152
fin_sd_normal:
12153
fmov.l &0x0,%fpsr # clear FPSR
12154
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12155
12156
fmov.x FP_SCR0(%a6),%fp0 # perform move
12157
12158
fmov.l %fpsr,%d1 # save FPSR
12159
fmov.l &0x0,%fpcr # clear FPCR
12160
12161
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12162
12163
fin_sd_normal_exit:
12164
mov.l %d2,-(%sp) # save d2
12165
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12166
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12167
mov.w %d1,%d2 # make a copy
12168
andi.l &0x7fff,%d1 # strip sign
12169
sub.l %d0,%d1 # add scale factor
12170
andi.w &0x8000,%d2 # keep old sign
12171
or.w %d1,%d2 # concat old sign,new exponent
12172
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12173
mov.l (%sp)+,%d2 # restore d2
12174
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12175
rts
12176
12177
#
12178
# operand is to be rounded to double precision
12179
#
12180
fin_dbl:
12181
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12182
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12183
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12184
bsr.l scale_to_zero_src # calculate scale factor
12185
12186
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12187
bge.w fin_sd_unfl # yes; go handle underflow
12188
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12189
beq.w fin_sd_may_ovfl # maybe; go check
12190
blt.w fin_sd_ovfl # yes; go handle overflow
12191
bra.w fin_sd_normal # no; go handle normalized op
12192
12193
#
12194
# operand WILL underflow when moved in to the fp register file
12195
#
12196
fin_sd_unfl:
12197
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12198
12199
tst.b FP_SCR0_EX(%a6) # is operand negative?
12200
bpl.b fin_sd_unfl_tst
12201
bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12202
12203
# if underflow or inexact is enabled, then go calculate the EXOP first.
12204
fin_sd_unfl_tst:
12205
mov.b FPCR_ENABLE(%a6),%d1
12206
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12207
bne.b fin_sd_unfl_ena # yes
12208
12209
fin_sd_unfl_dis:
12210
lea FP_SCR0(%a6),%a0 # pass: result addr
12211
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12212
bsr.l unf_res # calculate default result
12213
or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12214
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12215
rts
12216
12217
#
12218
# operand will underflow AND underflow or inexact is enabled.
12219
# Therefore, we must return the result rounded to extended precision.
12220
#
12221
fin_sd_unfl_ena:
12222
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12223
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12224
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12225
12226
mov.l %d2,-(%sp) # save d2
12227
mov.w %d1,%d2 # make a copy
12228
andi.l &0x7fff,%d1 # strip sign
12229
sub.l %d0,%d1 # subtract scale factor
12230
andi.w &0x8000,%d2 # extract old sign
12231
addi.l &0x6000,%d1 # add new bias
12232
andi.w &0x7fff,%d1
12233
or.w %d1,%d2 # concat old sign,new exp
12234
mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
12235
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12236
mov.l (%sp)+,%d2 # restore d2
12237
bra.b fin_sd_unfl_dis
12238
12239
#
12240
# operand WILL overflow.
12241
#
12242
fin_sd_ovfl:
12243
fmov.l &0x0,%fpsr # clear FPSR
12244
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12245
12246
fmov.x FP_SCR0(%a6),%fp0 # perform move
12247
12248
fmov.l &0x0,%fpcr # clear FPCR
12249
fmov.l %fpsr,%d1 # save FPSR
12250
12251
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12252
12253
fin_sd_ovfl_tst:
12254
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12255
12256
mov.b FPCR_ENABLE(%a6),%d1
12257
andi.b &0x13,%d1 # is OVFL or INEX enabled?
12258
bne.b fin_sd_ovfl_ena # yes
12259
12260
#
12261
# OVFL is not enabled; therefore, we must create the default result by
12262
# calling ovf_res().
12263
#
12264
fin_sd_ovfl_dis:
12265
btst &neg_bit,FPSR_CC(%a6) # is result negative?
12266
sne %d1 # set sign param accordingly
12267
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12268
bsr.l ovf_res # calculate default result
12269
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12270
fmovm.x (%a0),&0x80 # return default result in fp0
12271
rts
12272
12273
#
12274
# OVFL is enabled.
12275
# the INEX2 bit has already been updated by the round to the correct precision.
12276
# now, round to extended(and don't alter the FPSR).
12277
#
12278
fin_sd_ovfl_ena:
12279
mov.l %d2,-(%sp) # save d2
12280
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12281
mov.l %d1,%d2 # make a copy
12282
andi.l &0x7fff,%d1 # strip sign
12283
andi.w &0x8000,%d2 # keep old sign
12284
sub.l %d0,%d1 # add scale factor
12285
sub.l &0x6000,%d1 # subtract bias
12286
andi.w &0x7fff,%d1
12287
or.w %d2,%d1
12288
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12289
mov.l (%sp)+,%d2 # restore d2
12290
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12291
bra.b fin_sd_ovfl_dis
12292
12293
#
12294
# the move in MAY overflow. so...
12295
#
12296
fin_sd_may_ovfl:
12297
fmov.l &0x0,%fpsr # clear FPSR
12298
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12299
12300
fmov.x FP_SCR0(%a6),%fp0 # perform the move
12301
12302
fmov.l %fpsr,%d1 # save status
12303
fmov.l &0x0,%fpcr # clear FPCR
12304
12305
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12306
12307
fabs.x %fp0,%fp1 # make a copy of result
12308
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
12309
fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
12310
12311
# no, it didn't overflow; we have correct result
12312
bra.w fin_sd_normal_exit
12313
12314
##########################################################################
12315
12316
#
12317
# operand is not a NORM: check its optype and branch accordingly
12318
#
12319
fin_not_norm:
12320
cmpi.b %d1,&DENORM # weed out DENORM
12321
beq.w fin_denorm
12322
cmpi.b %d1,&SNAN # weed out SNANs
12323
beq.l res_snan_1op
12324
cmpi.b %d1,&QNAN # weed out QNANs
12325
beq.l res_qnan_1op
12326
12327
#
12328
# do the fmove in; at this point, only possible ops are ZERO and INF.
12329
# use fmov to determine ccodes.
12330
# prec:mode should be zero at this point but it won't affect answer anyway.
12331
#
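#
# (equivalently, in C terms; illustrative only:
#	ccodes = (fpsr >> 24) & 0xff;
# the N/Z/I/NAN condition codes live in FPSR bits 27-24, i.e. the most
# significant byte, which the rol.l below rotates into the low byte)
#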
12332
fmov.x SRC(%a0),%fp0 # do fmove in
12333
fmov.l %fpsr,%d0 # no exceptions possible
12334
rol.l &0x8,%d0 # put ccodes in lo byte
12335
mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
12336
rts
12337
12338
#########################################################################
12339
# XDEF **************************************************************** #
12340
# fdiv(): emulates the fdiv instruction #
12341
# fsdiv(): emulates the fsdiv instruction #
12342
# fddiv(): emulates the fddiv instruction #
12343
# #
12344
# XREF **************************************************************** #
12345
# scale_to_zero_src() - scale src exponent to zero #
12346
# scale_to_zero_dst() - scale dst exponent to zero #
12347
# unf_res() - return default underflow result #
12348
# ovf_res() - return default overflow result #
12349
# res_qnan() - return QNAN result #
12350
# res_snan() - return SNAN result #
12351
# #
12352
# INPUT *************************************************************** #
12353
# a0 = pointer to extended precision source operand #
12354
# a1 = pointer to extended precision destination operand #
12355
# d0 rnd prec,mode #
12356
# #
12357
# OUTPUT ************************************************************** #
12358
# fp0 = result #
12359
# fp1 = EXOP (if exception occurred) #
12360
# #
12361
# ALGORITHM *********************************************************** #
12362
# Handle NANs, infinities, and zeroes as special cases. Divide #
12363
# norms/denorms into ext/sgl/dbl precision. #
12364
# For norms/denorms, scale the exponents such that a divide #
12365
# instruction won't cause an exception. Use the regular fdiv to #
12366
# compute a result. Check if the regular operands would have taken #
12367
# an exception. If so, return the default overflow/underflow result #
12368
# and return the EXOP if exceptions are enabled. Else, scale the #
12369
# result operand to the proper exponent. #
12370
# #
12371
#########################################################################
12372
12373
align 0x10
12374
tbl_fdiv_unfl:
12375
long 0x3fff - 0x0000 # ext_unfl
12376
long 0x3fff - 0x3f81 # sgl_unfl
12377
long 0x3fff - 0x3c01 # dbl_unfl
12378
12379
tbl_fdiv_ovfl:
12380
long 0x3fff - 0x7ffe # ext overflow exponent
12381
long 0x3fff - 0x407e # sgl overflow exponent
12382
long 0x3fff - 0x43fe # dbl overflow exponent
12383
12384
global fsdiv
12385
fsdiv:
12386
andi.b &0x30,%d0 # clear rnd prec
12387
ori.b &s_mode*0x10,%d0 # insert sgl prec
12388
bra.b fdiv
12389
12390
global fddiv
12391
fddiv:
12392
andi.b &0x30,%d0 # clear rnd prec
12393
ori.b &d_mode*0x10,%d0 # insert dbl prec
12394
12395
global fdiv
12396
fdiv:
12397
mov.l %d0,L_SCR3(%a6) # store rnd info
12398
12399
clr.w %d1
12400
mov.b DTAG(%a6),%d1
12401
lsl.b &0x3,%d1
12402
or.b STAG(%a6),%d1 # combine src tags
12403
12404
bne.w fdiv_not_norm # optimize on non-norm input
12405
12406
#
12407
# DIVIDE: NORMs and DENORMs ONLY!
12408
#
12409
fdiv_norm:
12410
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
12411
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
12412
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
12413
12414
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12415
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12416
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12417
12418
bsr.l scale_to_zero_src # scale src exponent
12419
mov.l %d0,-(%sp) # save scale factor 1
12420
12421
bsr.l scale_to_zero_dst # scale dst exponent
12422
12423
neg.l (%sp) # SCALE FACTOR = scale1 - scale2
12424
add.l %d0,(%sp)
12425
12426
mov.w 2+L_SCR3(%a6),%d1 # fetch precision
12427
lsr.b &0x6,%d1 # shift to lo bits
12428
mov.l (%sp)+,%d0 # load S.F.
12429
cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430
ble.w fdiv_may_ovfl # result will overflow
12431
12432
cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433
beq.w fdiv_may_unfl # maybe
12434
bgt.w fdiv_unfl # yes; go handle underflow
12435
12436
fdiv_normal:
12437
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12438
12439
fmov.l L_SCR3(%a6),%fpcr # save FPCR
12440
fmov.l &0x0,%fpsr # clear FPSR
12441
12442
fdiv.x FP_SCR0(%a6),%fp0 # perform divide
12443
12444
fmov.l %fpsr,%d1 # save FPSR
12445
fmov.l &0x0,%fpcr # clear FPCR
12446
12447
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12448
12449
fdiv_normal_exit:
12450
fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
12451
mov.l %d2,-(%sp) # store d2
12452
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12453
mov.l %d1,%d2 # make a copy
12454
andi.l &0x7fff,%d1 # strip sign
12455
andi.w &0x8000,%d2 # keep old sign
12456
sub.l %d0,%d1 # add scale factor
12457
or.w %d2,%d1 # concat old sign,new exp
12458
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12459
mov.l (%sp)+,%d2 # restore d2
12460
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12461
rts
12462
12463
tbl_fdiv_ovfl2:
12464
long 0x7fff
12465
long 0x407f
12466
long 0x43ff
12467
12468
fdiv_no_ovfl:
12469
mov.l (%sp)+,%d0 # restore scale factor
12470
bra.b fdiv_normal_exit
12471
12472
fdiv_may_ovfl:
12473
mov.l %d0,-(%sp) # save scale factor
12474
12475
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12476
12477
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12478
fmov.l &0x0,%fpsr # set FPSR
12479
12480
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12481
12482
fmov.l %fpsr,%d0
12483
fmov.l &0x0,%fpcr
12484
12485
or.l %d0,USER_FPSR(%a6) # save INEX,N
12486
12487
fmovm.x &0x01,-(%sp) # save result to stack
12488
mov.w (%sp),%d0 # fetch new exponent
12489
add.l &0xc,%sp # clear result from stack
12490
andi.l &0x7fff,%d0 # strip sign
12491
sub.l (%sp),%d0 # add scale factor
12492
cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
12493
blt.b fdiv_no_ovfl
12494
mov.l (%sp)+,%d0
12495
12496
fdiv_ovfl_tst:
12497
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12498
12499
mov.b FPCR_ENABLE(%a6),%d1
12500
andi.b &0x13,%d1 # is OVFL or INEX enabled?
12501
bne.b fdiv_ovfl_ena # yes
12502
12503
fdiv_ovfl_dis:
12504
btst &neg_bit,FPSR_CC(%a6) # is result negative?
12505
sne %d1 # set sign param accordingly
12506
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
12507
bsr.l ovf_res # calculate default result
12508
or.b %d0,FPSR_CC(%a6) # set INF if applicable
12509
fmovm.x (%a0),&0x80 # return default result in fp0
12510
rts
12511
12512
fdiv_ovfl_ena:
12513
mov.l L_SCR3(%a6),%d1
12514
andi.b &0xc0,%d1 # is precision extended?
12515
bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
12516
12517
fdiv_ovfl_ena_cont:
12518
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
12519
12520
mov.l %d2,-(%sp) # save d2
12521
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12522
mov.w %d1,%d2 # make a copy
12523
andi.l &0x7fff,%d1 # strip sign
12524
sub.l %d0,%d1 # add scale factor
12525
subi.l &0x6000,%d1 # subtract bias
12526
andi.w &0x7fff,%d1 # clear sign bit
12527
andi.w &0x8000,%d2 # keep old sign
12528
or.w %d2,%d1 # concat old sign,new exp
12529
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12530
mov.l (%sp)+,%d2 # restore d2
12531
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12532
bra.b fdiv_ovfl_dis
12533
12534
fdiv_ovfl_ena_sd:
12535
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
12536
12537
mov.l L_SCR3(%a6),%d1
12538
andi.b &0x30,%d1 # keep rnd mode
12539
fmov.l %d1,%fpcr # set FPCR
12540
12541
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12542
12543
fmov.l &0x0,%fpcr # clear FPCR
12544
bra.b fdiv_ovfl_ena_cont
12545
12546
fdiv_unfl:
12547
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12548
12549
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12550
12551
fmov.l &rz_mode*0x10,%fpcr # set FPCR
12552
fmov.l &0x0,%fpsr # clear FPSR
12553
12554
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12555
12556
fmov.l %fpsr,%d1 # save status
12557
fmov.l &0x0,%fpcr # clear FPCR
12558
12559
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12560
12561
mov.b FPCR_ENABLE(%a6),%d1
12562
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12563
bne.b fdiv_unfl_ena # yes
12564
12565
fdiv_unfl_dis:
12566
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12567
12568
lea FP_SCR0(%a6),%a0 # pass: result addr
12569
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12570
bsr.l unf_res # calculate default result
12571
or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
12572
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12573
rts
12574
12575
#
12576
# UNFL is enabled.
12577
#
12578
fdiv_unfl_ena:
12579
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
12580
12581
mov.l L_SCR3(%a6),%d1
12582
andi.b &0xc0,%d1 # is precision extended?
12583
bne.b fdiv_unfl_ena_sd # no, sgl or dbl
12584
12585
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12586
12587
fdiv_unfl_ena_cont:
12588
fmov.l &0x0,%fpsr # clear FPSR
12589
12590
fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12591
12592
fmov.l &0x0,%fpcr # clear FPCR
12593
12594
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
12595
mov.l %d2,-(%sp) # save d2
12596
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12597
mov.l %d1,%d2 # make a copy
12598
andi.l &0x7fff,%d1 # strip sign
12599
andi.w &0x8000,%d2 # keep old sign
12600
sub.l %d0,%d1 # add scale factor
12601
addi.l &0x6000,%d1 # add bias
12602
andi.w &0x7fff,%d1
12603
or.w %d2,%d1 # concat old sign,new exp
12604
mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
12605
mov.l (%sp)+,%d2 # restore d2
12606
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12607
bra.w fdiv_unfl_dis
12608
12609
fdiv_unfl_ena_sd:
12610
mov.l L_SCR3(%a6),%d1
12611
andi.b &0x30,%d1 # use only rnd mode
12612
fmov.l %d1,%fpcr # set FPCR
12613
12614
bra.b fdiv_unfl_ena_cont
12615
12616
#
12617
# the divide operation MAY underflow:
12618
#
12619
fdiv_may_unfl:
12620
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12621
12622
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12623
fmov.l &0x0,%fpsr # clear FPSR
12624
12625
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12626
12627
fmov.l %fpsr,%d1 # save status
12628
fmov.l &0x0,%fpcr # clear FPCR
12629
12630
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12631
12632
fabs.x %fp0,%fp1 # make a copy of result
12633
fcmp.b %fp1,&0x1 # is |result| > 1.b?
12634
fbgt.w fdiv_normal_exit # no; no underflow occurred
12635
fblt.w fdiv_unfl # yes; underflow occurred
12636
12637
#
12638
# we still don't know if underflow occurred. result is ~ equal to 1. but,
12639
# we don't know if the result was an underflow that rounded up to a 1
12640
# or a normalized number that rounded down to a 1. so, redo the entire
12641
# operation using RZ as the rounding mode to see what the pre-rounded
12642
# result is. this case should be relatively rare.
12643
#
12644
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12645
12646
mov.l L_SCR3(%a6),%d1
12647
andi.b &0xc0,%d1 # keep rnd prec
12648
ori.b &rz_mode*0x10,%d1 # insert RZ
12649
12650
fmov.l %d1,%fpcr # set FPCR
12651
fmov.l &0x0,%fpsr # clear FPSR
12652
12653
fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12654
12655
fmov.l &0x0,%fpcr # clear FPCR
12656
fabs.x %fp1 # make absolute value
12657
fcmp.b %fp1,&0x1 # is |result| < 1.b?
12658
fbge.w fdiv_normal_exit # no; no underflow occurred
12659
bra.w fdiv_unfl # yes; underflow occurred
12660
12661
############################################################################
12662
12663
#
12664
# Divide: inputs are not both normalized; what are they?
12665
#
12666
fdiv_not_norm:
12667
mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12668
jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
12669
12670
swbeg &48
12671
tbl_fdiv_op:
12672
short fdiv_norm - tbl_fdiv_op # NORM / NORM
12673
short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
12674
short fdiv_zero_load - tbl_fdiv_op # NORM / INF
12675
short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
12676
short fdiv_norm - tbl_fdiv_op # NORM / DENORM
12677
short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
12678
short tbl_fdiv_op - tbl_fdiv_op #
12679
short tbl_fdiv_op - tbl_fdiv_op #
12680
12681
short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
12682
short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
12683
short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
12684
short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
12685
short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
12686
short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
12687
short tbl_fdiv_op - tbl_fdiv_op #
12688
short tbl_fdiv_op - tbl_fdiv_op #
12689
12690
short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
12691
short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
12692
short fdiv_res_operr - tbl_fdiv_op # INF / INF
12693
short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
12694
short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
12695
short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
12696
short tbl_fdiv_op - tbl_fdiv_op #
12697
short tbl_fdiv_op - tbl_fdiv_op #
12698
12699
short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
12700
short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
12701
short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
12702
short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
12703
short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
12704
short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
12705
short tbl_fdiv_op - tbl_fdiv_op #
12706
short tbl_fdiv_op - tbl_fdiv_op #
12707
12708
short fdiv_norm - tbl_fdiv_op # DENORM / NORM
12709
short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
12710
short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
12711
short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
12712
short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
12713
short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
12714
short tbl_fdiv_op - tbl_fdiv_op #
12715
short tbl_fdiv_op - tbl_fdiv_op #
12716
12717
short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
12718
short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
12719
short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
12720
short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
12721
short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
12722
short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
12723
short tbl_fdiv_op - tbl_fdiv_op #
12724
short tbl_fdiv_op - tbl_fdiv_op #
12725
12726
fdiv_res_qnan:
12727
bra.l res_qnan
12728
fdiv_res_snan:
12729
bra.l res_snan
12730
fdiv_res_operr:
12731
bra.l res_operr
12732
12733
global fdiv_zero_load # global for fsgldiv
12734
fdiv_zero_load:
12735
mov.b SRC_EX(%a0),%d0 # result sign is exclusive
12736
mov.b DST_EX(%a1),%d1 # or of input signs.
12737
eor.b %d0,%d1
12738
bpl.b fdiv_zero_load_p # result is positive
12739
fmov.s &0x80000000,%fp0 # load a -ZERO
12740
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12741
rts
12742
fdiv_zero_load_p:
12743
fmov.s &0x00000000,%fp0 # load a +ZERO
12744
mov.b &z_bmask,FPSR_CC(%a6) # set Z
12745
rts
12746
12747
#
12748
# The destination was In Range and the source was a ZERO. The result,
12749
# Therefore, is an INF w/ the proper sign.
12750
# So, determine the sign and return a new INF (w/ the j-bit cleared).
12751
#
12752
global fdiv_inf_load # global for fsgldiv
12753
fdiv_inf_load:
12754
ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
12755
mov.b SRC_EX(%a0),%d0 # load both signs
12756
mov.b DST_EX(%a1),%d1
12757
eor.b %d0,%d1
12758
bpl.b fdiv_inf_load_p # result is positive
12759
fmov.s &0xff800000,%fp0 # make result -INF
12760
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12761
rts
12762
fdiv_inf_load_p:
12763
fmov.s &0x7f800000,%fp0 # make result +INF
12764
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12765
rts
12766
12767
#
12768
# The destination was an INF w/ an In Range or ZERO source, the result is
12769
# an INF w/ the proper sign.
12770
# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12771
# dst INF is set, then the j-bit of the result INF is also set).
12772
#
12773
global fdiv_inf_dst # global for fsgldiv
12774
fdiv_inf_dst:
12775
mov.b DST_EX(%a1),%d0 # load both signs
12776
mov.b SRC_EX(%a0),%d1
12777
eor.b %d0,%d1
12778
bpl.b fdiv_inf_dst_p # result is positive
12779
12780
fmovm.x DST(%a1),&0x80 # return result in fp0
12781
fabs.x %fp0 # clear sign bit
12782
fneg.x %fp0 # set sign bit
12783
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12784
rts
12785
12786
fdiv_inf_dst_p:
12787
fmovm.x DST(%a1),&0x80 # return result in fp0
12788
fabs.x %fp0 # return positive INF
12789
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12790
rts
12791
12792
#########################################################################
12793
# XDEF **************************************************************** #
12794
# fneg(): emulates the fneg instruction #
12795
# fsneg(): emulates the fsneg instruction #
12796
# fdneg(): emulates the fdneg instruction #
12797
# #
12798
# XREF **************************************************************** #
12799
# norm() - normalize a denorm to provide EXOP #
12800
# scale_to_zero_src() - scale sgl/dbl source exponent #
12801
# ovf_res() - return default overflow result #
12802
# unf_res() - return default underflow result #
12803
# res_qnan_1op() - return QNAN result #
12804
# res_snan_1op() - return SNAN result #
12805
# #
12806
# INPUT *************************************************************** #
12807
# a0 = pointer to extended precision source operand #
12808
# d0 = rnd prec,mode #
12809
# #
12810
# OUTPUT ************************************************************** #
12811
# fp0 = result #
12812
# fp1 = EXOP (if exception occurred) #
12813
# #
12814
# ALGORITHM *********************************************************** #
12815
# Handle NANs, zeroes, and infinities as special cases. Separate #
12816
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
12817
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
12818
# and an actual fneg performed to see if overflow/underflow would have #
12819
# occurred. If so, return default underflow/overflow result. Else, #
12820
# scale the result exponent and return result. FPSR gets set based on #
12821
# the result value. #
12822
# #
12823
#########################################################################
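#
# For the extended-precision case the whole operation reduces to flipping one
# bit of the {sign,exponent} word; a minimal C sketch (se is assumed to be
# that 16-bit word):
#
#	se ^= 0x8000;		/* flip the sign; exponent and mantissa untouched */
#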
12824
12825
global fsneg
12826
fsneg:
12827
andi.b &0x30,%d0 # clear rnd prec
12828
ori.b &s_mode*0x10,%d0 # insert sgl precision
12829
bra.b fneg
12830
12831
global fdneg
12832
fdneg:
12833
andi.b &0x30,%d0 # clear rnd prec
12834
ori.b &d_mode*0x10,%d0 # insert dbl prec
12835
12836
global fneg
12837
fneg:
12838
mov.l %d0,L_SCR3(%a6) # store rnd info
12839
mov.b STAG(%a6),%d1
12840
bne.w fneg_not_norm # optimize on non-norm input
12841
12842
#
12843
# NEGATE SIGN : norms and denorms ONLY!
12844
#
12845
fneg_norm:
12846
andi.b &0xc0,%d0 # is precision extended?
12847
bne.w fneg_not_ext # no; go handle sgl or dbl
12848
12849
#
12850
# precision selected is extended. so...we can not get an underflow
12851
# or overflow because of rounding to the correct precision. so...
12852
# skip the scaling and unscaling...
12853
#
12854
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12855
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12856
mov.w SRC_EX(%a0),%d0
12857
eori.w &0x8000,%d0 # negate sign
12858
bpl.b fneg_norm_load # sign is positive
12859
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12860
fneg_norm_load:
12861
mov.w %d0,FP_SCR0_EX(%a6)
12862
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12863
rts
12864
12865
#
12866
# for an extended precision DENORM, the UNFL exception bit is set
12867
# the accrued bit is NOT set in this instance(no inexactness!)
12868
#
12869
fneg_denorm:
12870
andi.b &0xc0,%d0 # is precision extended?
12871
bne.b fneg_not_ext # no; go handle sgl or dbl
12872
12873
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12874
12875
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12876
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12877
mov.w SRC_EX(%a0),%d0
12878
eori.w &0x8000,%d0 # negate sign
12879
bpl.b fneg_denorm_done # no
12880
mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
12881
fneg_denorm_done:
12882
mov.w %d0,FP_SCR0_EX(%a6)
12883
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12884
12885
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886
bne.b fneg_ext_unfl_ena # yes
12887
rts
12888
12889
#
12890
# the input is an extended DENORM and underflow is enabled in the FPCR.
12891
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892
# exponent and insert back into the operand.
12893
#
12894
fneg_ext_unfl_ena:
12895
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12896
bsr.l norm # normalize result
12897
neg.w %d0 # new exponent = -(shft val)
12898
addi.w &0x6000,%d0 # add new bias to exponent
12899
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12900
andi.w &0x8000,%d1 # keep old sign
12901
andi.w &0x7fff,%d0 # clear sign position
12902
or.w %d1,%d0 # concat old sign, new exponent
12903
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12904
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12905
rts
12906
12907
#
12908
# operand is either single or double
12909
#
12910
fneg_not_ext:
12911
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12912
bne.b fneg_dbl
12913
12914
#
12915
# operand is to be rounded to single precision
12916
#
12917
fneg_sgl:
12918
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12919
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12920
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12921
bsr.l scale_to_zero_src # calculate scale factor
12922
12923
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12924
bge.w fneg_sd_unfl # yes; go handle underflow
12925
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12926
beq.w fneg_sd_may_ovfl # maybe; go check
12927
blt.w fneg_sd_ovfl # yes; go handle overflow
12928
12929
#
12930
# operand will NOT overflow or underflow when moved in to the fp reg file
12931
#
12932
fneg_sd_normal:
12933
fmov.l &0x0,%fpsr # clear FPSR
12934
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12935
12936
fneg.x FP_SCR0(%a6),%fp0 # perform negation
12937
12938
fmov.l %fpsr,%d1 # save FPSR
12939
fmov.l &0x0,%fpcr # clear FPCR
12940
12941
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12942
12943
fneg_sd_normal_exit:
12944
mov.l %d2,-(%sp) # save d2
12945
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12946
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12947
mov.w %d1,%d2 # make a copy
12948
andi.l &0x7fff,%d1 # strip sign
12949
sub.l %d0,%d1 # add scale factor
12950
andi.w &0x8000,%d2 # keep old sign
12951
or.w %d1,%d2 # concat old sign,new exp
12952
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12953
mov.l (%sp)+,%d2 # restore d2
12954
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12955
rts
12956
12957
#
12958
# operand is to be rounded to double precision
12959
#
12960
fneg_dbl:
12961
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12962
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12963
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12964
bsr.l scale_to_zero_src # calculate scale factor
12965
12966
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12967
bge.b fneg_sd_unfl # yes; go handle underflow
12968
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12969
beq.w fneg_sd_may_ovfl # maybe; go check
12970
blt.w fneg_sd_ovfl # yes; go handle overflow
12971
bra.w fneg_sd_normal # no; go handle normalized op
12972
12973
#
12974
# operand WILL underflow when moved in to the fp register file
12975
#
12976
fneg_sd_unfl:
12977
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12978
12979
eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
12980
bpl.b fneg_sd_unfl_tst
12981
bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12982
12983
# if underflow or inexact is enabled, go calculate EXOP first.
12984
fneg_sd_unfl_tst:
12985
mov.b FPCR_ENABLE(%a6),%d1
12986
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12987
bne.b fneg_sd_unfl_ena # yes
12988
12989
fneg_sd_unfl_dis:
12990
lea FP_SCR0(%a6),%a0 # pass: result addr
12991
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12992
bsr.l unf_res # calculate default result
12993
or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12994
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12995
rts
12996
12997
#
12998
# operand will underflow AND underflow or inexact is enabled.
12999
# Therefore, we must return the result rounded to extended precision.
13000
#
13001
fneg_sd_unfl_ena:
13002
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13005
13006
mov.l %d2,-(%sp) # save d2
13007
mov.l %d1,%d2 # make a copy
13008
andi.l &0x7fff,%d1 # strip sign
13009
andi.w &0x8000,%d2 # keep old sign
13010
sub.l %d0,%d1 # subtract scale factor
13011
addi.l &0x6000,%d1 # add new bias
13012
andi.w &0x7fff,%d1
13013
or.w %d2,%d1 # concat new sign,new exp
13014
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13015
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13016
mov.l (%sp)+,%d2 # restore d2
13017
bra.b fneg_sd_unfl_dis
13018
13019
#
13020
# operand WILL overflow.
13021
#
13022
fneg_sd_ovfl:
13023
fmov.l &0x0,%fpsr # clear FPSR
13024
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13025
13026
fneg.x FP_SCR0(%a6),%fp0 # perform negation
13027
13028
fmov.l &0x0,%fpcr # clear FPCR
13029
fmov.l %fpsr,%d1 # save FPSR
13030
13031
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13032
13033
fneg_sd_ovfl_tst:
13034
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13035
13036
mov.b FPCR_ENABLE(%a6),%d1
13037
andi.b &0x13,%d1 # is OVFL or INEX enabled?
13038
bne.b fneg_sd_ovfl_ena # yes
13039
13040
#
13041
# OVFL is not enabled; therefore, we must create the default result by
13042
# calling ovf_res().
13043
#
13044
fneg_sd_ovfl_dis:
13045
btst &neg_bit,FPSR_CC(%a6) # is result negative?
13046
sne %d1 # set sign param accordingly
13047
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13048
bsr.l ovf_res # calculate default result
13049
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13050
fmovm.x (%a0),&0x80 # return default result in fp0
13051
rts
13052
13053
#
13054
# OVFL is enabled.
13055
# the INEX2 bit has already been updated by the round to the correct precision.
13056
# now, round to extended (and don't alter the FPSR).
13057
#
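#
# this is the overflow counterpart of the underflow EXOP above: the scaling is
# undone and the exponent is re-biased downward by 0x6000 (wrapped into 15 bits
# by the andi.w) before the operand is returned in fp1.
#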
13058
fneg_sd_ovfl_ena:
13059
mov.l %d2,-(%sp) # save d2
13060
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13061
mov.l %d1,%d2 # make a copy
13062
andi.l &0x7fff,%d1 # strip sign
13063
andi.w &0x8000,%d2 # keep old sign
13064
sub.l %d0,%d1 # add scale factor
13065
subi.l &0x6000,%d1 # subtract bias
13066
andi.w &0x7fff,%d1
13067
or.w %d2,%d1 # concat sign,exp
13068
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13069
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13070
mov.l (%sp)+,%d2 # restore d2
13071
bra.b fneg_sd_ovfl_dis
13072
13073
#
13074
# the move in MAY overflow. so...
13075
#
13076
fneg_sd_may_ovfl:
13077
fmov.l &0x0,%fpsr # clear FPSR
13078
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13079
13080
fneg.x FP_SCR0(%a6),%fp0 # perform negation
13081
13082
fmov.l %fpsr,%d1 # save status
13083
fmov.l &0x0,%fpcr # clear FPCR
13084
13085
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13086
13087
fabs.x %fp0,%fp1 # make a copy of result
13088
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13089
fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
13090
13091
# no, it didn't overflow; we have correct result
13092
bra.w fneg_sd_normal_exit
13093
13094
##########################################################################
13095
13096
#
13097
# input is not normalized; what is it?
13098
#
13099
fneg_not_norm:
13100
cmpi.b %d1,&DENORM # weed out DENORM
13101
beq.w fneg_denorm
13102
cmpi.b %d1,&SNAN # weed out SNAN
13103
beq.l res_snan_1op
13104
cmpi.b %d1,&QNAN # weed out QNAN
13105
beq.l res_qnan_1op
13106
13107
#
13108
# do the fneg; at this point, only possible ops are ZERO and INF.
13109
# use fneg to determine ccodes.
13110
# prec:mode should be zero at this point but it won't affect answer anyways.
13111
#
13112
fneg.x SRC_EX(%a0),%fp0 # do fneg
13113
fmov.l %fpsr,%d0
13114
rol.l &0x8,%d0 # put ccodes in lo byte
13115
mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
13116
rts
13117
13118
#########################################################################
13119
# XDEF **************************************************************** #
13120
# ftst(): emulates the ftst instruction #
13121
# #
13122
# XREF **************************************************************** #
13123
# res{s,q}nan_1op() - set NAN result for monadic instruction #
13124
# #
13125
# INPUT *************************************************************** #
13126
# a0 = pointer to extended precision source operand #
13127
# #
13128
# OUTPUT ************************************************************** #
13129
# none #
13130
# #
13131
# ALGORITHM *********************************************************** #
13132
# Check the source operand tag (STAG) and set the FPCR according #
13133
# to the operand type and sign. #
13134
# #
13135
#########################################################################
13136
13137
global ftst
13138
ftst:
13139
mov.b STAG(%a6),%d1
13140
bne.b ftst_not_norm # optimize on non-norm input
13141
13142
#
13143
# Norm:
13144
#
13145
ftst_norm:
13146
tst.b SRC_EX(%a0) # is operand negative?
13147
bmi.b ftst_norm_m # yes
13148
rts
13149
ftst_norm_m:
13150
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13151
rts
13152
13153
#
13154
# input is not normalized; what is it?
13155
#
13156
ftst_not_norm:
13157
cmpi.b %d1,&ZERO # weed out ZERO
13158
beq.b ftst_zero
13159
cmpi.b %d1,&INF # weed out INF
13160
beq.b ftst_inf
13161
cmpi.b %d1,&SNAN # weed out SNAN
13162
beq.l res_snan_1op
13163
cmpi.b %d1,&QNAN # weed out QNAN
13164
beq.l res_qnan_1op
13165
13166
#
13167
# Denorm:
13168
#
13169
ftst_denorm:
13170
tst.b SRC_EX(%a0) # is operand negative?
13171
bmi.b ftst_denorm_m # yes
13172
rts
13173
ftst_denorm_m:
13174
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13175
rts
13176
13177
#
13178
# Infinity:
13179
#
13180
ftst_inf:
13181
tst.b SRC_EX(%a0) # is operand negative?
13182
bmi.b ftst_inf_m # yes
13183
ftst_inf_p:
13184
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13185
rts
13186
ftst_inf_m:
13187
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13188
rts
13189
13190
#
13191
# Zero:
13192
#
13193
ftst_zero:
13194
tst.b SRC_EX(%a0) # is operand negative?
13195
bmi.b ftst_zero_m # yes
13196
ftst_zero_p:
13197
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13198
rts
13199
ftst_zero_m:
13200
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13201
rts
13202
13203
#########################################################################
13204
# XDEF **************************************************************** #
13205
# fint(): emulates the fint instruction #
13206
# #
13207
# XREF **************************************************************** #
13208
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13209
# #
13210
# INPUT *************************************************************** #
13211
# a0 = pointer to extended precision source operand #
13212
# d0 = round precision/mode #
13213
# #
13214
# OUTPUT ************************************************************** #
13215
# fp0 = result #
13216
# #
13217
# ALGORITHM *********************************************************** #
13218
# Separate according to operand type. Unnorms don't pass through #
13219
# here. For norms, load the rounding mode/prec, execute a "fint", then #
13220
# store the resulting FPSR bits. #
13221
# For denorms, force the j-bit to a one and do the same as for #
13222
# norms. Denorms are so low that the answer will either be a zero or a #
13223
# one. #
13224
# For zeroes/infs/NANs, return the same while setting the FPSR #
13225
# as appropriate. #
13226
# #
13227
#########################################################################
13228
13229
global fint
13230
fint:
13231
mov.b STAG(%a6),%d1
13232
bne.b fint_not_norm # optimize on non-norm input
13233
13234
#
13235
# Norm:
13236
#
13237
fint_norm:
13238
andi.b &0x30,%d0 # set prec = ext
13239
13240
fmov.l %d0,%fpcr # set FPCR
13241
fmov.l &0x0,%fpsr # clear FPSR
13242
13243
fint.x SRC(%a0),%fp0 # execute fint
13244
13245
fmov.l &0x0,%fpcr # clear FPCR
13246
fmov.l %fpsr,%d0 # save FPSR
13247
or.l %d0,USER_FPSR(%a6) # set exception bits
13248
13249
rts
13250
13251
#
13252
# input is not normalized; what is it?
13253
#
13254
fint_not_norm:
13255
cmpi.b %d1,&ZERO # weed out ZERO
13256
beq.b fint_zero
13257
cmpi.b %d1,&INF # weed out INF
13258
beq.b fint_inf
13259
cmpi.b %d1,&DENORM # weed out DENORM
13260
beq.b fint_denorm
13261
cmpi.b %d1,&SNAN # weed out SNAN
13262
beq.l res_snan_1op
13263
bra.l res_qnan_1op # weed out QNAN
13264
13265
#
13266
# Denorm:
13267
#
13268
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13269
# also, the INEX2 and AINEX exception bits will be set.
13270
# so, we could either set these manually or force the DENORM
13271
# to a very small NORM and ship it to the NORM routine.
13272
# I do the latter.
13273
#
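#
# what the forced operand produces (illustrative only): it is an extremely
# small non-zero value with the original sign, so "fint" returns +1.0 only in
# round-to-plus-infinity mode for a positive operand, -1.0 only in
# round-to-minus-infinity mode for a negative one, and a signed zero in every
# other rounding mode -- which is the "zero or one" behavior described above.
#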
13274
fint_denorm:
13275
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13276
mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13277
lea FP_SCR0(%a6),%a0
13278
bra.b fint_norm
13279
13280
#
13281
# Zero:
13282
#
13283
fint_zero:
13284
tst.b SRC_EX(%a0) # is ZERO negative?
13285
bmi.b fint_zero_m # yes
13286
fint_zero_p:
13287
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13288
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13289
rts
13290
fint_zero_m:
13291
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13292
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13293
rts
13294
13295
#
13296
# Infinity:
13297
#
13298
fint_inf:
13299
fmovm.x SRC(%a0),&0x80 # return result in fp0
13300
tst.b SRC_EX(%a0) # is INF negative?
13301
bmi.b fint_inf_m # yes
13302
fint_inf_p:
13303
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13304
rts
13305
fint_inf_m:
13306
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13307
rts
13308
13309
#########################################################################
13310
# XDEF **************************************************************** #
13311
# fintrz(): emulates the fintrz instruction #
13312
# #
13313
# XREF **************************************************************** #
13314
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13315
# #
13316
# INPUT *************************************************************** #
13317
# a0 = pointer to extended precision source operand #
13318
# d0 = round precision/mode #
13319
# #
13320
# OUTPUT ************************************************************** #
13321
# fp0 = result #
13322
# #
13323
# ALGORITHM *********************************************************** #
13324
# Separate according to operand type. Unnorms don't pass through #
13325
# here. For norms, load the rounding mode/prec, execute a "fintrz", #
13326
# then store the resulting FPSR bits. #
13327
# For denorms, force the j-bit to a one and do the same as for #
13328
# norms. Denorms are so low that the answer will either be a zero or a #
13329
# one. #
13330
# For zeroes/infs/NANs, return the same while setting the FPSR #
13331
# as appropriate. #
13332
# #
13333
#########################################################################
13334
13335
global fintrz
13336
fintrz:
13337
mov.b STAG(%a6),%d1
13338
bne.b fintrz_not_norm # optimize on non-norm input
13339
13340
#
13341
# Norm:
13342
#
13343
fintrz_norm:
13344
fmov.l &0x0,%fpsr # clear FPSR
13345
13346
fintrz.x SRC(%a0),%fp0 # execute fintrz
13347
13348
fmov.l %fpsr,%d0 # save FPSR
13349
or.l %d0,USER_FPSR(%a6) # set exception bits
13350
13351
rts
13352
13353
#
13354
# input is not normalized; what is it?
13355
#
13356
fintrz_not_norm:
13357
cmpi.b %d1,&ZERO # weed out ZERO
13358
beq.b fintrz_zero
13359
cmpi.b %d1,&INF # weed out INF
13360
beq.b fintrz_inf
13361
cmpi.b %d1,&DENORM # weed out DENORM
13362
beq.b fintrz_denorm
13363
cmpi.b %d1,&SNAN # weed out SNAN
13364
beq.l res_snan_1op
13365
bra.l res_qnan_1op # weed out QNAN
13366
13367
#
13368
# Denorm:
13369
#
13370
# for DENORMs, the result will be (+/-)ZERO.
13371
# also, the INEX2 and AINEX exception bits will be set.
13372
# so, we could either set these manually or force the DENORM
13373
# to a very small NORM and ship it to the NORM routine.
13374
# I do the latter.
13375
#
13376
fintrz_denorm:
13377
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13378
mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13379
lea FP_SCR0(%a6),%a0
13380
bra.b fintrz_norm
13381
13382
#
13383
# Zero:
13384
#
13385
fintrz_zero:
13386
tst.b SRC_EX(%a0) # is ZERO negative?
13387
bmi.b fintrz_zero_m # yes
13388
fintrz_zero_p:
13389
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13390
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13391
rts
13392
fintrz_zero_m:
13393
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13394
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13395
rts
13396
13397
#
13398
# Infinity:
13399
#
13400
fintrz_inf:
13401
fmovm.x SRC(%a0),&0x80 # return result in fp0
13402
tst.b SRC_EX(%a0) # is INF negative?
13403
bmi.b fintrz_inf_m # yes
13404
fintrz_inf_p:
13405
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13406
rts
13407
fintrz_inf_m:
13408
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13409
rts
13410
13411
#########################################################################
13412
# XDEF **************************************************************** #
13413
# fabs(): emulates the fabs instruction #
13414
# fsabs(): emulates the fsabs instruction #
13415
# fdabs(): emulates the fdabs instruction #
13416
# #
13417
# XREF **************************************************************** #
13418
# norm() - normalize denorm mantissa to provide EXOP #
13419
# scale_to_zero_src() - make exponent = 0; get scale factor #
13420
# unf_res() - calculate underflow result #
13421
# ovf_res() - calculate overflow result #
13422
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13423
# #
13424
# INPUT *************************************************************** #
13425
# a0 = pointer to extended precision source operand #
13426
# d0 = rnd precision/mode #
13427
# #
13428
# OUTPUT ************************************************************** #
13429
# fp0 = result #
13430
# fp1 = EXOP (if exception occurred) #
13431
# #
13432
# ALGORITHM *********************************************************** #
13433
# Handle NANs, infinities, and zeroes as special cases. Divide #
13434
# norms into extended, single, and double precision. #
13435
# Simply clear sign for extended precision norm. Ext prec denorm #
13436
# gets an EXOP created for it since it's an underflow. #
13437
# Double and single precision can overflow and underflow. First, #
13438
# scale the operand such that the exponent is zero. Perform an "fabs" #
13439
# using the correct rnd mode/prec. Check to see if the original #
13440
# exponent would take an exception. If so, use unf_res() or ovf_res() #
13441
# to calculate the default result. Also, create the EXOP for the #
13442
# exceptional case. If no exception should occur, insert the correct #
13443
# result exponent and return. #
13444
# Unnorms don't pass through here. #
13445
# #
13446
#########################################################################
13447
13448
global fsabs
13449
fsabs:
13450
andi.b &0x30,%d0 # clear rnd prec
13451
ori.b &s_mode*0x10,%d0 # insert sgl precision
13452
bra.b fabs
13453
13454
global fdabs
13455
fdabs:
13456
andi.b &0x30,%d0 # clear rnd prec
13457
ori.b &d_mode*0x10,%d0 # insert dbl precision
13458
13459
global fabs
13460
fabs:
13461
mov.l %d0,L_SCR3(%a6) # store rnd info
13462
mov.b STAG(%a6),%d1
13463
bne.w fabs_not_norm # optimize on non-norm input
13464
13465
#
13466
# ABSOLUTE VALUE: norms and denorms ONLY!
13467
#
13468
fabs_norm:
13469
andi.b &0xc0,%d0 # is precision extended?
13470
bne.b fabs_not_ext # no; go handle sgl or dbl
13471
13472
#
13473
# precision selected is extended. so...we cannot get an underflow
13474
# or overflow because of rounding to the correct precision. so...
13475
# skip the scaling and unscaling...
13476
#
13477
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13478
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13479
mov.w SRC_EX(%a0),%d1
13480
bclr &15,%d1 # force absolute value
13481
mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
13482
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13483
rts
13484
13485
#
13486
# for an extended precision DENORM, the UNFL exception bit is set
13487
# the accrued bit is NOT set in this instance (no inexactness!)
13488
#
13489
fabs_denorm:
13490
andi.b &0xc0,%d0 # is precision extended?
13491
bne.b fabs_not_ext # no
13492
13493
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13494
13495
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13496
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13497
mov.w SRC_EX(%a0),%d0
13498
bclr &15,%d0 # clear sign
13499
mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
13500
13501
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13502
13503
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504
bne.b fabs_ext_unfl_ena
13505
rts
13506
13507
#
13508
# the input is an extended DENORM and underflow is enabled in the FPCR.
13509
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510
# exponent and insert back into the operand.
13511
#
13512
fabs_ext_unfl_ena:
13513
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
13514
bsr.l norm # normalize result
13515
neg.w %d0 # new exponent = -(shft val)
13516
addi.w &0x6000,%d0 # add new bias to exponent
13517
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
13518
andi.w &0x8000,%d1 # keep old sign
13519
andi.w &0x7fff,%d0 # clear sign position
13520
or.w %d1,%d0 # concat old sign, new exponent
13521
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
13522
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13523
rts
13524
13525
#
13526
# operand is either single or double
13527
#
13528
fabs_not_ext:
13529
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
13530
bne.b fabs_dbl
13531
13532
#
13533
# operand is to be rounded to single precision
13534
#
13535
fabs_sgl:
13536
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13537
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13538
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13539
bsr.l scale_to_zero_src # calculate scale factor
13540
13541
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
13542
bge.w fabs_sd_unfl # yes; go handle underflow
13543
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
13544
beq.w fabs_sd_may_ovfl # maybe; go check
13545
blt.w fabs_sd_ovfl # yes; go handle overflow
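#
# same scheme as the double precision compares in the fneg code earlier: the
# single precision limits are a biased exponent of 0x3f81 for the smallest
# normalized value and 0x407e for the largest, giving the 0x3fff-0x3f80 and
# 0x3fff-0x407e test values used above.
#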
13546
13547
#
13548
# operand will NOT overflow or underflow when moved into the fp reg file
13549
#
13550
fabs_sd_normal:
13551
fmov.l &0x0,%fpsr # clear FPSR
13552
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13553
13554
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13555
13556
fmov.l %fpsr,%d1 # save FPSR
13557
fmov.l &0x0,%fpcr # clear FPCR
13558
13559
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13560
13561
fabs_sd_normal_exit:
13562
mov.l %d2,-(%sp) # save d2
13563
fmovm.x &0x80,FP_SCR0(%a6) # store out result
13564
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
13565
mov.l %d1,%d2 # make a copy
13566
andi.l &0x7fff,%d1 # strip sign
13567
sub.l %d0,%d1 # add scale factor
13568
andi.w &0x8000,%d2 # keep old sign
13569
or.w %d1,%d2 # concat old sign,new exp
13570
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
13571
mov.l (%sp)+,%d2 # restore d2
13572
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13573
rts
13574
13575
#
13576
# operand is to be rounded to double precision
13577
#
13578
fabs_dbl:
13579
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13580
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13581
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13582
bsr.l scale_to_zero_src # calculate scale factor
13583
13584
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
13585
bge.b fabs_sd_unfl # yes; go handle underflow
13586
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
13587
beq.w fabs_sd_may_ovfl # maybe; go check
13588
blt.w fabs_sd_ovfl # yes; go handle overflow
13589
bra.w fabs_sd_normal # no; go handle normalized op
13590
13591
#
13592
# operand WILL underflow when moved into the fp register file
13593
#
13594
fabs_sd_unfl:
13595
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13596
13597
bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
13598
13599
# if underflow or inexact is enabled, go calculate EXOP first.
13600
mov.b FPCR_ENABLE(%a6),%d1
13601
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
13602
bne.b fabs_sd_unfl_ena # yes
13603
13604
fabs_sd_unfl_dis:
13605
lea FP_SCR0(%a6),%a0 # pass: result addr
13606
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13607
bsr.l unf_res # calculate default result
13608
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
13609
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13610
rts
13611
13612
#
13613
# operand will underflow AND underflow or inexact is enabled.
13614
# Therefore, we must return the result rounded to extended precision.
13615
#
13616
fabs_sd_unfl_ena:
13617
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13618
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13619
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13620
13621
mov.l %d2,-(%sp) # save d2
13622
mov.l %d1,%d2 # make a copy
13623
andi.l &0x7fff,%d1 # strip sign
13624
andi.w &0x8000,%d2 # keep old sign
13625
sub.l %d0,%d1 # subtract scale factor
13626
addi.l &0x6000,%d1 # add new bias
13627
andi.w &0x7fff,%d1
13628
or.w %d2,%d1 # concat new sign,new exp
13629
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13630
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13631
mov.l (%sp)+,%d2 # restore d2
13632
bra.b fabs_sd_unfl_dis
13633
13634
#
13635
# operand WILL overflow.
13636
#
13637
fabs_sd_ovfl:
13638
fmov.l &0x0,%fpsr # clear FPSR
13639
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13640
13641
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13642
13643
fmov.l &0x0,%fpcr # clear FPCR
13644
fmov.l %fpsr,%d1 # save FPSR
13645
13646
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13647
13648
fabs_sd_ovfl_tst:
13649
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13650
13651
mov.b FPCR_ENABLE(%a6),%d1
13652
andi.b &0x13,%d1 # is OVFL or INEX enabled?
13653
bne.b fabs_sd_ovfl_ena # yes
13654
13655
#
13656
# OVFL is not enabled; therefore, we must create the default result by
13657
# calling ovf_res().
13658
#
13659
fabs_sd_ovfl_dis:
13660
btst &neg_bit,FPSR_CC(%a6) # is result negative?
13661
sne %d1 # set sign param accordingly
13662
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13663
bsr.l ovf_res # calculate default result
13664
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13665
fmovm.x (%a0),&0x80 # return default result in fp0
13666
rts
13667
13668
#
13669
# OVFL is enabled.
13670
# the INEX2 bit has already been updated by the round to the correct precision.
13671
# now, round to extended (and don't alter the FPSR).
13672
#
13673
fabs_sd_ovfl_ena:
13674
mov.l %d2,-(%sp) # save d2
13675
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13676
mov.l %d1,%d2 # make a copy
13677
andi.l &0x7fff,%d1 # strip sign
13678
andi.w &0x8000,%d2 # keep old sign
13679
sub.l %d0,%d1 # add scale factor
13680
subi.l &0x6000,%d1 # subtract bias
13681
andi.w &0x7fff,%d1
13682
or.w %d2,%d1 # concat sign,exp
13683
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13684
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13685
mov.l (%sp)+,%d2 # restore d2
13686
bra.b fabs_sd_ovfl_dis
13687
13688
#
13689
# the move in MAY overflow. so...
13690
#
13691
fabs_sd_may_ovfl:
13692
fmov.l &0x0,%fpsr # clear FPSR
13693
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13694
13695
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13696
13697
fmov.l %fpsr,%d1 # save status
13698
fmov.l &0x0,%fpcr # clear FPCR
13699
13700
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13701
13702
fabs.x %fp0,%fp1 # make a copy of result
13703
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13704
fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
13705
13706
# no, it didn't overflow; we have correct result
13707
bra.w fabs_sd_normal_exit
13708
13709
##########################################################################
13710
13711
#
13712
# input is not normalized; what is it?
13713
#
13714
fabs_not_norm:
13715
cmpi.b %d1,&DENORM # weed out DENORM
13716
beq.w fabs_denorm
13717
cmpi.b %d1,&SNAN # weed out SNAN
13718
beq.l res_snan_1op
13719
cmpi.b %d1,&QNAN # weed out QNAN
13720
beq.l res_qnan_1op
13721
13722
fabs.x SRC(%a0),%fp0 # force absolute value
13723
13724
cmpi.b %d1,&INF # weed out INF
13725
beq.b fabs_inf
13726
fabs_zero:
13727
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13728
rts
13729
fabs_inf:
13730
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13731
rts
13732
13733
#########################################################################
13734
# XDEF **************************************************************** #
13735
# fcmp(): fp compare op routine #
13736
# #
13737
# XREF **************************************************************** #
13738
# res_qnan() - return QNAN result #
13739
# res_snan() - return SNAN result #
13740
# #
13741
# INPUT *************************************************************** #
13742
# a0 = pointer to extended precision source operand #
13743
# a1 = pointer to extended precision destination operand #
13744
# d0 = round prec/mode #
13745
# #
13746
# OUTPUT ************************************************************** #
13747
# None #
13748
# #
13749
# ALGORITHM *********************************************************** #
13750
# Handle NANs and denorms as special cases. For everything else, #
13751
# just use the actual fcmp instruction to produce the correct condition #
13752
# codes. #
13753
# #
13754
#########################################################################
13755
13756
global fcmp
13757
fcmp:
13758
clr.w %d1
13759
mov.b DTAG(%a6),%d1
13760
lsl.b &0x3,%d1
13761
or.b STAG(%a6),%d1
13762
bne.b fcmp_not_norm # optimize on non-norm input
13763
13764
#
13765
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
13766
#
13767
fcmp_norm:
13768
fmovm.x DST(%a1),&0x80 # load dst op
13769
13770
fcmp.x %fp0,SRC(%a0) # do compare
13771
13772
fmov.l %fpsr,%d0 # save FPSR
13773
rol.l &0x8,%d0 # extract ccode bits
13774
mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
13775
13776
rts
13777
13778
#
13779
# fcmp: inputs are not both normalized; what are they?
13780
#
13781
fcmp_not_norm:
13782
mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13783
jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
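#
# dispatch note (tag encodings follow from the row/column order of the table
# below): d1 holds (DTAG << 3) | STAG. the mov.w fetches a 16-bit offset from
# the word table (hence the *2 index scaling) and the jmp adds that offset back
# to the table base. e.g. a DENORM dst with a NORM src gives index 4*8+0 = 32,
# whose entry is fcmp_dnrm_nrm.
#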
13784
13785
swbeg &48
13786
tbl_fcmp_op:
13787
short fcmp_norm - tbl_fcmp_op # NORM - NORM
13788
short fcmp_norm - tbl_fcmp_op # NORM - ZERO
13789
short fcmp_norm - tbl_fcmp_op # NORM - INF
13790
short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
13791
short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
13792
short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
13793
short tbl_fcmp_op - tbl_fcmp_op #
13794
short tbl_fcmp_op - tbl_fcmp_op #
13795
13796
short fcmp_norm - tbl_fcmp_op # ZERO - NORM
13797
short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
13798
short fcmp_norm - tbl_fcmp_op # ZERO - INF
13799
short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
13800
short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
13801
short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
13802
short tbl_fcmp_op - tbl_fcmp_op #
13803
short tbl_fcmp_op - tbl_fcmp_op #
13804
13805
short fcmp_norm - tbl_fcmp_op # INF - NORM
13806
short fcmp_norm - tbl_fcmp_op # INF - ZERO
13807
short fcmp_norm - tbl_fcmp_op # INF - INF
13808
short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
13809
short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
13810
short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
13811
short tbl_fcmp_op - tbl_fcmp_op #
13812
short tbl_fcmp_op - tbl_fcmp_op #
13813
13814
short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
13815
short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
13816
short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
13817
short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
13818
short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
13819
short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
13820
short tbl_fcmp_op - tbl_fcmp_op #
13821
short tbl_fcmp_op - tbl_fcmp_op #
13822
13823
short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
13824
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
13825
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
13826
short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
13827
short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
13828
short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
13829
short tbl_fcmp_op - tbl_fcmp_op #
13830
short tbl_fcmp_op - tbl_fcmp_op #
13831
13832
short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
13833
short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
13834
short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
13835
short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
13836
short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
13837
short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
13838
short tbl_fcmp_op - tbl_fcmp_op #
13839
short tbl_fcmp_op - tbl_fcmp_op #
13840
13841
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13842
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
13843
fcmp_res_qnan:
13844
bsr.l res_qnan
13845
andi.b &0xf7,FPSR_CC(%a6)
13846
rts
13847
fcmp_res_snan:
13848
bsr.l res_snan
13849
andi.b &0xf7,FPSR_CC(%a6)
13850
rts
13851
13852
#
13853
# DENORMs are a little more difficult.
13854
# If you have 2 DENORMs, then you can just force the j-bit to a one
13855
# and use the fcmp_norm routine.
13856
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857
# and use the fcmp_norm routine.
13858
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860
# (1) signs are (+) and the DENORM is the dst or
13861
# (2) signs are (-) and the DENORM is the src
13862
#
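#
# a quick check of rule (1) (illustrative): with both operands positive and the
# DENORM as the dst, dst < src, so dst - src is negative and the 'N' bit must
# be set; rule (2) is the mirror image with both signs negative and the DENORM
# as the src.
#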
13863
13864
fcmp_dnrm_s:
13865
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13866
mov.l SRC_HI(%a0),%d0
13867
bset &31,%d0 # DENORM src; make into small norm
13868
mov.l %d0,FP_SCR0_HI(%a6)
13869
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13870
lea FP_SCR0(%a6),%a0
13871
bra.w fcmp_norm
13872
13873
fcmp_dnrm_d:
13874
mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
13875
mov.l DST_HI(%a1),%d0
13876
bset &31,%d0 # DENORM dst; make into small norm
13877
mov.l %d0,FP_SCR0_HI(%a6)
13878
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
13879
lea FP_SCR0(%a6),%a1
13880
bra.w fcmp_norm
13881
13882
fcmp_dnrm_sd:
13883
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13884
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13885
mov.l DST_HI(%a1),%d0
13886
bset &31,%d0 # DENORM dst; make into small norm
13887
mov.l %d0,FP_SCR1_HI(%a6)
13888
mov.l SRC_HI(%a0),%d0
13889
bset &31,%d0 # DENORM src; make into small norm
13890
mov.l %d0,FP_SCR0_HI(%a6)
13891
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13892
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13893
lea FP_SCR1(%a6),%a1
13894
lea FP_SCR0(%a6),%a0
13895
bra.w fcmp_norm
13896
13897
fcmp_nrm_dnrm:
13898
mov.b SRC_EX(%a0),%d0 # determine if like signs
13899
mov.b DST_EX(%a1),%d1
13900
eor.b %d0,%d1
13901
bmi.w fcmp_dnrm_s
13902
13903
# signs are the same, so must determine the answer ourselves.
13904
tst.b %d0 # is src op negative?
13905
bmi.b fcmp_nrm_dnrm_m # yes
13906
rts
13907
fcmp_nrm_dnrm_m:
13908
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13909
rts
13910
13911
fcmp_dnrm_nrm:
13912
mov.b SRC_EX(%a0),%d0 # determine if like signs
13913
mov.b DST_EX(%a1),%d1
13914
eor.b %d0,%d1
13915
bmi.w fcmp_dnrm_d
13916
13917
# signs are the same, so must determine the answer ourselves.
13918
tst.b %d0 # is src op negative?
13919
bpl.b fcmp_dnrm_nrm_m # no
13920
rts
13921
fcmp_dnrm_nrm_m:
13922
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13923
rts
13924
13925
#########################################################################
13926
# XDEF **************************************************************** #
13927
# fsglmul(): emulates the fsglmul instruction #
13928
# #
13929
# XREF **************************************************************** #
13930
# scale_to_zero_src() - scale src exponent to zero #
13931
# scale_to_zero_dst() - scale dst exponent to zero #
13932
# unf_res4() - return default underflow result for sglop #
13933
# ovf_res() - return default overflow result #
13934
# res_qnan() - return QNAN result #
13935
# res_snan() - return SNAN result #
13936
# #
13937
# INPUT *************************************************************** #
13938
# a0 = pointer to extended precision source operand #
13939
# a1 = pointer to extended precision destination operand #
13940
# d0 = rnd prec,mode #
13941
# #
13942
# OUTPUT ************************************************************** #
13943
# fp0 = result #
13944
# fp1 = EXOP (if exception occurred) #
13945
# #
13946
# ALGORITHM *********************************************************** #
13947
# Handle NANs, infinities, and zeroes as special cases. Divide #
13948
# norms/denorms into ext/sgl/dbl precision. #
13949
# For norms/denorms, scale the exponents such that a multiply #
13950
# instruction won't cause an exception. Use the regular fsglmul to #
13951
# compute a result. Check if the original operands would have taken #
13952
# an exception. If so, return the default overflow/underflow result #
13953
# and return the EXOP if exceptions are enabled. Else, scale the #
13954
# result operand to the proper exponent. #
13955
# #
13956
#########################################################################
13957
13958
global fsglmul
13959
fsglmul:
13960
mov.l %d0,L_SCR3(%a6) # store rnd info
13961
13962
clr.w %d1
13963
mov.b DTAG(%a6),%d1
13964
lsl.b &0x3,%d1
13965
or.b STAG(%a6),%d1
13966
13967
bne.w fsglmul_not_norm # optimize on non-norm input
13968
13969
fsglmul_norm:
13970
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13971
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
13972
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13973
13974
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13975
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13976
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13977
13978
bsr.l scale_to_zero_src # scale exponent
13979
mov.l %d0,-(%sp) # save scale factor 1
13980
13981
bsr.l scale_to_zero_dst # scale dst exponent
13982
13983
add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
13984
13985
cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
13986
beq.w fsglmul_may_ovfl # result may rnd to overflow
13987
blt.w fsglmul_ovfl # result will overflow
13988
13989
cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
13990
beq.w fsglmul_may_unfl # result may rnd to no unfl
13991
bgt.w fsglmul_unfl # result will underflow
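#
# why these particular compares (illustrative reading): after scaling, both
# mantissas lie in [1.0,2.0), so the product's biased exponent is 0x3fff or
# 0x4000 before the scale factor in d0 is backed out. a scale factor at or
# below 0x3fff-0x7ffe pushes the true exponent up to the top of the extended
# range (overflow territory) and one at or above 0x3fff+0x0001 pushes it down
# to the bottom (underflow territory); the equality cases take the "may" paths
# because whether the product reaches 2.0, which adds one to the exponent,
# decides which side of the limit the result lands on.
#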
13992
13993
fsglmul_normal:
13994
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
13995
13996
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13997
fmov.l &0x0,%fpsr # clear FPSR
13998
13999
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14000
14001
fmov.l %fpsr,%d1 # save status
14002
fmov.l &0x0,%fpcr # clear FPCR
14003
14004
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14005
14006
fsglmul_normal_exit:
14007
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14008
mov.l %d2,-(%sp) # save d2
14009
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14010
mov.l %d1,%d2 # make a copy
14011
andi.l &0x7fff,%d1 # strip sign
14012
andi.w &0x8000,%d2 # keep old sign
14013
sub.l %d0,%d1 # add scale factor
14014
or.w %d2,%d1 # concat old sign,new exp
14015
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14016
mov.l (%sp)+,%d2 # restore d2
14017
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14018
rts
14019
14020
fsglmul_ovfl:
14021
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14022
14023
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14024
fmov.l &0x0,%fpsr # clear FPSR
14025
14026
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14027
14028
fmov.l %fpsr,%d1 # save status
14029
fmov.l &0x0,%fpcr # clear FPCR
14030
14031
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14032
14033
fsglmul_ovfl_tst:
14034
14035
# save setting this until now because this is where fsglmul_may_ovfl may jump in
14036
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14037
14038
mov.b FPCR_ENABLE(%a6),%d1
14039
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14040
bne.b fsglmul_ovfl_ena # yes
14041
14042
fsglmul_ovfl_dis:
14043
btst &neg_bit,FPSR_CC(%a6) # is result negative?
14044
sne %d1 # set sign param accordingly
14045
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14046
andi.b &0x30,%d0 # force prec = ext
14047
bsr.l ovf_res # calculate default result
14048
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14049
fmovm.x (%a0),&0x80 # return default result in fp0
14050
rts
14051
14052
fsglmul_ovfl_ena:
14053
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14054
14055
mov.l %d2,-(%sp) # save d2
14056
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14057
mov.l %d1,%d2 # make a copy
14058
andi.l &0x7fff,%d1 # strip sign
14059
sub.l %d0,%d1 # add scale factor
14060
subi.l &0x6000,%d1 # subtract bias
14061
andi.w &0x7fff,%d1
14062
andi.w &0x8000,%d2 # keep old sign
14063
or.w %d2,%d1 # concat old sign,new exp
14064
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14065
mov.l (%sp)+,%d2 # restore d2
14066
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14067
bra.b fsglmul_ovfl_dis
14068
14069
fsglmul_may_ovfl:
14070
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14071
14072
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14073
fmov.l &0x0,%fpsr # clear FPSR
14074
14075
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14076
14077
fmov.l %fpsr,%d1 # save status
14078
fmov.l &0x0,%fpcr # clear FPCR
14079
14080
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14081
14082
fabs.x %fp0,%fp1 # make a copy of result
14083
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
14084
fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
14085
14086
# no, it didn't overflow; we have correct result
14087
bra.w fsglmul_normal_exit
14088
14089
fsglmul_unfl:
14090
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14091
14092
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14093
14094
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14095
fmov.l &0x0,%fpsr # clear FPSR
14096
14097
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14098
14099
fmov.l %fpsr,%d1 # save status
14100
fmov.l &0x0,%fpcr # clear FPCR
14101
14102
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14103
14104
mov.b FPCR_ENABLE(%a6),%d1
14105
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14106
bne.b fsglmul_unfl_ena # yes
14107
14108
fsglmul_unfl_dis:
14109
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14110
14111
lea FP_SCR0(%a6),%a0 # pass: result addr
14112
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14113
bsr.l unf_res4 # calculate default result
14114
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14115
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14116
rts
14117
14118
#
14119
# UNFL is enabled.
14120
#
14121
fsglmul_unfl_ena:
14122
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14123
14124
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14125
fmov.l &0x0,%fpsr # clear FPSR
14126
14127
fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14128
14129
fmov.l &0x0,%fpcr # clear FPCR
14130
14131
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14132
mov.l %d2,-(%sp) # save d2
14133
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14134
mov.l %d1,%d2 # make a copy
14135
andi.l &0x7fff,%d1 # strip sign
14136
andi.w &0x8000,%d2 # keep old sign
14137
sub.l %d0,%d1 # add scale factor
14138
addi.l &0x6000,%d1 # add bias
14139
andi.w &0x7fff,%d1
14140
or.w %d2,%d1 # concat old sign,new exp
14141
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14142
mov.l (%sp)+,%d2 # restore d2
14143
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14144
bra.w fsglmul_unfl_dis
14145
14146
fsglmul_may_unfl:
14147
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14148
14149
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14150
fmov.l &0x0,%fpsr # clear FPSR
14151
14152
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14153
14154
fmov.l %fpsr,%d1 # save status
14155
fmov.l &0x0,%fpcr # clear FPCR
14156
14157
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14158
14159
fabs.x %fp0,%fp1 # make a copy of result
14160
fcmp.b %fp1,&0x2 # is |result| > 2.b?
14161
fbgt.w fsglmul_normal_exit # no; no underflow occurred
14162
fblt.w fsglmul_unfl # yes; underflow occurred
14163
14164
#
14165
# we still don't know if underflow occurred. result is ~ equal to 2. but,
14166
# we don't know if the result was an underflow that rounded up to a 2 or
14167
# a normalized number that rounded down to a 2. so, redo the entire operation
14168
# using RZ as the rounding mode to see what the pre-rounded result is.
14169
# this case should be relatively rare.
14170
#
14171
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14172
14173
mov.l L_SCR3(%a6),%d1
14174
andi.b &0xc0,%d1 # keep rnd prec
14175
ori.b &rz_mode*0x10,%d1 # insert RZ
14176
14177
fmov.l %d1,%fpcr # set FPCR
14178
fmov.l &0x0,%fpsr # clear FPSR
14179
14180
fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14181
14182
fmov.l &0x0,%fpcr # clear FPCR
14183
fabs.x %fp1 # make absolute value
14184
fcmp.b %fp1,&0x2 # is |result| < 2.b?
14185
fbge.w fsglmul_normal_exit # no; no underflow occurred
14186
bra.w fsglmul_unfl # yes, underflow occurred
14187
14188
##############################################################################
14189
14190
#
14191
# Single Precision Multiply: inputs are not both normalized; what are they?
14192
#
14193
fsglmul_not_norm:
14194
mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14195
jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
14196
14197
swbeg &48
14198
tbl_fsglmul_op:
14199
short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
14200
short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
14201
short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
14202
short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
14203
short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
14204
short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
14205
short tbl_fsglmul_op - tbl_fsglmul_op #
14206
short tbl_fsglmul_op - tbl_fsglmul_op #
14207
14208
short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
14209
short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
14210
short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
14211
short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
14212
short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
14213
short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
14214
short tbl_fsglmul_op - tbl_fsglmul_op #
14215
short tbl_fsglmul_op - tbl_fsglmul_op #
14216
14217
short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
14218
short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
14219
short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
14220
short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
14221
short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
14222
short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
14223
short tbl_fsglmul_op - tbl_fsglmul_op #
14224
short tbl_fsglmul_op - tbl_fsglmul_op #
14225
14226
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
14227
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
14228
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
14229
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
14230
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
14231
short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
14232
short tbl_fsglmul_op - tbl_fsglmul_op #
14233
short tbl_fsglmul_op - tbl_fsglmul_op #
14234
14235
short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
14236
short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
14237
short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
14238
short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
14239
short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
14240
short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
14241
short tbl_fsglmul_op - tbl_fsglmul_op #
14242
short tbl_fsglmul_op - tbl_fsglmul_op #
14243
14244
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
14245
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
14246
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
14247
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
14248
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
14249
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
14250
short tbl_fsglmul_op - tbl_fsglmul_op #
14251
short tbl_fsglmul_op - tbl_fsglmul_op #
14252
14253
fsglmul_res_operr:
14254
bra.l res_operr
14255
fsglmul_res_snan:
14256
bra.l res_snan
14257
fsglmul_res_qnan:
14258
bra.l res_qnan
14259
fsglmul_zero:
14260
bra.l fmul_zero
14261
fsglmul_inf_src:
14262
bra.l fmul_inf_src
14263
fsglmul_inf_dst:
14264
bra.l fmul_inf_dst
14265
14266
#########################################################################
14267
# XDEF **************************************************************** #
14268
# fsgldiv(): emulates the fsgldiv instruction #
14269
# #
14270
# XREF **************************************************************** #
14271
# scale_to_zero_src() - scale src exponent to zero #
14272
# scale_to_zero_dst() - scale dst exponent to zero #
14273
# unf_res4() - return default underflow result for sglop #
14274
# ovf_res() - return default overflow result #
14275
# res_qnan() - return QNAN result #
14276
# res_snan() - return SNAN result #
14277
# #
14278
# INPUT *************************************************************** #
14279
# a0 = pointer to extended precision source operand #
14280
# a1 = pointer to extended precision destination operand #
14281
# d0 = rnd prec,mode #
14282
# #
14283
# OUTPUT ************************************************************** #
14284
# fp0 = result #
14285
# fp1 = EXOP (if exception occurred) #
14286
# #
14287
# ALGORITHM *********************************************************** #
14288
# Handle NANs, infinities, and zeroes as special cases. Divide #
14289
# norms/denorms into ext/sgl/dbl precision. #
14290
# For norms/denorms, scale the exponents such that a divide #
14291
# instruction won't cause an exception. Use the regular fsgldiv to #
14292
# compute a result. Check if the original operands would have taken #
14293
# an exception. If so, return the default overflow/underflow result #
14294
# and return the EXOP if exceptions are enabled. Else, scale the #
14295
# result operand to the proper exponent. #
14296
# #
14297
#########################################################################
14298
14299
global fsgldiv
14300
fsgldiv:
14301
mov.l %d0,L_SCR3(%a6) # store rnd info
14302
14303
clr.w %d1
14304
mov.b DTAG(%a6),%d1
14305
lsl.b &0x3,%d1
14306
or.b STAG(%a6),%d1 # combine src tags
14307
14308
bne.w fsgldiv_not_norm # optimize on non-norm input
14309
14310
#
14311
# DIVIDE: NORMs and DENORMs ONLY!
14312
#
14313
fsgldiv_norm:
14314
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
14315
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
14316
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
14317
14318
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14319
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
14320
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
14321
14322
bsr.l scale_to_zero_src # calculate scale factor 1
14323
mov.l %d0,-(%sp) # save scale factor 1
14324
14325
bsr.l scale_to_zero_dst # calculate scale factor 2
14326
14327
neg.l (%sp) # S.F. = scale1 - scale2
14328
add.l %d0,(%sp)
14329
14330
mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
14331
lsr.b &0x6,%d1
14332
mov.l (%sp)+,%d0
14333
cmpi.l %d0,&0x3fff-0x7ffe
14334
ble.w fsgldiv_may_ovfl
14335
14336
cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
14337
beq.w fsgldiv_may_unfl # maybe
14338
bgt.w fsgldiv_unfl # yes; go handle underflow
14339
14340
fsgldiv_normal:
14341
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14342
14343
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14344
fmov.l &0x0,%fpsr # clear FPSR
14345
14346
fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
14347
14348
fmov.l %fpsr,%d1 # save FPSR
14349
fmov.l &0x0,%fpcr # clear FPCR
14350
14351
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14352
14353
fsgldiv_normal_exit:
14354
fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
14355
mov.l %d2,-(%sp) # save d2
14356
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14357
mov.l %d1,%d2 # make a copy
14358
andi.l &0x7fff,%d1 # strip sign
14359
andi.w &0x8000,%d2 # keep old sign
14360
sub.l %d0,%d1 # add scale factor
14361
or.w %d2,%d1 # concat old sign,new exp
14362
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14363
mov.l (%sp)+,%d2 # restore d2
14364
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14365
rts
14366
14367
fsgldiv_may_ovfl:
14368
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14369
14370
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14371
fmov.l &0x0,%fpsr # clear FPSR
14372
14373
fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
14374
14375
fmov.l %fpsr,%d1
14376
fmov.l &0x0,%fpcr
14377
14378
or.l %d1,USER_FPSR(%a6) # save INEX,N
14379
14380
fmovm.x &0x01,-(%sp) # save result to stack
14381
mov.w (%sp),%d1 # fetch new exponent
14382
add.l &0xc,%sp # clear result
14383
andi.l &0x7fff,%d1 # strip sign
14384
sub.l %d0,%d1 # add scale factor
14385
cmp.l %d1,&0x7fff # did divide overflow?
14386
blt.b fsgldiv_normal_exit
14387
14388
fsgldiv_ovfl_tst:
14389
or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14390
14391
mov.b FPCR_ENABLE(%a6),%d1
14392
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14393
bne.b fsgldiv_ovfl_ena # yes
14394
14395
fsgldiv_ovfl_dis:
14396
btst &neg_bit,FPSR_CC(%a6) # is result negative
14397
sne %d1 # set sign param accordingly
14398
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14399
andi.b &0x30,%d0 # kill precision
14400
bsr.l ovf_res # calculate default result
14401
or.b %d0,FPSR_CC(%a6) # set INF if applicable
14402
fmovm.x (%a0),&0x80 # return default result in fp0
14403
rts
14404
14405
fsgldiv_ovfl_ena:
14406
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14407
14408
mov.l %d2,-(%sp) # save d2
14409
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14410
mov.l %d1,%d2 # make a copy
14411
andi.l &0x7fff,%d1 # strip sign
14412
andi.w &0x8000,%d2 # keep old sign
14413
sub.l %d0,%d1 # add scale factor
14414
subi.l &0x6000,%d1 # subtract new bias
14415
andi.w &0x7fff,%d1 # clear ms bit
14416
or.w %d2,%d1 # concat old sign,new exp
14417
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14418
mov.l (%sp)+,%d2 # restore d2
14419
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14420
bra.b fsgldiv_ovfl_dis
14421
14422
fsgldiv_unfl:
14423
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14424
14425
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14426
14427
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14428
fmov.l &0x0,%fpsr # clear FPSR
14429
14430
fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14431
14432
fmov.l %fpsr,%d1 # save status
14433
fmov.l &0x0,%fpcr # clear FPCR
14434
14435
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14436
14437
mov.b FPCR_ENABLE(%a6),%d1
14438
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14439
bne.b fsgldiv_unfl_ena # yes
14440
14441
fsgldiv_unfl_dis:
14442
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14443
14444
lea FP_SCR0(%a6),%a0 # pass: result addr
14445
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14446
bsr.l unf_res4 # calculate default result
14447
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14448
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14449
rts
14450
14451
#
14452
# UNFL is enabled.
14453
#
14454
fsgldiv_unfl_ena:
14455
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14456
14457
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14458
fmov.l &0x0,%fpsr # clear FPSR
14459
14460
fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14461
14462
fmov.l &0x0,%fpcr # clear FPCR
14463
14464
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14465
mov.l %d2,-(%sp) # save d2
14466
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14467
mov.l %d1,%d2 # make a copy
14468
andi.l &0x7fff,%d1 # strip sign
14469
andi.w &0x8000,%d2 # keep old sign
14470
sub.l %d0,%d1 # add scale factor
14471
addi.l &0x6000,%d1 # add bias
14472
andi.w &0x7fff,%d1 # clear top bit
14473
or.w %d2,%d1 # concat old sign, new exp
14474
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14475
mov.l (%sp)+,%d2 # restore d2
14476
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14477
bra.b fsgldiv_unfl_dis
14478
14479
#
14480
# the divide operation MAY underflow:
14481
#
14482
fsgldiv_may_unfl:
14483
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14484
14485
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14486
fmov.l &0x0,%fpsr # clear FPSR
14487
14488
fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14489
14490
fmov.l %fpsr,%d1 # save status
14491
fmov.l &0x0,%fpcr # clear FPCR
14492
14493
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14494
14495
fabs.x %fp0,%fp1 # make a copy of result
14496
fcmp.b %fp1,&0x1 # is |result| > 1.b?
14497
fbgt.w fsgldiv_normal_exit # no; no underflow occurred
14498
fblt.w fsgldiv_unfl # yes; underflow occurred
14499
14500
#
14501
# we still don't know if underflow occurred. result is ~ equal to 1. but,
14502
# we don't know if the result was an underflow that rounded up to a 1
14503
# or a normalized number that rounded down to a 1. so, redo the entire
14504
# operation using RZ as the rounding mode to see what the pre-rounded
14505
# result is. this case should be relatively rare.
14506
#
14507
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
14508
14509
clr.l %d1 # clear scratch register
14510
ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
14511
14512
fmov.l %d1,%fpcr # set FPCR
14513
fmov.l &0x0,%fpsr # clear FPSR
14514
14515
fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14516
14517
fmov.l &0x0,%fpcr # clear FPCR
14518
fabs.x %fp1 # make absolute value
14519
fcmp.b %fp1,&0x1 # is |result| < 1.b?
14520
fbge.w fsgldiv_normal_exit # no; no underflow occurred
14521
bra.w fsgldiv_unfl # yes; underflow occurred
14522
14523
############################################################################
14524
14525
#
14526
# Divide: inputs are not both normalized; what are they?
14527
#
14528
fsgldiv_not_norm:
14529
mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14530
jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14531
14532
swbeg &48
14533
tbl_fsgldiv_op:
14534
short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
14535
short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
14536
short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
14537
short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
14538
short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
14539
short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
14540
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14541
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14542
14543
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
14544
short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
14545
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
14546
short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
14547
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
14548
short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
14549
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14550
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14551
14552
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
14553
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
14554
short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
14555
short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
14556
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
14557
short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
14558
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14559
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14560
14561
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
14562
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
14563
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
14564
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
14565
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
14566
short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
14567
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14568
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14569
14570
short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
14571
short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
14572
short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
14573
short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
14574
short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
14575
short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
14576
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14577
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14578
14579
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
14580
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
14581
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
14582
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
14583
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
14584
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
14585
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14586
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14587
14588
fsgldiv_res_qnan:
14589
bra.l res_qnan
14590
fsgldiv_res_snan:
14591
bra.l res_snan
14592
fsgldiv_res_operr:
14593
bra.l res_operr
14594
fsgldiv_inf_load:
14595
bra.l fdiv_inf_load
14596
fsgldiv_zero_load:
14597
bra.l fdiv_zero_load
14598
fsgldiv_inf_dst:
14599
bra.l fdiv_inf_dst
14600
14601
#########################################################################
14602
# XDEF **************************************************************** #
14603
# fadd(): emulates the fadd instruction #
14604
# fsadd(): emulates the fsadd instruction #
14605
# fdadd(): emulates the fdadd instruction #
14606
# #
14607
# XREF **************************************************************** #
14608
# addsub_scaler2() - scale the operands so they won't take exc #
14609
# ovf_res() - return default overflow result #
14610
# unf_res() - return default underflow result #
14611
# res_qnan() - set QNAN result #
14612
# res_snan() - set SNAN result #
14613
# res_operr() - set OPERR result #
14614
# scale_to_zero_src() - set src operand exponent equal to zero #
14615
# scale_to_zero_dst() - set dst operand exponent equal to zero #
14616
# #
14617
# INPUT *************************************************************** #
14618
# a0 = pointer to extended precision source operand #
14619
# a1 = pointer to extended precision destination operand #
14620
# #
14621
# OUTPUT ************************************************************** #
14622
# fp0 = result #
14623
# fp1 = EXOP (if exception occurred) #
14624
# #
14625
# ALGORITHM *********************************************************** #
14626
# Handle NANs, infinities, and zeroes as special cases. Divide #
14627
# norms into extended, single, and double precision. #
14628
# Do addition after scaling exponents such that exception won't #
14629
# occur. Then, check result exponent to see if exception would have #
14630
# occurred. If so, return default result and maybe EXOP. Else, insert #
14631
# the correct result exponent and return. Set FPSR bits as appropriate. #
14632
# #
14633
#########################################################################
14634
14635
global fsadd
14636
fsadd:
14637
andi.b &0x30,%d0 # clear rnd prec
14638
ori.b &s_mode*0x10,%d0 # insert sgl prec
14639
bra.b fadd
14640
14641
global fdadd
14642
fdadd:
14643
andi.b &0x30,%d0 # clear rnd prec
14644
ori.b &d_mode*0x10,%d0 # insert dbl prec
14645
14646
global fadd
14647
fadd:
14648
mov.l %d0,L_SCR3(%a6) # store rnd info
14649
14650
clr.w %d1
14651
mov.b DTAG(%a6),%d1
14652
lsl.b &0x3,%d1
14653
or.b STAG(%a6),%d1 # combine src tags
14654
14655
bne.w fadd_not_norm # optimize on non-norm input
14656
14657
#
14658
# ADD: norms and denorms
14659
#
14660
fadd_norm:
14661
bsr.l addsub_scaler2 # scale exponents
14662
14663
fadd_zero_entry:
14664
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14665
14666
fmov.l &0x0,%fpsr # clear FPSR
14667
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14668
14669
fadd.x FP_SCR0(%a6),%fp0 # execute add
14670
14671
fmov.l &0x0,%fpcr # clear FPCR
14672
fmov.l %fpsr,%d1 # fetch INEX2,N,Z
14673
14674
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
14675
14676
fbeq.w fadd_zero_exit # if result is zero, end now
14677
14678
mov.l %d2,-(%sp) # save d2
14679
14680
fmovm.x &0x01,-(%sp) # save result to stack
14681
14682
mov.w 2+L_SCR3(%a6),%d1
14683
lsr.b &0x6,%d1
14684
14685
mov.w (%sp),%d2 # fetch new sign, exp
14686
andi.l &0x7fff,%d2 # strip sign
14687
sub.l %d0,%d2 # add scale factor
14688
14689
cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690
bge.b fadd_ovfl # yes
14691
14692
cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693
blt.w fadd_unfl # yes
14694
beq.w fadd_may_unfl # maybe; go find out
14695
14696
fadd_normal:
14697
mov.w (%sp),%d1
14698
andi.w &0x8000,%d1 # keep sign
14699
or.w %d2,%d1 # concat sign,new exp
14700
mov.w %d1,(%sp) # insert new exponent
14701
14702
fmovm.x (%sp)+,&0x80 # return result in fp0
14703
14704
mov.l (%sp)+,%d2 # restore d2
14705
rts
14706
14707
fadd_zero_exit:
14708
# fmov.s &0x00000000,%fp0 # return zero in fp0
14709
rts
14710
14711
tbl_fadd_ovfl:
14712
long 0x7fff # ext ovfl
14713
long 0x407f # sgl ovfl
14714
long 0x43ff # dbl ovfl
14715
14716
tbl_fadd_unfl:
14717
long 0x0000 # ext unfl
14718
long 0x3f81 # sgl unfl
14719
long 0x3c01 # dbl unfl
14720
14721
fadd_ovfl:
14722
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14723
14724
mov.b FPCR_ENABLE(%a6),%d1
14725
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14726
bne.b fadd_ovfl_ena # yes
14727
14728
add.l &0xc,%sp
14729
fadd_ovfl_dis:
14730
btst &neg_bit,FPSR_CC(%a6) # is result negative?
14731
sne %d1 # set sign param accordingly
14732
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14733
bsr.l ovf_res # calculate default result
14734
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14735
fmovm.x (%a0),&0x80 # return default result in fp0
14736
mov.l (%sp)+,%d2 # restore d2
14737
rts
14738
14739
fadd_ovfl_ena:
14740
mov.b L_SCR3(%a6),%d1
14741
andi.b &0xc0,%d1 # is precision extended?
14742
bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
14743
14744
fadd_ovfl_ena_cont:
14745
mov.w (%sp),%d1
14746
andi.w &0x8000,%d1 # keep sign
14747
subi.l &0x6000,%d2 # subtract bias
14748
andi.w &0x7fff,%d2
14749
or.w %d2,%d1 # concat sign,new exp
14750
mov.w %d1,(%sp) # insert new exponent
14751
14752
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
14753
bra.b fadd_ovfl_dis
14754
14755
fadd_ovfl_ena_sd:
14756
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14757
14758
mov.l L_SCR3(%a6),%d1
14759
andi.b &0x30,%d1 # keep rnd mode
14760
fmov.l %d1,%fpcr # set FPCR
14761
14762
fadd.x FP_SCR0(%a6),%fp0 # execute add
14763
14764
fmov.l &0x0,%fpcr # clear FPCR
14765
14766
add.l &0xc,%sp
14767
fmovm.x &0x01,-(%sp)
14768
bra.b fadd_ovfl_ena_cont
14769
14770
fadd_unfl:
14771
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14772
14773
add.l &0xc,%sp
14774
14775
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14776
14777
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14778
fmov.l &0x0,%fpsr # clear FPSR
14779
14780
fadd.x FP_SCR0(%a6),%fp0 # execute add
14781
14782
fmov.l &0x0,%fpcr # clear FPCR
14783
fmov.l %fpsr,%d1 # save status
14784
14785
or.l %d1,USER_FPSR(%a6) # save INEX,N
14786
14787
mov.b FPCR_ENABLE(%a6),%d1
14788
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14789
bne.b fadd_unfl_ena # yes
14790
14791
fadd_unfl_dis:
14792
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14793
14794
lea FP_SCR0(%a6),%a0 # pass: result addr
14795
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14796
bsr.l unf_res # calculate default result
14797
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14798
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14799
mov.l (%sp)+,%d2 # restore d2
14800
rts
14801
14802
fadd_unfl_ena:
14803
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14804
14805
mov.l L_SCR3(%a6),%d1
14806
andi.b &0xc0,%d1 # is precision extended?
14807
bne.b fadd_unfl_ena_sd # no; sgl or dbl
14808
14809
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14810
14811
fadd_unfl_ena_cont:
14812
fmov.l &0x0,%fpsr # clear FPSR
14813
14814
fadd.x FP_SCR0(%a6),%fp1 # execute add
14815
14816
fmov.l &0x0,%fpcr # clear FPCR
14817
14818
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14819
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14820
mov.l %d1,%d2 # make a copy
14821
andi.l &0x7fff,%d1 # strip sign
14822
andi.w &0x8000,%d2 # keep old sign
14823
sub.l %d0,%d1 # add scale factor
14824
addi.l &0x6000,%d1 # add new bias
14825
andi.w &0x7fff,%d1 # clear top bit
14826
or.w %d2,%d1 # concat sign,new exp
14827
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14828
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14829
bra.w fadd_unfl_dis
14830
14831
fadd_unfl_ena_sd:
14832
mov.l L_SCR3(%a6),%d1
14833
andi.b &0x30,%d1 # use only rnd mode
14834
fmov.l %d1,%fpcr # set FPCR
14835
14836
bra.b fadd_unfl_ena_cont
14837
14838
#
14839
# result is equal to the smallest normalized number in the selected precision
14840
# if the precision is extended, this result could not have come from an
14841
# underflow that rounded up.
14842
#
14843
fadd_may_unfl:
14844
mov.l L_SCR3(%a6),%d1
14845
andi.b &0xc0,%d1
14846
beq.w fadd_normal # yes; no underflow occurred
14847
14848
mov.l 0x4(%sp),%d1 # extract hi(man)
14849
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14850
bne.w fadd_normal # no; no underflow occurred
14851
14852
tst.l 0x8(%sp) # is lo(man) = 0x0?
14853
bne.w fadd_normal # no; no underflow occurred
14854
14855
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856
beq.w fadd_normal # no; no underflow occurred
14857
14858
#
14859
# ok, so now the result has an exponent equal to the smallest normalized
14860
# exponent for the selected precision. also, the mantissa is equal to
14861
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862
# g,r,s.
14863
# now, we must determine whether the pre-rounded result was an underflow
14864
# rounded "up" or a normalized number rounded "down".
14865
# so, we do this by re-executing the add using RZ as the rounding mode and
14866
# seeing if the new result is smaller or equal to the current result.
14867
#
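#
# in C-like terms the test below is roughly (illustrative sketch only; the
# names res_rn/res_rz are not used by this package):
#
#	/* res_rn: the add rounded in the user's mode (already in fp0)
#	 * res_rz: the same add redone in round-to-zero (computed in fp1)
#	 * if truncating gives a strictly smaller magnitude, the pre-rounded
#	 * result was below the smallest norm, i.e. a true underflow.
#	 */
#	if (fabsl(res_rz) < fabsl(res_rn))
#		goto underflow;
#	goto normal;
#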
14868
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14869
14870
mov.l L_SCR3(%a6),%d1
14871
andi.b &0xc0,%d1 # keep rnd prec
14872
ori.b &rz_mode*0x10,%d1 # insert rnd mode
14873
fmov.l %d1,%fpcr # set FPCR
14874
fmov.l &0x0,%fpsr # clear FPSR
14875
14876
fadd.x FP_SCR0(%a6),%fp1 # execute add
14877
14878
fmov.l &0x0,%fpcr # clear FPCR
14879
14880
fabs.x %fp0 # compare absolute values
14881
fabs.x %fp1
14882
fcmp.x %fp0,%fp1 # is first result > second?
14883
14884
fbgt.w fadd_unfl # yes; it's an underflow
14885
bra.w fadd_normal # no; it's not an underflow
14886
14887
##########################################################################
14888
14889
#
14890
# Add: inputs are not both normalized; what are they?
14891
#
14892
fadd_not_norm:
14893
mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14894
jmp (tbl_fadd_op.b,%pc,%d1.w*1)
14895
14896
swbeg &48
14897
tbl_fadd_op:
14898
short fadd_norm - tbl_fadd_op # NORM + NORM
14899
short fadd_zero_src - tbl_fadd_op # NORM + ZERO
14900
short fadd_inf_src - tbl_fadd_op # NORM + INF
14901
short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
14902
short fadd_norm - tbl_fadd_op # NORM + DENORM
14903
short fadd_res_snan - tbl_fadd_op # NORM + SNAN
14904
short tbl_fadd_op - tbl_fadd_op #
14905
short tbl_fadd_op - tbl_fadd_op #
14906
14907
short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
14908
short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
14909
short fadd_inf_src - tbl_fadd_op # ZERO + INF
14910
short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
14911
short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
14912
short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
14913
short tbl_fadd_op - tbl_fadd_op #
14914
short tbl_fadd_op - tbl_fadd_op #
14915
14916
short fadd_inf_dst - tbl_fadd_op # INF + NORM
14917
short fadd_inf_dst - tbl_fadd_op # INF + ZERO
14918
short fadd_inf_2 - tbl_fadd_op # INF + INF
14919
short fadd_res_qnan - tbl_fadd_op # INF + QNAN
14920
short fadd_inf_dst - tbl_fadd_op # INF + DENORM
14921
short fadd_res_snan - tbl_fadd_op # INF + SNAN
14922
short tbl_fadd_op - tbl_fadd_op #
14923
short tbl_fadd_op - tbl_fadd_op #
14924
14925
short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
14926
short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
14927
short fadd_res_qnan - tbl_fadd_op # QNAN + INF
14928
short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
14929
short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
14930
short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
14931
short tbl_fadd_op - tbl_fadd_op #
14932
short tbl_fadd_op - tbl_fadd_op #
14933
14934
short fadd_norm - tbl_fadd_op # DENORM + NORM
14935
short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
14936
short fadd_inf_src - tbl_fadd_op # DENORM + INF
14937
short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
14938
short fadd_norm - tbl_fadd_op # DENORM + DENORM
14939
short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
14940
short tbl_fadd_op - tbl_fadd_op #
14941
short tbl_fadd_op - tbl_fadd_op #
14942
14943
short fadd_res_snan - tbl_fadd_op # SNAN + NORM
14944
short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
14945
short fadd_res_snan - tbl_fadd_op # SNAN + INF
14946
short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
14947
short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
14948
short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
14949
short tbl_fadd_op - tbl_fadd_op #
14950
short tbl_fadd_op - tbl_fadd_op #
14951
14952
fadd_res_qnan:
14953
bra.l res_qnan
14954
fadd_res_snan:
14955
bra.l res_snan
14956
14957
#
14958
# both operands are ZEROes
14959
#
14960
fadd_zero_2:
14961
mov.b SRC_EX(%a0),%d0 # are the signs opposite
14962
mov.b DST_EX(%a1),%d1
14963
eor.b %d0,%d1
14964
bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
14965
14966
# the signs are the same. so determine whether they are positive or negative
14967
# and return the appropriately signed zero.
14968
tst.b %d0 # are ZEROes positive or negative?
14969
bmi.b fadd_zero_rm # negative
14970
fmov.s &0x00000000,%fp0 # return +ZERO
14971
mov.b &z_bmask,FPSR_CC(%a6) # set Z
14972
rts
14973
14974
#
14975
# the ZEROes have opposite signs:
14976
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977
# - -ZERO is returned in the case of RM.
14978
#
14979
fadd_zero_2_chk_rm:
14980
mov.b 3+L_SCR3(%a6),%d1
14981
andi.b &0x30,%d1 # extract rnd mode
14982
cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
14983
beq.b fadd_zero_rm # yes
14984
fmov.s &0x00000000,%fp0 # return +ZERO
14985
mov.b &z_bmask,FPSR_CC(%a6) # set Z
14986
rts
14987
14988
fadd_zero_rm:
14989
fmov.s &0x80000000,%fp0 # return -ZERO
14990
mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991
rts
14992
14993
#
14994
# one operand is a ZERO and the other is a DENORM or NORM. scale
14995
# the DENORM or NORM and jump to the regular fadd routine.
14996
#
14997
fadd_zero_dst:
14998
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14999
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15000
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15001
bsr.l scale_to_zero_src # scale the operand
15002
clr.w FP_SCR1_EX(%a6)
15003
clr.l FP_SCR1_HI(%a6)
15004
clr.l FP_SCR1_LO(%a6)
15005
bra.w fadd_zero_entry # go execute fadd
15006
15007
fadd_zero_src:
15008
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15009
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15010
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15011
bsr.l scale_to_zero_dst # scale the operand
15012
clr.w FP_SCR0_EX(%a6)
15013
clr.l FP_SCR0_HI(%a6)
15014
clr.l FP_SCR0_LO(%a6)
15015
bra.w fadd_zero_entry # go execute fadd
15016
15017
#
15018
# both operands are INFs. an OPERR will result if the INFs have
15019
# different signs. else, an INF of the same sign is returned
15020
#
15021
fadd_inf_2:
15022
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15023
mov.b DST_EX(%a1),%d1
15024
eor.b %d1,%d0
15025
bmi.l res_operr # weed out (-INF)+(+INF)
15026
15027
# ok, so it's not an OPERR. but, we do have to remember to return the
15028
# src INF since that's where the 881/882 gets the j-bit from...
15029
15030
#
15031
# operands are INF and one of {ZERO, INF, DENORM, NORM}
15032
#
15033
fadd_inf_src:
15034
fmovm.x SRC(%a0),&0x80 # return src INF
15035
tst.b SRC_EX(%a0) # is INF positive?
15036
bpl.b fadd_inf_done # yes; we're done
15037
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038
rts
15039
15040
#
15041
# operands are INF and one of {ZERO, INF, DENORM, NORM}
15042
#
15043
fadd_inf_dst:
15044
fmovm.x DST(%a1),&0x80 # return dst INF
15045
tst.b DST_EX(%a1) # is INF positive?
15046
bpl.b fadd_inf_done # yes; we're done
15047
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048
rts
15049
15050
fadd_inf_done:
15051
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15052
rts
15053
15054
#########################################################################
15055
# XDEF **************************************************************** #
15056
# fsub(): emulates the fsub instruction #
15057
# fssub(): emulates the fssub instruction #
15058
# fdsub(): emulates the fdsub instruction #
15059
# #
15060
# XREF **************************************************************** #
15061
# addsub_scaler2() - scale the operands so they won't take exc #
15062
# ovf_res() - return default overflow result #
15063
# unf_res() - return default underflow result #
15064
# res_qnan() - set QNAN result #
15065
# res_snan() - set SNAN result #
15066
# res_operr() - set OPERR result #
15067
# scale_to_zero_src() - set src operand exponent equal to zero #
15068
# scale_to_zero_dst() - set dst operand exponent equal to zero #
15069
# #
15070
# INPUT *************************************************************** #
15071
# a0 = pointer to extended precision source operand #
15072
# a1 = pointer to extended precision destination operand #
15073
# #
15074
# OUTPUT ************************************************************** #
15075
# fp0 = result #
15076
# fp1 = EXOP (if exception occurred) #
15077
# #
15078
# ALGORITHM *********************************************************** #
15079
# Handle NANs, infinities, and zeroes as special cases. Divide #
15080
# norms into extended, single, and double precision. #
15081
# Do subtraction after scaling exponents such that exception won't	#
15082
# occur. Then, check result exponent to see if exception would have #
15083
# occurred. If so, return default result and maybe EXOP. Else, insert #
15084
# the correct result exponent and return. Set FPSR bits as appropriate. #
15085
# #
15086
#########################################################################
15087
15088
global fssub
15089
fssub:
15090
andi.b &0x30,%d0 # clear rnd prec
15091
ori.b &s_mode*0x10,%d0 # insert sgl prec
15092
bra.b fsub
15093
15094
global fdsub
15095
fdsub:
15096
andi.b &0x30,%d0 # clear rnd prec
15097
ori.b &d_mode*0x10,%d0 # insert dbl prec
15098
15099
global fsub
15100
fsub:
15101
mov.l %d0,L_SCR3(%a6) # store rnd info
15102
15103
clr.w %d1
15104
mov.b DTAG(%a6),%d1
15105
lsl.b &0x3,%d1
15106
or.b STAG(%a6),%d1 # combine src tags
15107
15108
bne.w fsub_not_norm # optimize on non-norm input
15109
15110
#
15111
# SUB: norms and denorms
15112
#
15113
fsub_norm:
15114
bsr.l addsub_scaler2 # scale exponents
15115
15116
fsub_zero_entry:
15117
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15118
15119
fmov.l &0x0,%fpsr # clear FPSR
15120
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15121
15122
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15123
15124
fmov.l &0x0,%fpcr # clear FPCR
15125
fmov.l %fpsr,%d1 # fetch INEX2, N, Z
15126
15127
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
15128
15129
fbeq.w fsub_zero_exit # if result zero, end now
15130
15131
mov.l %d2,-(%sp) # save d2
15132
15133
fmovm.x &0x01,-(%sp) # save result to stack
15134
15135
mov.w 2+L_SCR3(%a6),%d1
15136
lsr.b &0x6,%d1
15137
15138
mov.w (%sp),%d2 # fetch new exponent
15139
andi.l &0x7fff,%d2 # strip sign
15140
sub.l %d0,%d2 # add scale factor
15141
15142
cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15143
bge.b fsub_ovfl # yes
15144
15145
cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15146
blt.w fsub_unfl # yes
15147
beq.w fsub_may_unfl # maybe; go find out
15148
15149
fsub_normal:
15150
mov.w (%sp),%d1
15151
andi.w &0x8000,%d1 # keep sign
15152
or.w %d2,%d1 # insert new exponent
15153
mov.w %d1,(%sp) # insert new exponent
15154
15155
fmovm.x (%sp)+,&0x80 # return result in fp0
15156
15157
mov.l (%sp)+,%d2 # restore d2
15158
rts
15159
15160
fsub_zero_exit:
15161
# fmov.s &0x00000000,%fp0 # return zero in fp0
15162
rts
15163
15164
tbl_fsub_ovfl:
15165
long 0x7fff # ext ovfl
15166
long 0x407f # sgl ovfl
15167
long 0x43ff # dbl ovfl
15168
15169
tbl_fsub_unfl:
15170
long 0x0000 # ext unfl
15171
long 0x3f81 # sgl unfl
15172
long 0x3c01 # dbl unfl
15173
15174
fsub_ovfl:
15175
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15176
15177
mov.b FPCR_ENABLE(%a6),%d1
15178
andi.b &0x13,%d1 # is OVFL or INEX enabled?
15179
bne.b fsub_ovfl_ena # yes
15180
15181
add.l &0xc,%sp
15182
fsub_ovfl_dis:
15183
btst &neg_bit,FPSR_CC(%a6) # is result negative?
15184
sne %d1 # set sign param accordingly
15185
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
15186
bsr.l ovf_res # calculate default result
15187
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15188
fmovm.x (%a0),&0x80 # return default result in fp0
15189
mov.l (%sp)+,%d2 # restore d2
15190
rts
15191
15192
fsub_ovfl_ena:
15193
mov.b L_SCR3(%a6),%d1
15194
andi.b &0xc0,%d1 # is precision extended?
15195
bne.b fsub_ovfl_ena_sd # no
15196
15197
fsub_ovfl_ena_cont:
15198
mov.w (%sp),%d1 # fetch {sgn,exp}
15199
andi.w &0x8000,%d1 # keep sign
15200
subi.l &0x6000,%d2 # subtract new bias
15201
andi.w &0x7fff,%d2 # clear top bit
15202
or.w %d2,%d1 # concat sign,exp
15203
mov.w %d1,(%sp) # insert new exponent
15204
15205
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
15206
bra.b fsub_ovfl_dis
15207
15208
fsub_ovfl_ena_sd:
15209
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15210
15211
mov.l L_SCR3(%a6),%d1
15212
andi.b &0x30,%d1 # clear rnd prec
15213
fmov.l %d1,%fpcr # set FPCR
15214
15215
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15216
15217
fmov.l &0x0,%fpcr # clear FPCR
15218
15219
add.l &0xc,%sp
15220
fmovm.x &0x01,-(%sp)
15221
bra.b fsub_ovfl_ena_cont
15222
15223
fsub_unfl:
15224
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15225
15226
add.l &0xc,%sp
15227
15228
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15229
15230
fmov.l &rz_mode*0x10,%fpcr # set FPCR
15231
fmov.l &0x0,%fpsr # clear FPSR
15232
15233
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15234
15235
fmov.l &0x0,%fpcr # clear FPCR
15236
fmov.l %fpsr,%d1 # save status
15237
15238
or.l %d1,USER_FPSR(%a6)
15239
15240
mov.b FPCR_ENABLE(%a6),%d1
15241
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15242
bne.b fsub_unfl_ena # yes
15243
15244
fsub_unfl_dis:
15245
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15246
15247
lea FP_SCR0(%a6),%a0 # pass: result addr
15248
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15249
bsr.l unf_res # calculate default result
15250
or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
15251
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15252
mov.l (%sp)+,%d2 # restore d2
15253
rts
15254
15255
fsub_unfl_ena:
15256
fmovm.x FP_SCR1(%a6),&0x40
15257
15258
mov.l L_SCR3(%a6),%d1
15259
andi.b &0xc0,%d1 # is precision extended?
15260
bne.b fsub_unfl_ena_sd # no
15261
15262
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15263
15264
fsub_unfl_ena_cont:
15265
fmov.l &0x0,%fpsr # clear FPSR
15266
15267
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15268
15269
fmov.l &0x0,%fpcr # clear FPCR
15270
15271
fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
15272
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15273
mov.l %d1,%d2 # make a copy
15274
andi.l &0x7fff,%d1 # strip sign
15275
andi.w &0x8000,%d2 # keep old sign
15276
sub.l %d0,%d1 # add scale factor
15277
addi.l &0x6000,%d1 # add new bias
15278
andi.w &0x7fff,%d1 # clear top bit
15279
or.w %d2,%d1 # concat sgn,exp
15280
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15281
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15282
bra.w fsub_unfl_dis
15283
15284
fsub_unfl_ena_sd:
15285
mov.l L_SCR3(%a6),%d1
15286
andi.b &0x30,%d1 # clear rnd prec
15287
fmov.l %d1,%fpcr # set FPCR
15288
15289
bra.b fsub_unfl_ena_cont
15290
15291
#
15292
# result is equal to the smallest normalized number in the selected precision
15293
# if the precision is extended, this result could not have come from an
15294
# underflow that rounded up.
15295
#
15296
fsub_may_unfl:
15297
mov.l L_SCR3(%a6),%d1
15298
andi.b &0xc0,%d1 # fetch rnd prec
15299
beq.w fsub_normal # yes; no underflow occurred
15300
15301
mov.l 0x4(%sp),%d1
15302
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
15303
bne.w fsub_normal # no; no underflow occurred
15304
15305
tst.l 0x8(%sp) # is lo(man) = 0x0?
15306
bne.w fsub_normal # no; no underflow occurred
15307
15308
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309
beq.w fsub_normal # no; no underflow occurred
15310
15311
#
15312
# ok, so now the result has an exponent equal to the smallest normalized
15313
# exponent for the selected precision. also, the mantissa is equal to
15314
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
15315
# g,r,s.
15316
# now, we must determine whether the pre-rounded result was an underflow
15317
# rounded "up" or a normalized number rounded "down".
15318
# so, we do this by re-executing the subtract using RZ as the rounding mode and
15319
# seeing if the new result is smaller or equal to the current result.
15320
#
15321
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15322
15323
mov.l L_SCR3(%a6),%d1
15324
andi.b &0xc0,%d1 # keep rnd prec
15325
ori.b &rz_mode*0x10,%d1 # insert rnd mode
15326
fmov.l %d1,%fpcr # set FPCR
15327
fmov.l &0x0,%fpsr # clear FPSR
15328
15329
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15330
15331
fmov.l &0x0,%fpcr # clear FPCR
15332
15333
fabs.x %fp0 # compare absolute values
15334
fabs.x %fp1
15335
fcmp.x %fp0,%fp1 # is first result > second?
15336
15337
fbgt.w fsub_unfl # yes; it's an underflow
15338
bra.w fsub_normal # no; it's not an underflow
15339
15340
##########################################################################
15341
15342
#
15343
# Sub: inputs are not both normalized; what are they?
15344
#
15345
fsub_not_norm:
15346
mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
15347
jmp (tbl_fsub_op.b,%pc,%d1.w*1)
15348
15349
swbeg &48
15350
tbl_fsub_op:
15351
short fsub_norm - tbl_fsub_op # NORM - NORM
15352
short fsub_zero_src - tbl_fsub_op # NORM - ZERO
15353
short fsub_inf_src - tbl_fsub_op # NORM - INF
15354
short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
15355
short fsub_norm - tbl_fsub_op # NORM - DENORM
15356
short fsub_res_snan - tbl_fsub_op # NORM - SNAN
15357
short tbl_fsub_op - tbl_fsub_op #
15358
short tbl_fsub_op - tbl_fsub_op #
15359
15360
short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
15361
short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
15362
short fsub_inf_src - tbl_fsub_op # ZERO - INF
15363
short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
15364
short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
15365
short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
15366
short tbl_fsub_op - tbl_fsub_op #
15367
short tbl_fsub_op - tbl_fsub_op #
15368
15369
short fsub_inf_dst - tbl_fsub_op # INF - NORM
15370
short fsub_inf_dst - tbl_fsub_op # INF - ZERO
15371
short fsub_inf_2 - tbl_fsub_op # INF - INF
15372
short fsub_res_qnan - tbl_fsub_op # INF - QNAN
15373
short fsub_inf_dst - tbl_fsub_op # INF - DENORM
15374
short fsub_res_snan - tbl_fsub_op # INF - SNAN
15375
short tbl_fsub_op - tbl_fsub_op #
15376
short tbl_fsub_op - tbl_fsub_op #
15377
15378
short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
15379
short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
15380
short fsub_res_qnan - tbl_fsub_op # QNAN - INF
15381
short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
15382
short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
15383
short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
15384
short tbl_fsub_op - tbl_fsub_op #
15385
short tbl_fsub_op - tbl_fsub_op #
15386
15387
short fsub_norm - tbl_fsub_op # DENORM - NORM
15388
short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
15389
short fsub_inf_src - tbl_fsub_op # DENORM - INF
15390
short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
15391
short fsub_norm - tbl_fsub_op # DENORM - DENORM
15392
short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
15393
short tbl_fsub_op - tbl_fsub_op #
15394
short tbl_fsub_op - tbl_fsub_op #
15395
15396
short fsub_res_snan - tbl_fsub_op # SNAN - NORM
15397
short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
15398
short fsub_res_snan - tbl_fsub_op # SNAN - INF
15399
short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
15400
short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
15401
short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
15402
short tbl_fsub_op - tbl_fsub_op #
15403
short tbl_fsub_op - tbl_fsub_op #
15404
15405
fsub_res_qnan:
15406
bra.l res_qnan
15407
fsub_res_snan:
15408
bra.l res_snan
15409
15410
#
15411
# both operands are ZEROes
15412
#
15413
fsub_zero_2:
15414
mov.b SRC_EX(%a0),%d0
15415
mov.b DST_EX(%a1),%d1
15416
eor.b %d1,%d0
15417
bpl.b fsub_zero_2_chk_rm
15418
15419
# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
15420
tst.b %d0 # is dst negative?
15421
bmi.b fsub_zero_2_rm # yes
15422
fmov.s &0x00000000,%fp0 # no; return +ZERO
15423
mov.b &z_bmask,FPSR_CC(%a6) # set Z
15424
rts
15425
15426
#
15427
# the ZEROes have the same signs:
15428
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429
# - -ZERO is returned in the case of RM.
15430
#
15431
fsub_zero_2_chk_rm:
15432
mov.b 3+L_SCR3(%a6),%d1
15433
andi.b &0x30,%d1 # extract rnd mode
15434
cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
15435
beq.b fsub_zero_2_rm # yes
15436
fmov.s &0x00000000,%fp0 # no; return +ZERO
15437
mov.b &z_bmask,FPSR_CC(%a6) # set Z
15438
rts
15439
15440
fsub_zero_2_rm:
15441
fmov.s &0x80000000,%fp0 # return -ZERO
15442
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
15443
rts
15444
15445
#
15446
# one operand is a ZERO and the other is a DENORM or a NORM.
15447
# scale the DENORM or NORM and jump to the regular fsub routine.
15448
#
15449
fsub_zero_dst:
15450
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15451
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15452
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15453
bsr.l scale_to_zero_src # scale the operand
15454
clr.w FP_SCR1_EX(%a6)
15455
clr.l FP_SCR1_HI(%a6)
15456
clr.l FP_SCR1_LO(%a6)
15457
bra.w fsub_zero_entry # go execute fsub
15458
15459
fsub_zero_src:
15460
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15461
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15462
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15463
bsr.l scale_to_zero_dst # scale the operand
15464
clr.w FP_SCR0_EX(%a6)
15465
clr.l FP_SCR0_HI(%a6)
15466
clr.l FP_SCR0_LO(%a6)
15467
bra.w fsub_zero_entry # go execute fsub
15468
15469
#
15470
# both operands are INFs. an OPERR will result if the INFs have the
15471
# same signs. else, the src INF (with its sign negated) is returned.
15472
#
15473
fsub_inf_2:
15474
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15475
mov.b DST_EX(%a1),%d1
15476
eor.b %d1,%d0
15477
bpl.l res_operr # weed out (+INF)-(+INF), (-INF)-(-INF)
15478
15479
# ok, so it's not an OPERR. but we do have to remember to return
15480
# the src INF since that's where the 881/882 gets the j-bit.
15481
15482
fsub_inf_src:
15483
fmovm.x SRC(%a0),&0x80 # return src INF
15484
fneg.x %fp0 # invert sign
15485
fbge.w fsub_inf_done # sign is now positive
15486
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15487
rts
15488
15489
fsub_inf_dst:
15490
fmovm.x DST(%a1),&0x80 # return dst INF
15491
tst.b DST_EX(%a1) # is INF negative?
15492
bpl.b fsub_inf_done # no
15493
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15494
rts
15495
15496
fsub_inf_done:
15497
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15498
rts
15499
15500
#########################################################################
15501
# XDEF **************************************************************** #
15502
# fsqrt(): emulates the fsqrt instruction #
15503
# fssqrt(): emulates the fssqrt instruction #
15504
# fdsqrt(): emulates the fdsqrt instruction #
15505
# #
15506
# XREF **************************************************************** #
15507
# scale_sqrt() - scale the source operand #
15508
# unf_res() - return default underflow result #
15509
# ovf_res() - return default overflow result #
15510
# res_qnan_1op() - return QNAN result #
15511
# res_snan_1op() - return SNAN result #
15512
# #
15513
# INPUT *************************************************************** #
15514
# a0 = pointer to extended precision source operand #
15515
# d0 = rnd prec,mode						#
15516
# #
15517
# OUTPUT ************************************************************** #
15518
# fp0 = result #
15519
# fp1 = EXOP (if exception occurred) #
15520
# #
15521
# ALGORITHM *********************************************************** #
15522
# Handle NANs, infinities, and zeroes as special cases. Divide #
15523
# norms/denorms into ext/sgl/dbl precision. #
15524
# For norms/denorms, scale the exponents such that a sqrt #
15525
# instruction won't cause an exception. Use the regular fsqrt to #
15526
# compute a result. Check if the regular operands would have taken #
15527
# an exception. If so, return the default overflow/underflow result #
15528
# and return the EXOP if exceptions are enabled. Else, scale the #
15529
# result operand to the proper exponent. #
15530
# #
15531
#########################################################################
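#
# A rough C view of the scaling idea above, for illustration only; sqrtl()
# stands in for the hardware fsqrt and the helper names are not part of this
# package:
#
#	/* sqrt(m * 2^e) = sqrt(m * 2^e') * 2^((e - e')/2), where scale_sqrt()
#	 * forces e' to 0x3ffe/0x3fff and returns scale = (e' - e)/2 in d0
#	 */
#	res = sqrtl(scaled_src);		/* cannot overflow or underflow */
#	res_exp = biased_exp(res) - scale;	/* undo the scaling */
#	/* if res_exp falls outside the ovfl/unfl range for the rounding
#	 * precision, return the default result (and the EXOP if enabled)
#	 */
#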
15532
15533
global fssqrt
15534
fssqrt:
15535
andi.b &0x30,%d0 # clear rnd prec
15536
ori.b &s_mode*0x10,%d0 # insert sgl precision
15537
bra.b fsqrt
15538
15539
global fdsqrt
15540
fdsqrt:
15541
andi.b &0x30,%d0 # clear rnd prec
15542
ori.b &d_mode*0x10,%d0 # insert dbl precision
15543
15544
global fsqrt
15545
fsqrt:
15546
mov.l %d0,L_SCR3(%a6) # store rnd info
15547
clr.w %d1
15548
mov.b STAG(%a6),%d1
15549
bne.w fsqrt_not_norm # optimize on non-norm input
15550
15551
#
15552
# SQUARE ROOT: norms and denorms ONLY!
15553
#
15554
fsqrt_norm:
15555
tst.b SRC_EX(%a0) # is operand negative?
15556
bmi.l res_operr # yes
15557
15558
andi.b &0xc0,%d0 # is precision extended?
15559
bne.b fsqrt_not_ext # no; go handle sgl or dbl
15560
15561
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15562
fmov.l &0x0,%fpsr # clear FPSR
15563
15564
fsqrt.x (%a0),%fp0 # execute square root
15565
15566
fmov.l %fpsr,%d1
15567
or.l %d1,USER_FPSR(%a6) # set N,INEX
15568
15569
rts
15570
15571
fsqrt_denorm:
15572
tst.b SRC_EX(%a0) # is operand negative?
15573
bmi.l res_operr # yes
15574
15575
andi.b &0xc0,%d0 # is precision extended?
15576
bne.b fsqrt_not_ext # no; go handle sgl or dbl
15577
15578
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15579
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15580
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15581
15582
bsr.l scale_sqrt # calculate scale factor
15583
15584
bra.w fsqrt_sd_normal
15585
15586
#
15587
# operand is either single or double
15588
#
15589
fsqrt_not_ext:
15590
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
15591
bne.w fsqrt_dbl
15592
15593
#
15594
# operand is to be rounded to single precision
15595
#
15596
fsqrt_sgl:
15597
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15598
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15599
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15600
15601
bsr.l scale_sqrt # calculate scale factor
15602
15603
cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
15604
beq.w fsqrt_sd_may_unfl
15605
bgt.w fsqrt_sd_unfl # yes; go handle underflow
15606
cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
15607
beq.w fsqrt_sd_may_ovfl # maybe; go check
15608
blt.w fsqrt_sd_ovfl # yes; go handle overflow
15609
15610
#
15611
# operand will NOT overflow or underflow when moved in to the fp reg file
15612
#
15613
fsqrt_sd_normal:
15614
fmov.l &0x0,%fpsr # clear FPSR
15615
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15616
15617
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15618
15619
fmov.l %fpsr,%d1 # save FPSR
15620
fmov.l &0x0,%fpcr # clear FPCR
15621
15622
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15623
15624
fsqrt_sd_normal_exit:
15625
mov.l %d2,-(%sp) # save d2
15626
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15627
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
15628
mov.l %d1,%d2 # make a copy
15629
andi.l &0x7fff,%d1 # strip sign
15630
sub.l %d0,%d1 # add scale factor
15631
andi.w &0x8000,%d2 # keep old sign
15632
or.w %d1,%d2 # concat old sign,new exp
15633
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
15634
mov.l (%sp)+,%d2 # restore d2
15635
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
15636
rts
15637
15638
#
15639
# operand is to be rounded to double precision
15640
#
15641
fsqrt_dbl:
15642
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15643
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15644
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15645
15646
bsr.l scale_sqrt # calculate scale factor
15647
15648
cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
15649
beq.w fsqrt_sd_may_unfl
15650
bgt.b fsqrt_sd_unfl # yes; go handle underflow
15651
cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
15652
beq.w fsqrt_sd_may_ovfl # maybe; go check
15653
blt.w fsqrt_sd_ovfl # yes; go handle overflow
15654
bra.w fsqrt_sd_normal # no; go handle normalized op
15655
15656
# we're on the line here and the distinguishing characteristic is whether
15657
# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
15658
# otherwise fall through to underflow.
15659
fsqrt_sd_may_unfl:
15660
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15661
bne.w fsqrt_sd_normal # yes, so no underflow
15662
15663
#
15664
# operand WILL underflow when moved in to the fp register file
15665
#
15666
fsqrt_sd_unfl:
15667
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15668
15669
fmov.l &rz_mode*0x10,%fpcr # set FPCR
15670
fmov.l &0x0,%fpsr # clear FPSR
15671
15672
fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
15673
15674
fmov.l %fpsr,%d1 # save status
15675
fmov.l &0x0,%fpcr # clear FPCR
15676
15677
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15678
15679
# if underflow or inexact is enabled, go calculate EXOP first.
15680
mov.b FPCR_ENABLE(%a6),%d1
15681
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15682
bne.b fsqrt_sd_unfl_ena # yes
15683
15684
fsqrt_sd_unfl_dis:
15685
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15686
15687
lea FP_SCR0(%a6),%a0 # pass: result addr
15688
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15689
bsr.l unf_res # calculate default result
15690
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
15691
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15692
rts
15693
15694
#
15695
# operand will underflow AND underflow is enabled.
15696
# Therefore, we must return the result rounded to extended precision.
15697
#
15698
fsqrt_sd_unfl_ena:
15699
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15700
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15701
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
15702
15703
mov.l %d2,-(%sp) # save d2
15704
mov.l %d1,%d2 # make a copy
15705
andi.l &0x7fff,%d1 # strip sign
15706
andi.w &0x8000,%d2 # keep old sign
15707
sub.l %d0,%d1 # subtract scale factor
15708
addi.l &0x6000,%d1 # add new bias
15709
andi.w &0x7fff,%d1
15710
or.w %d2,%d1 # concat new sign,new exp
15711
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
15712
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
15713
mov.l (%sp)+,%d2 # restore d2
15714
bra.b fsqrt_sd_unfl_dis
15715
15716
#
15717
# operand WILL overflow.
15718
#
15719
fsqrt_sd_ovfl:
15720
fmov.l &0x0,%fpsr # clear FPSR
15721
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15722
15723
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15724
15725
fmov.l &0x0,%fpcr # clear FPCR
15726
fmov.l %fpsr,%d1 # save FPSR
15727
15728
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15729
15730
fsqrt_sd_ovfl_tst:
15731
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15732
15733
mov.b FPCR_ENABLE(%a6),%d1
15734
andi.b &0x13,%d1 # is OVFL or INEX enabled?
15735
bne.b fsqrt_sd_ovfl_ena # yes
15736
15737
#
15738
# OVFL is not enabled; therefore, we must create the default result by
15739
# calling ovf_res().
15740
#
15741
fsqrt_sd_ovfl_dis:
15742
btst &neg_bit,FPSR_CC(%a6) # is result negative?
15743
sne %d1 # set sign param accordingly
15744
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
15745
bsr.l ovf_res # calculate default result
15746
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15747
fmovm.x (%a0),&0x80 # return default result in fp0
15748
rts
15749
15750
#
15751
# OVFL is enabled.
15752
# the INEX2 bit has already been updated by the round to the correct precision.
15753
# now, round to extended (and don't alter the FPSR).
15754
#
15755
fsqrt_sd_ovfl_ena:
15756
mov.l %d2,-(%sp) # save d2
15757
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15758
mov.l %d1,%d2 # make a copy
15759
andi.l &0x7fff,%d1 # strip sign
15760
andi.w &0x8000,%d2 # keep old sign
15761
sub.l %d0,%d1 # add scale factor
15762
subi.l &0x6000,%d1 # subtract bias
15763
andi.w &0x7fff,%d1
15764
or.w %d2,%d1 # concat sign,exp
15765
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15766
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15767
mov.l (%sp)+,%d2 # restore d2
15768
bra.b fsqrt_sd_ovfl_dis
15769
15770
#
15771
# the move in MAY overflow. so...
15772
#
15773
fsqrt_sd_may_ovfl:
15774
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15775
bne.w fsqrt_sd_ovfl # yes, so overflow
15776
15777
fmov.l &0x0,%fpsr # clear FPSR
15778
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15779
15780
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15781
15782
fmov.l %fpsr,%d1 # save status
15783
fmov.l &0x0,%fpcr # clear FPCR
15784
15785
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15786
15787
fmov.x %fp0,%fp1 # make a copy of result
15788
fcmp.b %fp1,&0x1 # is |result| >= 1.b?
15789
fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
15790
15791
# no, it didn't overflow; we have correct result
15792
bra.w fsqrt_sd_normal_exit
15793
15794
##########################################################################
15795
15796
#
15797
# input is not normalized; what is it?
15798
#
15799
fsqrt_not_norm:
15800
cmpi.b %d1,&DENORM # weed out DENORM
15801
beq.w fsqrt_denorm
15802
cmpi.b %d1,&ZERO # weed out ZERO
15803
beq.b fsqrt_zero
15804
cmpi.b %d1,&INF # weed out INF
15805
beq.b fsqrt_inf
15806
cmpi.b %d1,&SNAN # weed out SNAN
15807
beq.l res_snan_1op
15808
bra.l res_qnan_1op
15809
15810
#
15811
# fsqrt(+0) = +0
15812
# fsqrt(-0) = -0
15813
# fsqrt(+INF) = +INF
15814
# fsqrt(-INF) = OPERR
15815
#
15816
fsqrt_zero:
15817
tst.b SRC_EX(%a0) # is ZERO positive or negative?
15818
bmi.b fsqrt_zero_m # negative
15819
fsqrt_zero_p:
15820
fmov.s &0x00000000,%fp0 # return +ZERO
15821
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
15822
rts
15823
fsqrt_zero_m:
15824
fmov.s &0x80000000,%fp0 # return -ZERO
15825
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
15826
rts
15827
15828
fsqrt_inf:
15829
tst.b SRC_EX(%a0) # is INF positive or negative?
15830
bmi.l res_operr # negative
15831
fsqrt_inf_p:
15832
fmovm.x SRC(%a0),&0x80 # return +INF in fp0
15833
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
15834
rts
15835
15836
##########################################################################
15837
15838
#########################################################################
15839
# XDEF **************************************************************** #
15840
# addsub_scaler2(): scale inputs to fadd/fsub such that no #
15841
# OVFL/UNFL exceptions will result #
15842
# #
15843
# XREF **************************************************************** #
15844
# norm() - normalize mantissa after adjusting exponent #
15845
# #
15846
# INPUT *************************************************************** #
15847
# FP_SRC(a6) = fp op1(src) #
15848
# FP_DST(a6) = fp op2(dst) #
15849
# #
15850
# OUTPUT ************************************************************** #
15851
# FP_SRC(a6) = fp op1 scaled(src) #
15852
# FP_DST(a6) = fp op2 scaled(dst) #
15853
# d0 = scale amount #
15854
# #
15855
# ALGORITHM *********************************************************** #
15856
# If the DST exponent is > the SRC exponent, set the DST exponent #
15857
# equal to 0x3fff and scale the SRC exponent by the value that the #
15858
# DST exponent was scaled by. If the SRC exponent is greater or equal, #
15859
# do the opposite. Return this scale factor in d0. #
15860
# If the two exponents differ by > the number of mantissa bits #
15861
# plus two, then set the smallest exponent to a very small value as a #
15862
# quick shortcut. #
15863
# #
15864
#########################################################################
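#
# Rough C sketch of the scaling above (illustrative only; these names are not
# part of the package, which works on the raw images in FP_SCR0/FP_SCR1):
#
#	scale = 0x3fff - larger_exp;	/* force the larger exp to 0x3fff */
#	larger_exp  += scale;		/* now 0x3fff */
#	smaller_exp += scale;		/* shifted by the same amount */
#	/* operands too far apart to interact beyond a sticky bit?
#	 * then just pin the smaller exponent to 1 as a shortcut.
#	 */
#	if (larger_exp - smaller_exp >= MANTISSA_LEN + 2)
#		smaller_exp = 1;
#	return scale;			/* handed back in d0 */
#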
15865
15866
global addsub_scaler2
15867
addsub_scaler2:
15868
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15869
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15870
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15871
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15872
mov.w SRC_EX(%a0),%d0
15873
mov.w DST_EX(%a1),%d1
15874
mov.w %d0,FP_SCR0_EX(%a6)
15875
mov.w %d1,FP_SCR1_EX(%a6)
15876
15877
andi.w &0x7fff,%d0
15878
andi.w &0x7fff,%d1
15879
mov.w %d0,L_SCR1(%a6) # store src exponent
15880
mov.w %d1,2+L_SCR1(%a6) # store dst exponent
15881
15882
cmp.w %d0, %d1 # is src exp >= dst exp?
15883
bge.l src_exp_ge2
15884
15885
# dst exp is > src exp; scale dst to exp = 0x3fff
15886
dst_exp_gt2:
15887
bsr.l scale_to_zero_dst
15888
mov.l %d0,-(%sp) # save scale factor
15889
15890
cmpi.b STAG(%a6),&DENORM # is src denormalized?
15891
bne.b cmpexp12
15892
15893
lea FP_SCR0(%a6),%a0
15894
bsr.l norm # normalize the denorm; result is new exp
15895
neg.w %d0 # new exp = -(shft val)
15896
mov.w %d0,L_SCR1(%a6) # insert new exp
15897
15898
cmpexp12:
15899
mov.w 2+L_SCR1(%a6),%d0
15900
subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15901
15902
cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
15903
bge.b quick_scale12
15904
15905
mov.w L_SCR1(%a6),%d0
15906
add.w 0x2(%sp),%d0 # scale src exponent by scale factor
15907
mov.w FP_SCR0_EX(%a6),%d1
15908
and.w &0x8000,%d1
15909
or.w %d1,%d0 # concat {sgn,new exp}
15910
mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
15911
15912
mov.l (%sp)+,%d0 # return SCALE factor
15913
rts
15914
15915
quick_scale12:
15916
andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15917
bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
15918
15919
mov.l (%sp)+,%d0 # return SCALE factor
15920
rts
15921
15922
# src exp is >= dst exp; scale src to exp = 0x3fff
15923
src_exp_ge2:
15924
bsr.l scale_to_zero_src
15925
mov.l %d0,-(%sp) # save scale factor
15926
15927
cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
15928
bne.b cmpexp22
15929
lea FP_SCR1(%a6),%a0
15930
bsr.l norm # normalize the denorm; result is new exp
15931
neg.w %d0 # new exp = -(shft val)
15932
mov.w %d0,2+L_SCR1(%a6) # insert new exp
15933
15934
cmpexp22:
15935
mov.w L_SCR1(%a6),%d0
15936
subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15937
15938
cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
15939
bge.b quick_scale22
15940
15941
mov.w 2+L_SCR1(%a6),%d0
15942
add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
15943
mov.w FP_SCR1_EX(%a6),%d1
15944
andi.w &0x8000,%d1
15945
or.w %d1,%d0 # concat {sgn,new exp}
15946
mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
15947
15948
mov.l (%sp)+,%d0 # return SCALE factor
15949
rts
15950
15951
quick_scale22:
15952
andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15953
bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
15954
15955
mov.l (%sp)+,%d0 # return SCALE factor
15956
rts
15957
15958
##########################################################################
15959
15960
#########################################################################
15961
# XDEF **************************************************************** #
15962
# scale_to_zero_src(): scale the exponent of extended precision #
15963
# value at FP_SCR0(a6). #
15964
# #
15965
# XREF **************************************************************** #
15966
# norm() - normalize the mantissa if the operand was a DENORM #
15967
# #
15968
# INPUT *************************************************************** #
15969
# FP_SCR0(a6) = extended precision operand to be scaled #
15970
# #
15971
# OUTPUT ************************************************************** #
15972
# FP_SCR0(a6) = scaled extended precision operand #
15973
# d0 = scale value #
15974
# #
15975
# ALGORITHM *********************************************************** #
15976
# Set the exponent of the input operand to 0x3fff. Save the value #
15977
# of the difference between the original and new exponent. Then, #
15978
# normalize the operand if it was a DENORM. Add this normalization #
15979
# value to the previous value. Return the result. #
15980
# #
15981
#########################################################################
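#
# Equivalent C sketch (illustrative only; norm() below is a schematic stand-in
# for the package's norm routine, which returns the normalization shift count):
#
#	if (operand_is_denorm)
#		exp = -norm(&mantissa);		/* new exp = -(shift count) */
#	scale = 0x3fff - exp;			/* returned in d0 */
#	set_biased_exp(operand, 0x3fff);	/* sign bit is preserved */
#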
15982
15983
global scale_to_zero_src
15984
scale_to_zero_src:
15985
mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
15986
mov.w %d1,%d0 # make a copy
15987
15988
andi.l &0x7fff,%d1 # extract operand's exponent
15989
15990
andi.w &0x8000,%d0 # extract operand's sgn
15991
or.w &0x3fff,%d0 # insert new operand's exponent(=0)
15992
15993
mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
15994
15995
cmpi.b STAG(%a6),&DENORM # is operand normalized?
15996
beq.b stzs_denorm # normalize the DENORM
15997
15998
stzs_norm:
15999
mov.l &0x3fff,%d0
16000
sub.l %d1,%d0 # scale = BIAS + (-exp)
16001
16002
rts
16003
16004
stzs_denorm:
16005
lea FP_SCR0(%a6),%a0 # pass ptr to src op
16006
bsr.l norm # normalize denorm
16007
neg.l %d0 # new exponent = -(shft val)
16008
mov.l %d0,%d1 # prepare for op_norm call
16009
bra.b stzs_norm # finish scaling
16010
16011
###
16012
16013
#########################################################################
16014
# XDEF **************************************************************** #
16015
# scale_sqrt(): scale the input operand exponent so a subsequent #
16016
# fsqrt operation won't take an exception. #
16017
# #
16018
# XREF **************************************************************** #
16019
# norm() - normalize the mantissa if the operand was a DENORM #
16020
# #
16021
# INPUT *************************************************************** #
16022
# FP_SCR0(a6) = extended precision operand to be scaled #
16023
# #
16024
# OUTPUT ************************************************************** #
16025
# FP_SCR0(a6) = scaled extended precision operand #
16026
# d0 = scale value #
16027
# #
16028
# ALGORITHM *********************************************************** #
16029
# If the input operand is a DENORM, normalize it. #
16030
# If the exponent of the input operand is even, set the exponent #
16031
# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
16032
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
16033
# return a scale factor of "(0x3fff-exp)/2".				#
16034
# #
16035
#########################################################################
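#
# Rough C sketch of the even/odd split (illustrative only):
#
#	if (exp & 1) {			/* odd exponent */
#		new_exp = 0x3fff;
#		scale   = (0x3fff - exp) / 2;	/* exact: difference is even */
#	} else {			/* even exponent */
#		new_exp = 0x3ffe;
#		scale   = (0x3ffe - exp) / 2;
#	}
#	/* since exp == new_exp - 2*scale, the result exponent of the fsqrt
#	 * is rebuilt exactly by subtracting scale from it afterwards
#	 */
#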
16036
16037
global scale_sqrt
16038
scale_sqrt:
16039
cmpi.b STAG(%a6),&DENORM # is operand normalized?
16040
beq.b ss_denorm # normalize the DENORM
16041
16042
mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
16043
andi.l &0x7fff,%d1 # extract operand's exponent
16044
16045
andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
16046
16047
btst &0x0,%d1 # is exp even or odd?
16048
beq.b ss_norm_even
16049
16050
ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16051
16052
mov.l &0x3fff,%d0
16053
sub.l %d1,%d0 # scale = BIAS + (-exp)
16054
asr.l &0x1,%d0 # divide scale factor by 2
16055
rts
16056
16057
ss_norm_even:
16058
ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16059
16060
mov.l &0x3ffe,%d0
16061
sub.l %d1,%d0 # scale = BIAS + (-exp)
16062
asr.l &0x1,%d0 # divide scale factor by 2
16063
rts
16064
16065
ss_denorm:
16066
lea FP_SCR0(%a6),%a0 # pass ptr to src op
16067
bsr.l norm # normalize denorm
16068
16069
btst &0x0,%d0 # is exp even or odd?
16070
beq.b ss_denorm_even
16071
16072
ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16073
16074
add.l &0x3fff,%d0
16075
asr.l &0x1,%d0 # divide scale factor by 2
16076
rts
16077
16078
ss_denorm_even:
16079
ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16080
16081
add.l &0x3ffe,%d0
16082
asr.l &0x1,%d0 # divide scale factor by 2
16083
rts
16084
16085
###
16086
16087
#########################################################################
16088
# XDEF **************************************************************** #
16089
# scale_to_zero_dst(): scale the exponent of extended precision #
16090
# value at FP_SCR1(a6). #
16091
# #
16092
# XREF **************************************************************** #
16093
# norm() - normalize the mantissa if the operand was a DENORM #
16094
# #
16095
# INPUT *************************************************************** #
16096
# FP_SCR1(a6) = extended precision operand to be scaled #
16097
# #
16098
# OUTPUT ************************************************************** #
16099
# FP_SCR1(a6) = scaled extended precision operand #
16100
# d0 = scale value #
16101
# #
16102
# ALGORITHM *********************************************************** #
16103
# Set the exponent of the input operand to 0x3fff. Save the value #
16104
# of the difference between the original and new exponent. Then, #
16105
# normalize the operand if it was a DENORM. Add this normalization #
16106
# value to the previous value. Return the result. #
16107
# #
16108
#########################################################################
16109
16110
global scale_to_zero_dst
16111
scale_to_zero_dst:
16112
mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
16113
mov.w %d1,%d0 # make a copy
16114
16115
andi.l &0x7fff,%d1 # extract operand's exponent
16116
16117
andi.w &0x8000,%d0 # extract operand's sgn
16118
or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16119
16120
mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
16121
16122
cmpi.b DTAG(%a6),&DENORM # is operand normalized?
16123
beq.b stzd_denorm # normalize the DENORM
16124
16125
stzd_norm:
16126
mov.l &0x3fff,%d0
16127
sub.l %d1,%d0 # scale = BIAS + (-exp)
16128
rts
16129
16130
stzd_denorm:
16131
lea FP_SCR1(%a6),%a0 # pass ptr to dst op
16132
bsr.l norm # normalize denorm
16133
neg.l %d0 # new exponent = -(shft val)
16134
mov.l %d0,%d1 # prepare for op_norm call
16135
bra.b stzd_norm # finish scaling
16136
16137
##########################################################################
16138
16139
#########################################################################
16140
# XDEF **************************************************************** #
16141
# res_qnan(): return default result w/ QNAN operand for dyadic #
16142
# res_snan(): return default result w/ SNAN operand for dyadic #
16143
# res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
16144
# res_snan_1op(): return dflt result w/ SNAN operand for monadic #
16145
# #
16146
# XREF **************************************************************** #
16147
# None #
16148
# #
16149
# INPUT *************************************************************** #
16150
# FP_SRC(a6) = pointer to extended precision src operand #
16151
# FP_DST(a6) = pointer to extended precision dst operand #
16152
# #
16153
# OUTPUT ************************************************************** #
16154
# fp0 = default result #
16155
# #
16156
# ALGORITHM *********************************************************** #
16157
# If either operand (but not both operands) of an operation is a #
16158
# nonsignalling NAN, then that NAN is returned as the result. If both #
16159
# operands are nonsignalling NANs, then the destination operand #
16160
# nonsignalling NAN is returned as the result. #
16161
# If either operand to an operation is a signalling NAN (SNAN), #
16162
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
16163
# enable bit is set in the FPCR, then the trap is taken and the #
16164
# destination is not modified. If the SNAN trap enable bit is not set, #
16165
# then the SNAN is converted to a nonsignalling NAN (by setting the #
16166
# SNAN bit in the operand to one), and the operation continues as #
16167
# described in the preceding paragraph, for nonsignalling NANs. #
16168
# Make sure the appropriate FPSR bits are set before exiting. #
16169
# #
16170
#########################################################################
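#
# The selection order described above, as a C sketch (illustrative only):
#
#	/* dyadic case: the destination NAN wins; an SNAN is made quiet by
#	 * setting the quiet bit (msb of the fraction) before it is returned,
#	 * and the SNAN/AIOP bits are accrued in the FPSR.
#	 */
#	if (dst_tag == SNAN)      { quiet(dst); return dst; }
#	else if (dst_tag == QNAN) { return dst; }
#	else if (src_tag == QNAN) { return src; }
#	else                      { quiet(src); return src; }	/* src is SNAN */
#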
16171
16172
global res_qnan
16173
global res_snan
16174
res_qnan:
16175
res_snan:
16176
cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
16177
beq.b dst_snan2
16178
cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
16179
beq.b dst_qnan2
16180
src_nan:
16181
cmp.b STAG(%a6), &QNAN
16182
beq.b src_qnan2
16183
global res_snan_1op
16184
res_snan_1op:
16185
src_snan2:
16186
bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
16187
or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16188
lea FP_SRC(%a6), %a0
16189
bra.b nan_comp
16190
global res_qnan_1op
16191
res_qnan_1op:
16192
src_qnan2:
16193
or.l &nan_mask, USER_FPSR(%a6)
16194
lea FP_SRC(%a6), %a0
16195
bra.b nan_comp
16196
dst_snan2:
16197
or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16198
bset &0x6, FP_DST_HI(%a6) # set SNAN bit
16199
lea FP_DST(%a6), %a0
16200
bra.b nan_comp
16201
dst_qnan2:
16202
lea FP_DST(%a6), %a0
16203
cmp.b STAG(%a6), &SNAN
16204
bne nan_done
16205
or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
16206
nan_done:
16207
or.l &nan_mask, USER_FPSR(%a6)
16208
nan_comp:
16209
btst &0x7, FTEMP_EX(%a0) # is NAN neg?
16210
beq.b nan_not_neg
16211
or.l &neg_mask, USER_FPSR(%a6)
16212
nan_not_neg:
16213
fmovm.x (%a0), &0x80
16214
rts
16215
16216
#########################################################################
16217
# XDEF **************************************************************** #
16218
# res_operr(): return default result during operand error #
16219
# #
16220
# XREF **************************************************************** #
16221
# None #
16222
# #
16223
# INPUT *************************************************************** #
16224
# None #
16225
# #
16226
# OUTPUT ************************************************************** #
16227
# fp0 = default operand error result #
16228
# #
16229
# ALGORITHM *********************************************************** #
16230
# A nonsignalling NAN is returned as the default result when		#
16231
# an operand error occurs for the following cases: #
16232
# #
16233
# Multiply: (Infinity x Zero) #
16234
# Divide : (Zero / Zero) || (Infinity / Infinity) #
16235
# #
16236
#########################################################################
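#
# The default result is simply the quiet NAN pattern stored at nan_return
# below; viewed as an extended-precision image (illustrative C notation):
#
#	unsigned long nan96[3] = { 0x7fff0000, 0xffffffff, 0xffffffff };
#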
16237
16238
global res_operr
16239
res_operr:
16240
or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16241
fmovm.x nan_return(%pc), &0x80
16242
rts
16243
16244
nan_return:
16245
long 0x7fff0000, 0xffffffff, 0xffffffff
16246
16247
#########################################################################
16248
# fdbcc(): routine to emulate the fdbcc instruction #
16249
# #
16250
# XDEF **************************************************************** #
16251
# _fdbcc() #
16252
# #
16253
# XREF **************************************************************** #
16254
# fetch_dreg() - fetch Dn value #
16255
# store_dreg_l() - store updated Dn value #
16256
# #
16257
# INPUT *************************************************************** #
16258
# d0 = displacement #
16259
# #
16260
# OUTPUT ************************************************************** #
16261
# none #
16262
# #
16263
# ALGORITHM *********************************************************** #
16264
# This routine checks which conditional predicate is specified by #
16265
# the stacked fdbcc instruction opcode and then branches to a routine #
16266
# for that predicate. The corresponding fbcc instruction is then used #
16267
# to see whether the condition (specified by the stacked FPSR) is true #
16268
# or false. #
16269
# If a BSUN exception should be indicated, the BSUN and AIOP		#
16270
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16271
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16272
# enabled BSUN should not be flagged and the predicate is false, then	#
16273
# Dn is fetched and decremented by one. If Dn is not equal to -1, add #
16274
# the displacement value to the stacked PC so that when an "rte" is #
16275
# finally executed, the branch occurs. #
16276
# #
16277
#########################################################################
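#
# Rough C sketch of the fdbcc semantics emulated below (illustrative only;
# fetch_dreg()/store_dreg_l() are the XREF helpers named above, but their C
# signatures here are schematic):
#
#	if (!cc_true(predicate, fpsr_cc)) {	/* condition false? */
#		dn = fetch_dreg(reg);
#		dn = (dn & ~0xffff) | ((dn - 1) & 0xffff);  /* dec low word */
#		store_dreg_l(reg, dn);
#		if ((short)dn != -1)
#			stacked_pc += displacement;	/* branch taken on "rte" */
#	}
#	/* a nonaware predicate tested while the NAN ccode bit is set also
#	 * accrues BSUN/AIOP and, if BSUN is enabled, raises that exception
#	 */
#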
16278
global _fdbcc
16279
_fdbcc:
16280
mov.l %d0,L_SCR1(%a6) # save displacement
16281
16282
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16283
16284
clr.l %d1 # clear scratch reg
16285
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16286
ror.l &0x8,%d1 # rotate to top byte
16287
fmov.l %d1,%fpsr # insert into FPSR
16288
16289
mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
16290
jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16291
16292
tbl_fdbcc:
16293
short fdbcc_f - tbl_fdbcc # 00
16294
short fdbcc_eq - tbl_fdbcc # 01
16295
short fdbcc_ogt - tbl_fdbcc # 02
16296
short fdbcc_oge - tbl_fdbcc # 03
16297
short fdbcc_olt - tbl_fdbcc # 04
16298
short fdbcc_ole - tbl_fdbcc # 05
16299
short fdbcc_ogl - tbl_fdbcc # 06
16300
short fdbcc_or - tbl_fdbcc # 07
16301
short fdbcc_un - tbl_fdbcc # 08
16302
short fdbcc_ueq - tbl_fdbcc # 09
16303
short fdbcc_ugt - tbl_fdbcc # 10
16304
short fdbcc_uge - tbl_fdbcc # 11
16305
short fdbcc_ult - tbl_fdbcc # 12
16306
short fdbcc_ule - tbl_fdbcc # 13
16307
short fdbcc_neq - tbl_fdbcc # 14
16308
short fdbcc_t - tbl_fdbcc # 15
16309
short fdbcc_sf - tbl_fdbcc # 16
16310
short fdbcc_seq - tbl_fdbcc # 17
16311
short fdbcc_gt - tbl_fdbcc # 18
16312
short fdbcc_ge - tbl_fdbcc # 19
16313
short fdbcc_lt - tbl_fdbcc # 20
16314
short fdbcc_le - tbl_fdbcc # 21
16315
short fdbcc_gl - tbl_fdbcc # 22
16316
short fdbcc_gle - tbl_fdbcc # 23
16317
short fdbcc_ngle - tbl_fdbcc # 24
16318
short fdbcc_ngl - tbl_fdbcc # 25
16319
short fdbcc_nle - tbl_fdbcc # 26
16320
short fdbcc_nlt - tbl_fdbcc # 27
16321
short fdbcc_nge - tbl_fdbcc # 28
16322
short fdbcc_ngt - tbl_fdbcc # 29
16323
short fdbcc_sneq - tbl_fdbcc # 30
16324
short fdbcc_st - tbl_fdbcc # 31
16325
16326
#########################################################################
16327
# #
16328
# IEEE Nonaware tests #
16329
# #
16330
# For the IEEE nonaware tests, only the false branch changes the #
16331
# counter. However, the true branch may set bsun so we check to see #
16332
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
16333
# #
16334
# The cases EQ and NE are shared by the Aware and Nonaware groups #
16335
# and are incapable of setting the BSUN exception bit. #
16336
# #
16337
# Typically, only one of the two possible branch directions could #
16338
# have the NAN bit set. #
16339
# (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
16340
# is preserved.) #
16341
# #
16342
#########################################################################
16343
16344
#
16345
# equal:
16346
#
16347
# Z
16348
#
16349
fdbcc_eq:
16350
fbeq.w fdbcc_eq_yes # equal?
16351
fdbcc_eq_no:
16352
bra.w fdbcc_false # no; go handle counter
16353
fdbcc_eq_yes:
16354
rts
16355
16356
#
16357
# not equal:
16358
# _
16359
# Z
16360
#
16361
fdbcc_neq:
16362
fbneq.w fdbcc_neq_yes # not equal?
16363
fdbcc_neq_no:
16364
bra.w fdbcc_false # no; go handle counter
16365
fdbcc_neq_yes:
16366
rts
16367
16368
#
16369
# greater than:
16370
# _______
16371
# NANvZvN
16372
#
16373
fdbcc_gt:
16374
fbgt.w fdbcc_gt_yes # greater than?
16375
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16376
beq.w fdbcc_false # no;go handle counter
16377
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16378
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379
bne.w fdbcc_bsun # yes; we have an exception
16380
bra.w fdbcc_false # no; go handle counter
16381
fdbcc_gt_yes:
16382
rts # do nothing
16383
16384
#
16385
# not greater than:
16386
#
16387
# NANvZvN
16388
#
16389
fdbcc_ngt:
16390
fbngt.w fdbcc_ngt_yes # not greater than?
16391
fdbcc_ngt_no:
16392
bra.w fdbcc_false # no; go handle counter
16393
fdbcc_ngt_yes:
16394
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16395
beq.b fdbcc_ngt_done # no;go finish
16396
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16397
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398
bne.w fdbcc_bsun # yes; we have an exception
16399
fdbcc_ngt_done:
16400
rts # no; do nothing
16401
16402
#
16403
# greater than or equal:
16404
# _____
16405
# Zv(NANvN)
16406
#
16407
fdbcc_ge:
16408
fbge.w fdbcc_ge_yes # greater than or equal?
16409
fdbcc_ge_no:
16410
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16411
beq.w fdbcc_false # no;go handle counter
16412
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16413
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414
bne.w fdbcc_bsun # yes; we have an exception
16415
bra.w fdbcc_false # no; go handle counter
16416
fdbcc_ge_yes:
16417
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16418
beq.b fdbcc_ge_yes_done # no;go do nothing
16419
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16420
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421
bne.w fdbcc_bsun # yes; we have an exception
16422
fdbcc_ge_yes_done:
16423
rts # do nothing
16424
16425
#
16426
# not (greater than or equal):
16427
# _
16428
# NANv(N^Z)
16429
#
16430
fdbcc_nge:
16431
fbnge.w fdbcc_nge_yes # not (greater than or equal)?
16432
fdbcc_nge_no:
16433
bra.w fdbcc_false # no; go handle counter
16434
fdbcc_nge_yes:
16435
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16436
beq.b fdbcc_nge_done # no;go finish
16437
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16438
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439
bne.w fdbcc_bsun # yes; we have an exception
16440
fdbcc_nge_done:
16441
rts # no; do nothing
16442
16443
#
16444
# less than:
16445
# _____
16446
# N^(NANvZ)
16447
#
16448
fdbcc_lt:
16449
fblt.w fdbcc_lt_yes # less than?
16450
fdbcc_lt_no:
16451
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16452
beq.w fdbcc_false # no; go handle counter
16453
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16454
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455
bne.w fdbcc_bsun # yes; we have an exception
16456
bra.w fdbcc_false # no; go handle counter
16457
fdbcc_lt_yes:
16458
rts # do nothing
16459
16460
#
16461
# not less than:
16462
# _
16463
# NANv(ZvN)
16464
#
16465
fdbcc_nlt:
16466
fbnlt.w fdbcc_nlt_yes # not less than?
16467
fdbcc_nlt_no:
16468
bra.w fdbcc_false # no; go handle counter
16469
fdbcc_nlt_yes:
16470
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16471
beq.b fdbcc_nlt_done # no;go finish
16472
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16473
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474
bne.w fdbcc_bsun # yes; we have an exception
16475
fdbcc_nlt_done:
16476
rts # no; do nothing
16477
16478
#
16479
# less than or equal:
16480
# ___
16481
# Zv(N^NAN)
16482
#
16483
fdbcc_le:
16484
fble.w fdbcc_le_yes # less than or equal?
16485
fdbcc_le_no:
16486
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16487
beq.w fdbcc_false # no; go handle counter
16488
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16489
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490
bne.w fdbcc_bsun # yes; we have an exception
16491
bra.w fdbcc_false # no; go handle counter
16492
fdbcc_le_yes:
16493
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16494
beq.b fdbcc_le_yes_done # no; go do nothing
16495
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16496
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497
bne.w fdbcc_bsun # yes; we have an exception
16498
fdbcc_le_yes_done:
16499
rts # do nothing
16500
16501
#
16502
# not (less than or equal):
16503
# ___
16504
# NANv(NvZ)
16505
#
16506
fdbcc_nle:
16507
fbnle.w fdbcc_nle_yes # not (less than or equal)?
16508
fdbcc_nle_no:
16509
bra.w fdbcc_false # no; go handle counter
16510
fdbcc_nle_yes:
16511
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16512
beq.w fdbcc_nle_done # no; go finish
16513
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16514
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515
bne.w fdbcc_bsun # yes; we have an exception
16516
fdbcc_nle_done:
16517
rts # no; do nothing
16518
16519
#
16520
# greater or less than:
16521
# _____
16522
# NANvZ
16523
#
16524
fdbcc_gl:
16525
fbgl.w fdbcc_gl_yes # greater or less than?
16526
fdbcc_gl_no:
16527
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16528
beq.w fdbcc_false # no; handle counter
16529
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16530
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531
bne.w fdbcc_bsun # yes; we have an exception
16532
bra.w fdbcc_false # no; go handle counter
16533
fdbcc_gl_yes:
16534
rts # do nothing
16535
16536
#
16537
# not (greater or less than):
16538
#
16539
# NANvZ
16540
#
16541
fdbcc_ngl:
16542
fbngl.w fdbcc_ngl_yes # not (greater or less than)?
16543
fdbcc_ngl_no:
16544
bra.w fdbcc_false # no; go handle counter
16545
fdbcc_ngl_yes:
16546
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16547
beq.b fdbcc_ngl_done # no; go finish
16548
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16549
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550
bne.w fdbcc_bsun # yes; we have an exception
16551
fdbcc_ngl_done:
16552
rts # no; do nothing
16553
16554
#
16555
# greater, less, or equal:
16556
# ___
16557
# NAN
16558
#
16559
fdbcc_gle:
16560
fbgle.w fdbcc_gle_yes # greater, less, or equal?
16561
fdbcc_gle_no:
16562
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16563
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564
bne.w fdbcc_bsun # yes; we have an exception
16565
bra.w fdbcc_false # no; go handle counter
16566
fdbcc_gle_yes:
16567
rts # do nothing
16568
16569
#
16570
# not (greater, less, or equal):
16571
#
16572
# NAN
16573
#
16574
fdbcc_ngle:
16575
fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
16576
fdbcc_ngle_no:
16577
bra.w fdbcc_false # no; go handle counter
16578
fdbcc_ngle_yes:
16579
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16580
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581
bne.w fdbcc_bsun # yes; we have an exception
16582
rts # no; do nothing
16583
16584
#########################################################################
16585
# #
16586
# Miscellaneous tests #
16587
# #
16588
# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16589
# #
16590
#########################################################################
16591
16592
#
16593
# false:
16594
#
16595
# False
16596
#
16597
fdbcc_f: # no bsun possible
16598
bra.w fdbcc_false # go handle counter
16599
16600
#
16601
# true:
16602
#
16603
# True
16604
#
16605
fdbcc_t: # no bsun possible
16606
rts # do nothing
16607
16608
#
16609
# signalling false:
16610
#
16611
# False
16612
#
16613
fdbcc_sf:
16614
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16615
beq.w fdbcc_false # no;go handle counter
16616
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16617
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618
bne.w fdbcc_bsun # yes; we have an exception
16619
bra.w fdbcc_false # go handle counter
16620
16621
#
16622
# signalling true:
16623
#
16624
# True
16625
#
16626
fdbcc_st:
16627
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16628
beq.b fdbcc_st_done # no;go finish
16629
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16630
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631
bne.w fdbcc_bsun # yes; we have an exception
16632
fdbcc_st_done:
16633
rts
16634
16635
#
16636
# signalling equal:
16637
#
16638
# Z
16639
#
16640
fdbcc_seq:
16641
fbseq.w fdbcc_seq_yes # signalling equal?
16642
fdbcc_seq_no:
16643
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16644
beq.w fdbcc_false # no;go handle counter
16645
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16646
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647
bne.w fdbcc_bsun # yes; we have an exception
16648
bra.w fdbcc_false # go handle counter
16649
fdbcc_seq_yes:
16650
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16651
beq.b fdbcc_seq_yes_done # no;go do nothing
16652
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16653
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654
bne.w fdbcc_bsun # yes; we have an exception
16655
fdbcc_seq_yes_done:
16656
rts # yes; do nothing
16657
16658
#
16659
# signalling not equal:
16660
# _
16661
# Z
16662
#
16663
fdbcc_sneq:
16664
fbsneq.w fdbcc_sneq_yes # signalling not equal?
16665
fdbcc_sneq_no:
16666
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16667
beq.w fdbcc_false # no;go handle counter
16668
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16669
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670
bne.w fdbcc_bsun # yes; we have an exception
16671
bra.w fdbcc_false # go handle counter
16672
fdbcc_sneq_yes:
16673
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16674
beq.w fdbcc_sneq_done # no;go finish
16675
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16676
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677
bne.w fdbcc_bsun # yes; we have an exception
16678
fdbcc_sneq_done:
16679
rts
16680
16681
#########################################################################
16682
# #
16683
# IEEE Aware tests #
16684
# #
16685
# For the IEEE aware tests, action is only taken if the result is false.#
16686
# Therefore, the opposite branch type is used to jump to the decrement #
16687
# routine. #
16688
# The BSUN exception will not be set for any of these tests. #
16689
# #
16690
#########################################################################
16691
16692
#
16693
# ordered greater than:
16694
# _______
16695
# NANvZvN
16696
#
16697
fdbcc_ogt:
16698
fbogt.w fdbcc_ogt_yes # ordered greater than?
16699
fdbcc_ogt_no:
16700
bra.w fdbcc_false # no; go handle counter
16701
fdbcc_ogt_yes:
16702
rts # yes; do nothing
16703
16704
#
16705
# unordered or less or equal:
16706
# _______
16707
# NANvZvN
16708
#
16709
fdbcc_ule:
16710
fbule.w fdbcc_ule_yes # unordered or less or equal?
16711
fdbcc_ule_no:
16712
bra.w fdbcc_false # no; go handle counter
16713
fdbcc_ule_yes:
16714
rts # yes; do nothing
16715
16716
#
16717
# ordered greater than or equal:
16718
# _____
16719
# Zv(NANvN)
16720
#
16721
fdbcc_oge:
16722
fboge.w fdbcc_oge_yes # ordered greater than or equal?
16723
fdbcc_oge_no:
16724
bra.w fdbcc_false # no; go handle counter
16725
fdbcc_oge_yes:
16726
rts # yes; do nothing
16727
16728
#
16729
# unordered or less than:
16730
# _
16731
# NANv(N^Z)
16732
#
16733
fdbcc_ult:
16734
fbult.w fdbcc_ult_yes # unordered or less than?
16735
fdbcc_ult_no:
16736
bra.w fdbcc_false # no; go handle counter
16737
fdbcc_ult_yes:
16738
rts # yes; do nothing
16739
16740
#
16741
# ordered less than:
16742
# _____
16743
# N^(NANvZ)
16744
#
16745
fdbcc_olt:
16746
fbolt.w fdbcc_olt_yes # ordered less than?
16747
fdbcc_olt_no:
16748
bra.w fdbcc_false # no; go handle counter
16749
fdbcc_olt_yes:
16750
rts # yes; do nothing
16751
16752
#
16753
# unordered or greater or equal:
16754
#
16755
# NANvZvN
16756
#
16757
fdbcc_uge:
16758
fbuge.w fdbcc_uge_yes # unordered or greater or equal?
16759
fdbcc_uge_no:
16760
bra.w fdbcc_false # no; go handle counter
16761
fdbcc_uge_yes:
16762
rts # yes; do nothing
16763
16764
#
16765
# ordered less than or equal:
16766
# ___
16767
# Zv(N^NAN)
16768
#
16769
fdbcc_ole:
16770
fbole.w fdbcc_ole_yes # ordered less than or equal?
16771
fdbcc_ole_no:
16772
bra.w fdbcc_false # no; go handle counter
16773
fdbcc_ole_yes:
16774
rts # yes; do nothing
16775
16776
#
16777
# unordered or greater than:
16778
# ___
16779
# NANv(NvZ)
16780
#
16781
fdbcc_ugt:
16782
fbugt.w fdbcc_ugt_yes # unordered or greater than?
16783
fdbcc_ugt_no:
16784
bra.w fdbcc_false # no; go handle counter
16785
fdbcc_ugt_yes:
16786
rts # yes; do nothing
16787
16788
#
16789
# ordered greater or less than:
16790
# _____
16791
# NANvZ
16792
#
16793
fdbcc_ogl:
16794
fbogl.w fdbcc_ogl_yes # ordered greater or less than?
16795
fdbcc_ogl_no:
16796
bra.w fdbcc_false # no; go handle counter
16797
fdbcc_ogl_yes:
16798
rts # yes; do nothing
16799
16800
#
16801
# unordered or equal:
16802
#
16803
# NANvZ
16804
#
16805
fdbcc_ueq:
16806
fbueq.w fdbcc_ueq_yes # unordered or equal?
16807
fdbcc_ueq_no:
16808
bra.w fdbcc_false # no; go handle counter
16809
fdbcc_ueq_yes:
16810
rts # yes; do nothing
16811
16812
#
16813
# ordered:
16814
# ___
16815
# NAN
16816
#
16817
fdbcc_or:
16818
fbor.w fdbcc_or_yes # ordered?
16819
fdbcc_or_no:
16820
bra.w fdbcc_false # no; go handle counter
16821
fdbcc_or_yes:
16822
rts # yes; do nothing
16823
16824
#
16825
# unordered:
16826
#
16827
# NAN
16828
#
16829
fdbcc_un:
16830
fbun.w fdbcc_un_yes # unordered?
16831
fdbcc_un_no:
16832
bra.w fdbcc_false # no; go handle counter
16833
fdbcc_un_yes:
16834
rts # yes; do nothing
16835
16836
#######################################################################
16837
16838
#
16839
# the bsun exception bit was not set.
16840
#
16841
# (1) subtract 1 from the count register
16842
# (2) if (cr == -1) then
16843
# pc = pc of next instruction
16844
# else
16845
# pc += sign_ext(16-bit displacement)
16846
#
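# example (hypothetical values): if the fdbcc instruction resides at
# USER_FPIAR = 0x1000 and the saved displacement is -8, then a false
# predicate with Dn != -1 after the decrement yields
# EXC_PC = 0x1000 + 4 + (-8) = 0xffc, and the "rte" branches backward.
#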
16847
fdbcc_false:
16848
mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword
16849
andi.w &0x7, %d1 # extract count register
16850
16851
bsr.l fetch_dreg # fetch count value
16852
# make sure that d0 isn't corrupted between calls...
16853
16854
subq.w &0x1, %d0 # Dn - 1 -> Dn
16855
16856
bsr.l store_dreg_l # store new count value
16857
16858
cmpi.w %d0, &-0x1 # is (Dn == -1)?
16859
bne.b fdbcc_false_cont # no;
16860
rts
16861
16862
fdbcc_false_cont:
16863
mov.l L_SCR1(%a6),%d0 # fetch displacement
16864
add.l USER_FPIAR(%a6),%d0 # add instruction PC
16865
addq.l &0x4,%d0 # add instruction length
16866
mov.l %d0,EXC_PC(%a6) # set new PC
16867
rts
16868
16869
# the emulation routine set bsun and BSUN was enabled. have to
16870
# fix stack and jump to the bsun handler.
16871
# let the caller of this routine shift the stack frame up to
16872
# eliminate the effective address field.
16873
fdbcc_bsun:
16874
mov.b &fbsun_flg,SPCOND_FLG(%a6)
16875
rts
16876
16877
#########################################################################
16878
# ftrapcc(): routine to emulate the ftrapcc instruction #
16879
# #
16880
# XDEF **************************************************************** #
16881
# _ftrapcc() #
16882
# #
16883
# XREF **************************************************************** #
16884
# none #
16885
# #
16886
# INPUT *************************************************************** #
16887
# none #
16888
# #
16889
# OUTPUT ************************************************************** #
16890
# none #
16891
# #
16892
# ALGORITHM *********************************************************** #
16893
# This routine checks which conditional predicate is specified by #
16894
# the stacked ftrapcc instruction opcode and then branches to a routine #
16895
# for that predicate. The corresponding fbcc instruction is then used #
16896
# to see whether the condition (specified by the stacked FPSR) is true #
16897
# or false. #
16898
# If a BSUN exception should be indicated, the BSUN and AIOP		#
16899
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16900
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16901
# enabled BSUN should not be flagged and the predicate is true, then #
16902
# the ftrapcc_flg is set in the SPCOND_FLG location. These special #
16903
# flags indicate to the calling routine to emulate the exceptional #
16904
# condition. #
16905
# #
16906
#########################################################################
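#
# The decision here mirrors the C sketch given above for _fdbcc: a NAN
# condition code on a nonaware or signalling predicate sets BSUN and
# AIOP and, if BSUN is enabled, posts fbsun_flg and returns. The only
# difference is the action taken: a true predicate posts ftrapcc_flg
# in SPCOND_FLG (so the caller emulates the trap) and a false
# predicate simply returns.
#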
16907
16908
global _ftrapcc
16909
_ftrapcc:
16910
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16911
16912
clr.l %d1 # clear scratch reg
16913
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16914
ror.l &0x8,%d1 # rotate to top byte
16915
fmov.l %d1,%fpsr # insert into FPSR
16916
16917
mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
16918
jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16919
16920
tbl_ftrapcc:
16921
short ftrapcc_f - tbl_ftrapcc # 00
16922
short ftrapcc_eq - tbl_ftrapcc # 01
16923
short ftrapcc_ogt - tbl_ftrapcc # 02
16924
short ftrapcc_oge - tbl_ftrapcc # 03
16925
short ftrapcc_olt - tbl_ftrapcc # 04
16926
short ftrapcc_ole - tbl_ftrapcc # 05
16927
short ftrapcc_ogl - tbl_ftrapcc # 06
16928
short ftrapcc_or - tbl_ftrapcc # 07
16929
short ftrapcc_un - tbl_ftrapcc # 08
16930
short ftrapcc_ueq - tbl_ftrapcc # 09
16931
short ftrapcc_ugt - tbl_ftrapcc # 10
16932
short ftrapcc_uge - tbl_ftrapcc # 11
16933
short ftrapcc_ult - tbl_ftrapcc # 12
16934
short ftrapcc_ule - tbl_ftrapcc # 13
16935
short ftrapcc_neq - tbl_ftrapcc # 14
16936
short ftrapcc_t - tbl_ftrapcc # 15
16937
short ftrapcc_sf - tbl_ftrapcc # 16
16938
short ftrapcc_seq - tbl_ftrapcc # 17
16939
short ftrapcc_gt - tbl_ftrapcc # 18
16940
short ftrapcc_ge - tbl_ftrapcc # 19
16941
short ftrapcc_lt - tbl_ftrapcc # 20
16942
short ftrapcc_le - tbl_ftrapcc # 21
16943
short ftrapcc_gl - tbl_ftrapcc # 22
16944
short ftrapcc_gle - tbl_ftrapcc # 23
16945
short ftrapcc_ngle - tbl_ftrapcc # 24
16946
short ftrapcc_ngl - tbl_ftrapcc # 25
16947
short ftrapcc_nle - tbl_ftrapcc # 26
16948
short ftrapcc_nlt - tbl_ftrapcc # 27
16949
short ftrapcc_nge - tbl_ftrapcc # 28
16950
short ftrapcc_ngt - tbl_ftrapcc # 29
16951
short ftrapcc_sneq - tbl_ftrapcc # 30
16952
short ftrapcc_st - tbl_ftrapcc # 31
16953
16954
#########################################################################
16955
# #
16956
# IEEE Nonaware tests #
16957
# #
16958
# For the IEEE nonaware tests, we set the result based on the #
16959
# floating point condition codes. In addition, we check to see #
16960
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
16961
# #
16962
# The cases EQ and NE are shared by the Aware and Nonaware groups #
16963
# and are incapable of setting the BSUN exception bit. #
16964
# #
16965
# Typically, only one of the two possible branch directions could #
16966
# have the NAN bit set. #
16967
# #
16968
#########################################################################
16969
16970
#
16971
# equal:
16972
#
16973
# Z
16974
#
16975
ftrapcc_eq:
16976
fbeq.w ftrapcc_trap # equal?
16977
ftrapcc_eq_no:
16978
rts # do nothing
16979
16980
#
16981
# not equal:
16982
# _
16983
# Z
16984
#
16985
ftrapcc_neq:
16986
fbneq.w ftrapcc_trap # not equal?
16987
ftrapcc_neq_no:
16988
rts # do nothing
16989
16990
#
16991
# greater than:
16992
# _______
16993
# NANvZvN
16994
#
16995
ftrapcc_gt:
16996
fbgt.w ftrapcc_trap # greater than?
16997
ftrapcc_gt_no:
16998
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16999
beq.b ftrapcc_gt_done # no
17000
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17001
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17002
bne.w ftrapcc_bsun # yes
17003
ftrapcc_gt_done:
17004
rts # no; do nothing
17005
17006
#
17007
# not greater than:
17008
#
17009
# NANvZvN
17010
#
17011
ftrapcc_ngt:
17012
fbngt.w ftrapcc_ngt_yes # not greater than?
17013
ftrapcc_ngt_no:
17014
rts # do nothing
17015
ftrapcc_ngt_yes:
17016
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17017
beq.w ftrapcc_trap # no; go take trap
17018
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17019
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17020
bne.w ftrapcc_bsun # yes
17021
bra.w ftrapcc_trap # no; go take trap
17022
17023
#
17024
# greater than or equal:
17025
# _____
17026
# Zv(NANvN)
17027
#
17028
ftrapcc_ge:
17029
fbge.w ftrapcc_ge_yes # greater than or equal?
17030
ftrapcc_ge_no:
17031
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17032
beq.b ftrapcc_ge_done # no; go finish
17033
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17034
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17035
bne.w ftrapcc_bsun # yes
17036
ftrapcc_ge_done:
17037
rts # no; do nothing
17038
ftrapcc_ge_yes:
17039
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17040
beq.w ftrapcc_trap # no; go take trap
17041
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17042
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17043
bne.w ftrapcc_bsun # yes
17044
bra.w ftrapcc_trap # no; go take trap
17045
17046
#
17047
# not (greater than or equal):
17048
# _
17049
# NANv(N^Z)
17050
#
17051
ftrapcc_nge:
17052
fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
17053
ftrapcc_nge_no:
17054
rts # do nothing
17055
ftrapcc_nge_yes:
17056
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17057
beq.w ftrapcc_trap # no; go take trap
17058
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17059
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17060
bne.w ftrapcc_bsun # yes
17061
bra.w ftrapcc_trap # no; go take trap
17062
17063
#
17064
# less than:
17065
# _____
17066
# N^(NANvZ)
17067
#
17068
ftrapcc_lt:
17069
fblt.w ftrapcc_trap # less than?
17070
ftrapcc_lt_no:
17071
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17072
beq.b ftrapcc_lt_done # no; go finish
17073
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17074
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17075
bne.w ftrapcc_bsun # yes
17076
ftrapcc_lt_done:
17077
rts # no; do nothing
17078
17079
#
17080
# not less than:
17081
# _
17082
# NANv(ZvN)
17083
#
17084
ftrapcc_nlt:
17085
fbnlt.w ftrapcc_nlt_yes # not less than?
17086
ftrapcc_nlt_no:
17087
rts # do nothing
17088
ftrapcc_nlt_yes:
17089
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17090
beq.w ftrapcc_trap # no; go take trap
17091
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17092
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17093
bne.w ftrapcc_bsun # yes
17094
bra.w ftrapcc_trap # no; go take trap
17095
17096
#
17097
# less than or equal:
17098
# ___
17099
# Zv(N^NAN)
17100
#
17101
ftrapcc_le:
17102
fble.w ftrapcc_le_yes # less than or equal?
17103
ftrapcc_le_no:
17104
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17105
beq.b ftrapcc_le_done # no; go finish
17106
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17107
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17108
bne.w ftrapcc_bsun # yes
17109
ftrapcc_le_done:
17110
rts # no; do nothing
17111
ftrapcc_le_yes:
17112
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17113
beq.w ftrapcc_trap # no; go take trap
17114
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17115
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17116
bne.w ftrapcc_bsun # yes
17117
bra.w ftrapcc_trap # no; go take trap
17118
17119
#
17120
# not (less than or equal):
17121
# ___
17122
# NANv(NvZ)
17123
#
17124
ftrapcc_nle:
17125
fbnle.w ftrapcc_nle_yes # not (less than or equal)?
17126
ftrapcc_nle_no:
17127
rts # do nothing
17128
ftrapcc_nle_yes:
17129
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17130
beq.w ftrapcc_trap # no; go take trap
17131
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17132
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17133
bne.w ftrapcc_bsun # yes
17134
bra.w ftrapcc_trap # no; go take trap
17135
17136
#
17137
# greater or less than:
17138
# _____
17139
# NANvZ
17140
#
17141
ftrapcc_gl:
17142
fbgl.w ftrapcc_trap # greater or less than?
17143
ftrapcc_gl_no:
17144
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17145
beq.b ftrapcc_gl_done # no; go finish
17146
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17147
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17148
bne.w ftrapcc_bsun # yes
17149
ftrapcc_gl_done:
17150
rts # no; do nothing
17151
17152
#
17153
# not (greater or less than):
17154
#
17155
# NANvZ
17156
#
17157
ftrapcc_ngl:
17158
fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
17159
ftrapcc_ngl_no:
17160
rts # do nothing
17161
ftrapcc_ngl_yes:
17162
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17163
beq.w ftrapcc_trap # no; go take trap
17164
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17165
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17166
bne.w ftrapcc_bsun # yes
17167
bra.w ftrapcc_trap # no; go take trap
17168
17169
#
17170
# greater, less, or equal:
17171
# ___
17172
# NAN
17173
#
17174
ftrapcc_gle:
17175
fbgle.w ftrapcc_trap # greater, less, or equal?
17176
ftrapcc_gle_no:
17177
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17178
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17179
bne.w ftrapcc_bsun # yes
17180
rts # no; do nothing
17181
17182
#
17183
# not (greater, less, or equal):
17184
#
17185
# NAN
17186
#
17187
ftrapcc_ngle:
17188
fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
17189
ftrapcc_ngle_no:
17190
rts # do nothing
17191
ftrapcc_ngle_yes:
17192
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17193
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17194
bne.w ftrapcc_bsun # yes
17195
bra.w ftrapcc_trap # no; go take trap
17196
17197
#########################################################################
17198
# #
17199
# Miscellaneous tests #
17200
# #
17201
# For the miscellaneous tests, the trap is taken or not based on the	#
17202
# floating point condition codes. All but the 'false' and 'true'	#
17203
# predicates can set the BSUN exception.				#
17204
# #
17205
#########################################################################
17206
17207
#
17208
# false:
17209
#
17210
# False
17211
#
17212
ftrapcc_f:
17213
rts # do nothing
17214
17215
#
17216
# true:
17217
#
17218
# True
17219
#
17220
ftrapcc_t:
17221
bra.w ftrapcc_trap # go take trap
17222
17223
#
17224
# signalling false:
17225
#
17226
# False
17227
#
17228
ftrapcc_sf:
17229
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17230
beq.b ftrapcc_sf_done # no; go finish
17231
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17232
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17233
bne.w ftrapcc_bsun # yes
17234
ftrapcc_sf_done:
17235
rts # no; do nothing
17236
17237
#
17238
# signalling true:
17239
#
17240
# True
17241
#
17242
ftrapcc_st:
17243
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17244
beq.w ftrapcc_trap # no; go take trap
17245
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17246
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17247
bne.w ftrapcc_bsun # yes
17248
bra.w ftrapcc_trap # no; go take trap
17249
17250
#
17251
# signalling equal:
17252
#
17253
# Z
17254
#
17255
ftrapcc_seq:
17256
fbseq.w ftrapcc_seq_yes # signalling equal?
17257
ftrapcc_seq_no:
17258
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17259
beq.w ftrapcc_seq_done # no; go finish
17260
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17261
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17262
bne.w ftrapcc_bsun # yes
17263
ftrapcc_seq_done:
17264
rts # no; do nothing
17265
ftrapcc_seq_yes:
17266
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17267
beq.w ftrapcc_trap # no; go take trap
17268
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17269
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17270
bne.w ftrapcc_bsun # yes
17271
bra.w ftrapcc_trap # no; go take trap
17272
17273
#
17274
# signalling not equal:
17275
# _
17276
# Z
17277
#
17278
ftrapcc_sneq:
17279
fbsneq.w ftrapcc_sneq_yes # signalling not equal?
17280
ftrapcc_sneq_no:
17281
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17282
beq.w ftrapcc_sneq_no_done # no; go finish
17283
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17284
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17285
bne.w ftrapcc_bsun # yes
17286
ftrapcc_sneq_no_done:
17287
rts # do nothing
17288
ftrapcc_sneq_yes:
17289
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17290
beq.w ftrapcc_trap # no; go take trap
17291
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17292
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17293
bne.w ftrapcc_bsun # yes
17294
bra.w ftrapcc_trap # no; go take trap
17295
17296
#########################################################################
17297
# #
17298
# IEEE Aware tests #
17299
# #
17300
# For the IEEE aware tests, we only have to set the result based on the #
17301
# floating point condition codes. The BSUN exception will not be #
17302
# set for any of these tests. #
17303
# #
17304
#########################################################################
17305
17306
#
17307
# ordered greater than:
17308
# _______
17309
# NANvZvN
17310
#
17311
ftrapcc_ogt:
17312
fbogt.w ftrapcc_trap # ordered greater than?
17313
ftrapcc_ogt_no:
17314
rts # do nothing
17315
17316
#
17317
# unordered or less or equal:
17318
# _______
17319
# NANvZvN
17320
#
17321
ftrapcc_ule:
17322
fbule.w ftrapcc_trap # unordered or less or equal?
17323
ftrapcc_ule_no:
17324
rts # do nothing
17325
17326
#
17327
# ordered greater than or equal:
17328
# _____
17329
# Zv(NANvN)
17330
#
17331
ftrapcc_oge:
17332
fboge.w ftrapcc_trap # ordered greater than or equal?
17333
ftrapcc_oge_no:
17334
rts # do nothing
17335
17336
#
17337
# unordered or less than:
17338
# _
17339
# NANv(N^Z)
17340
#
17341
ftrapcc_ult:
17342
fbult.w ftrapcc_trap # unordered or less than?
17343
ftrapcc_ult_no:
17344
rts # do nothing
17345
17346
#
17347
# ordered less than:
17348
# _____
17349
# N^(NANvZ)
17350
#
17351
ftrapcc_olt:
17352
fbolt.w ftrapcc_trap # ordered less than?
17353
ftrapcc_olt_no:
17354
rts # do nothing
17355
17356
#
17357
# unordered or greater or equal:
17358
#
17359
# NANvZvN
17360
#
17361
ftrapcc_uge:
17362
fbuge.w ftrapcc_trap # unordered or greater or equal?
17363
ftrapcc_uge_no:
17364
rts # do nothing
17365
17366
#
17367
# ordered less than or equal:
17368
# ___
17369
# Zv(N^NAN)
17370
#
17371
ftrapcc_ole:
17372
fbole.w ftrapcc_trap # ordered less than or equal?
17373
ftrapcc_ole_no:
17374
rts # do nothing
17375
17376
#
17377
# unordered or greater than:
17378
# ___
17379
# NANv(NvZ)
17380
#
17381
ftrapcc_ugt:
17382
fbugt.w ftrapcc_trap # unordered or greater than?
17383
ftrapcc_ugt_no:
17384
rts # do nothing
17385
17386
#
17387
# ordered greater or less than:
17388
# _____
17389
# NANvZ
17390
#
17391
ftrapcc_ogl:
17392
fbogl.w ftrapcc_trap # ordered greater or less than?
17393
ftrapcc_ogl_no:
17394
rts # do nothing
17395
17396
#
17397
# unordered or equal:
17398
#
17399
# NANvZ
17400
#
17401
ftrapcc_ueq:
17402
fbueq.w ftrapcc_trap # unordered or equal?
17403
ftrapcc_ueq_no:
17404
rts # do nothing
17405
17406
#
17407
# ordered:
17408
# ___
17409
# NAN
17410
#
17411
ftrapcc_or:
17412
fbor.w ftrapcc_trap # ordered?
17413
ftrapcc_or_no:
17414
rts # do nothing
17415
17416
#
17417
# unordered:
17418
#
17419
# NAN
17420
#
17421
ftrapcc_un:
17422
fbun.w ftrapcc_trap # unordered?
17423
ftrapcc_un_no:
17424
rts # do nothing
17425
17426
#######################################################################
17427
17428
# the bsun exception bit was not set.
17429
# we will need to jump to the ftrapcc vector. the stack frame
17430
# is the same size as that of the fp unimp instruction. the
17431
# only difference is that the <ea> field should hold the PC
17432
# of the ftrapcc instruction and the vector offset field
17433
# should denote the ftrapcc trap.
17434
ftrapcc_trap:
17435
mov.b &ftrapcc_flg,SPCOND_FLG(%a6)
17436
rts
17437
17438
# the emulation routine set bsun and BSUN was enabled. have to
17439
# fix stack and jump to the bsun handler.
17440
# let the caller of this routine shift the stack frame up to
17441
# eliminate the effective address field.
17442
ftrapcc_bsun:
17443
mov.b &fbsun_flg,SPCOND_FLG(%a6)
17444
rts
17445
17446
#########################################################################
17447
# fscc(): routine to emulate the fscc instruction #
17448
# #
17449
# XDEF **************************************************************** #
17450
# _fscc() #
17451
# #
17452
# XREF **************************************************************** #
17453
# store_dreg_b() - store result to data register file #
17454
# dec_areg() - decrement an areg for -(an) mode #
17455
# inc_areg() - increment an areg for (an)+ mode #
17456
# _dmem_write_byte() - store result to memory #
17457
# #
17458
# INPUT *************************************************************** #
17459
# none #
17460
# #
17461
# OUTPUT ************************************************************** #
17462
# none #
17463
# #
17464
# ALGORITHM *********************************************************** #
17465
# This routine checks which conditional predicate is specified by #
17466
# the stacked fscc instruction opcode and then branches to a routine #
17467
# for that predicate. The corresponding fbcc instruction is then used #
17468
# to see whether the condition (specified by the stacked FPSR) is true #
17469
# or false. #
17470
# If a BSUN exception should be indicated, the BSUN and AIOP		#
17471
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
17472
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
17473
# enabled BSUN should not be flagged, then the true/false result is	#
17474
# stored to the data register file or to memory.			#
17475
# #
17476
#########################################################################
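#
# Roughly, in C (illustration only; cc_true() is a hypothetical
# stand-in for the fbcc test, and the call forms of the XREF routines
# are not literal):
#
#	unsigned char result = cc_true(predicate) ? 0xff : 0x00;
#	if (bsun_was_flagged && (FPCR_ENABLE & BSUN))
#		SPCOND_FLG = fbsun_flg;		/* no result is stored	*/
#	else if (dest_is_data_register)
#		store_dreg_b(result, regno);
#	else
#		_dmem_write_byte(ea, result);	/* then fix (An)+/-(An)	*/
#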
17477
17478
global _fscc
17479
_fscc:
17480
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
17481
17482
clr.l %d1 # clear scratch reg
17483
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
17484
ror.l &0x8,%d1 # rotate to top byte
17485
fmov.l %d1,%fpsr # insert into FPSR
17486
17487
mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
17488
jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine
17489
17490
tbl_fscc:
17491
short fscc_f - tbl_fscc # 00
17492
short fscc_eq - tbl_fscc # 01
17493
short fscc_ogt - tbl_fscc # 02
17494
short fscc_oge - tbl_fscc # 03
17495
short fscc_olt - tbl_fscc # 04
17496
short fscc_ole - tbl_fscc # 05
17497
short fscc_ogl - tbl_fscc # 06
17498
short fscc_or - tbl_fscc # 07
17499
short fscc_un - tbl_fscc # 08
17500
short fscc_ueq - tbl_fscc # 09
17501
short fscc_ugt - tbl_fscc # 10
17502
short fscc_uge - tbl_fscc # 11
17503
short fscc_ult - tbl_fscc # 12
17504
short fscc_ule - tbl_fscc # 13
17505
short fscc_neq - tbl_fscc # 14
17506
short fscc_t - tbl_fscc # 15
17507
short fscc_sf - tbl_fscc # 16
17508
short fscc_seq - tbl_fscc # 17
17509
short fscc_gt - tbl_fscc # 18
17510
short fscc_ge - tbl_fscc # 19
17511
short fscc_lt - tbl_fscc # 20
17512
short fscc_le - tbl_fscc # 21
17513
short fscc_gl - tbl_fscc # 22
17514
short fscc_gle - tbl_fscc # 23
17515
short fscc_ngle - tbl_fscc # 24
17516
short fscc_ngl - tbl_fscc # 25
17517
short fscc_nle - tbl_fscc # 26
17518
short fscc_nlt - tbl_fscc # 27
17519
short fscc_nge - tbl_fscc # 28
17520
short fscc_ngt - tbl_fscc # 29
17521
short fscc_sneq - tbl_fscc # 30
17522
short fscc_st - tbl_fscc # 31
17523
17524
#########################################################################
17525
# #
17526
# IEEE Nonaware tests #
17527
# #
17528
# For the IEEE nonaware tests, we set the result based on the #
17529
# floating point condition codes. In addition, we check to see #
17530
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
17531
# #
17532
# The cases EQ and NE are shared by the Aware and Nonaware groups #
17533
# and are incapable of setting the BSUN exception bit. #
17534
# #
17535
# Typically, only one of the two possible branch directions could #
17536
# have the NAN bit set. #
17537
# #
17538
#########################################################################
17539
17540
#
17541
# equal:
17542
#
17543
# Z
17544
#
17545
fscc_eq:
17546
fbeq.w fscc_eq_yes # equal?
17547
fscc_eq_no:
17548
clr.b %d0 # set false
17549
bra.w fscc_done # go finish
17550
fscc_eq_yes:
17551
st %d0 # set true
17552
bra.w fscc_done # go finish
17553
17554
#
17555
# not equal:
17556
# _
17557
# Z
17558
#
17559
fscc_neq:
17560
fbneq.w fscc_neq_yes # not equal?
17561
fscc_neq_no:
17562
clr.b %d0 # set false
17563
bra.w fscc_done # go finish
17564
fscc_neq_yes:
17565
st %d0 # set true
17566
bra.w fscc_done # go finish
17567
17568
#
17569
# greater than:
17570
# _______
17571
# NANvZvN
17572
#
17573
fscc_gt:
17574
fbgt.w fscc_gt_yes # greater than?
17575
fscc_gt_no:
17576
clr.b %d0 # set false
17577
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17578
beq.w fscc_done # no;go finish
17579
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17580
bra.w fscc_chk_bsun # go finish
17581
fscc_gt_yes:
17582
st %d0 # set true
17583
bra.w fscc_done # go finish
17584
17585
#
17586
# not greater than:
17587
#
17588
# NANvZvN
17589
#
17590
fscc_ngt:
17591
fbngt.w fscc_ngt_yes # not greater than?
17592
fscc_ngt_no:
17593
clr.b %d0 # set false
17594
bra.w fscc_done # go finish
17595
fscc_ngt_yes:
17596
st %d0 # set true
17597
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17598
beq.w fscc_done # no;go finish
17599
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17600
bra.w fscc_chk_bsun # go finish
17601
17602
#
17603
# greater than or equal:
17604
# _____
17605
# Zv(NANvN)
17606
#
17607
fscc_ge:
17608
fbge.w fscc_ge_yes # greater than or equal?
17609
fscc_ge_no:
17610
clr.b %d0 # set false
17611
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17612
beq.w fscc_done # no;go finish
17613
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17614
bra.w fscc_chk_bsun # go finish
17615
fscc_ge_yes:
17616
st %d0 # set true
17617
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17618
beq.w fscc_done # no;go finish
17619
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17620
bra.w fscc_chk_bsun # go finish
17621
17622
#
17623
# not (greater than or equal):
17624
# _
17625
# NANv(N^Z)
17626
#
17627
fscc_nge:
17628
fbnge.w fscc_nge_yes # not (greater than or equal)?
17629
fscc_nge_no:
17630
clr.b %d0 # set false
17631
bra.w fscc_done # go finish
17632
fscc_nge_yes:
17633
st %d0 # set true
17634
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17635
beq.w fscc_done # no;go finish
17636
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17637
bra.w fscc_chk_bsun # go finish
17638
17639
#
17640
# less than:
17641
# _____
17642
# N^(NANvZ)
17643
#
17644
fscc_lt:
17645
fblt.w fscc_lt_yes # less than?
17646
fscc_lt_no:
17647
clr.b %d0 # set false
17648
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17649
beq.w fscc_done # no;go finish
17650
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17651
bra.w fscc_chk_bsun # go finish
17652
fscc_lt_yes:
17653
st %d0 # set true
17654
bra.w fscc_done # go finish
17655
17656
#
17657
# not less than:
17658
# _
17659
# NANv(ZvN)
17660
#
17661
fscc_nlt:
17662
fbnlt.w fscc_nlt_yes # not less than?
17663
fscc_nlt_no:
17664
clr.b %d0 # set false
17665
bra.w fscc_done # go finish
17666
fscc_nlt_yes:
17667
st %d0 # set true
17668
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17669
beq.w fscc_done # no;go finish
17670
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17671
bra.w fscc_chk_bsun # go finish
17672
17673
#
17674
# less than or equal:
17675
# ___
17676
# Zv(N^NAN)
17677
#
17678
fscc_le:
17679
fble.w fscc_le_yes # less than or equal?
17680
fscc_le_no:
17681
clr.b %d0 # set false
17682
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17683
beq.w fscc_done # no;go finish
17684
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17685
bra.w fscc_chk_bsun # go finish
17686
fscc_le_yes:
17687
st %d0 # set true
17688
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17689
beq.w fscc_done # no;go finish
17690
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17691
bra.w fscc_chk_bsun # go finish
17692
17693
#
17694
# not (less than or equal):
17695
# ___
17696
# NANv(NvZ)
17697
#
17698
fscc_nle:
17699
fbnle.w fscc_nle_yes # not (less than or equal)?
17700
fscc_nle_no:
17701
clr.b %d0 # set false
17702
bra.w fscc_done # go finish
17703
fscc_nle_yes:
17704
st %d0 # set true
17705
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17706
beq.w fscc_done # no;go finish
17707
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17708
bra.w fscc_chk_bsun # go finish
17709
17710
#
17711
# greater or less than:
17712
# _____
17713
# NANvZ
17714
#
17715
fscc_gl:
17716
fbgl.w fscc_gl_yes # greater or less than?
17717
fscc_gl_no:
17718
clr.b %d0 # set false
17719
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17720
beq.w fscc_done # no;go finish
17721
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17722
bra.w fscc_chk_bsun # go finish
17723
fscc_gl_yes:
17724
st %d0 # set true
17725
bra.w fscc_done # go finish
17726
17727
#
17728
# not (greater or less than):
17729
#
17730
# NANvZ
17731
#
17732
fscc_ngl:
17733
fbngl.w fscc_ngl_yes # not (greater or less than)?
17734
fscc_ngl_no:
17735
clr.b %d0 # set false
17736
bra.w fscc_done # go finish
17737
fscc_ngl_yes:
17738
st %d0 # set true
17739
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17740
beq.w fscc_done # no;go finish
17741
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17742
bra.w fscc_chk_bsun # go finish
17743
17744
#
17745
# greater, less, or equal:
17746
# ___
17747
# NAN
17748
#
17749
fscc_gle:
17750
fbgle.w fscc_gle_yes # greater, less, or equal?
17751
fscc_gle_no:
17752
clr.b %d0 # set false
17753
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17754
bra.w fscc_chk_bsun # go finish
17755
fscc_gle_yes:
17756
st %d0 # set true
17757
bra.w fscc_done # go finish
17758
17759
#
17760
# not (greater, less, or equal):
17761
#
17762
# NAN
17763
#
17764
fscc_ngle:
17765
fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
17766
fscc_ngle_no:
17767
clr.b %d0 # set false
17768
bra.w fscc_done # go finish
17769
fscc_ngle_yes:
17770
st %d0 # set true
17771
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17772
bra.w fscc_chk_bsun # go finish
17773
17774
#########################################################################
17775
# #
17776
# Miscellaneous tests #
17777
# #
17778
# For the miscellaneous tests, the result is set based on the		#
17779
# floating point condition codes. All but the 'false' and 'true'	#
17780
# predicates can set the BSUN exception.				#
17781
# #
17782
#########################################################################
17783
17784
#
17785
# false:
17786
#
17787
# False
17788
#
17789
fscc_f:
17790
clr.b %d0 # set false
17791
bra.w fscc_done # go finish
17792
17793
#
17794
# true:
17795
#
17796
# True
17797
#
17798
fscc_t:
17799
st %d0 # set true
17800
bra.w fscc_done # go finish
17801
17802
#
17803
# signalling false:
17804
#
17805
# False
17806
#
17807
fscc_sf:
17808
clr.b %d0 # set false
17809
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17810
beq.w fscc_done # no;go finish
17811
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17812
bra.w fscc_chk_bsun # go finish
17813
17814
#
17815
# signalling true:
17816
#
17817
# True
17818
#
17819
fscc_st:
17820
st %d0 # set true
17821
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17822
beq.w fscc_done # no;go finish
17823
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17824
bra.w fscc_chk_bsun # go finish
17825
17826
#
17827
# signalling equal:
17828
#
17829
# Z
17830
#
17831
fscc_seq:
17832
fbseq.w fscc_seq_yes # signalling equal?
17833
fscc_seq_no:
17834
clr.b %d0 # set false
17835
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17836
beq.w fscc_done # no;go finish
17837
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17838
bra.w fscc_chk_bsun # go finish
17839
fscc_seq_yes:
17840
st %d0 # set true
17841
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17842
beq.w fscc_done # no;go finish
17843
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17844
bra.w fscc_chk_bsun # go finish
17845
17846
#
17847
# signalling not equal:
17848
# _
17849
# Z
17850
#
17851
fscc_sneq:
17852
fbsneq.w fscc_sneq_yes # signalling not equal?
17853
fscc_sneq_no:
17854
clr.b %d0 # set false
17855
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17856
beq.w fscc_done # no;go finish
17857
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17858
bra.w fscc_chk_bsun # go finish
17859
fscc_sneq_yes:
17860
st %d0 # set true
17861
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17862
beq.w fscc_done # no;go finish
17863
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17864
bra.w fscc_chk_bsun # go finish
17865
17866
#########################################################################
17867
# #
17868
# IEEE Aware tests #
17869
# #
17870
# For the IEEE aware tests, we only have to set the result based on the #
17871
# floating point condition codes. The BSUN exception will not be #
17872
# set for any of these tests. #
17873
# #
17874
#########################################################################
17875
17876
#
17877
# ordered greater than:
17878
# _______
17879
# NANvZvN
17880
#
17881
fscc_ogt:
17882
fbogt.w fscc_ogt_yes # ordered greater than?
17883
fscc_ogt_no:
17884
clr.b %d0 # set false
17885
bra.w fscc_done # go finish
17886
fscc_ogt_yes:
17887
st %d0 # set true
17888
bra.w fscc_done # go finish
17889
17890
#
17891
# unordered or less or equal:
17892
# _______
17893
# NANvZvN
17894
#
17895
fscc_ule:
17896
fbule.w fscc_ule_yes # unordered or less or equal?
17897
fscc_ule_no:
17898
clr.b %d0 # set false
17899
bra.w fscc_done # go finish
17900
fscc_ule_yes:
17901
st %d0 # set true
17902
bra.w fscc_done # go finish
17903
17904
#
17905
# ordered greater than or equal:
17906
# _____
17907
# Zv(NANvN)
17908
#
17909
fscc_oge:
17910
fboge.w fscc_oge_yes # ordered greater than or equal?
17911
fscc_oge_no:
17912
clr.b %d0 # set false
17913
bra.w fscc_done # go finish
17914
fscc_oge_yes:
17915
st %d0 # set true
17916
bra.w fscc_done # go finish
17917
17918
#
17919
# unordered or less than:
17920
# _
17921
# NANv(N^Z)
17922
#
17923
fscc_ult:
17924
fbult.w fscc_ult_yes # unordered or less than?
17925
fscc_ult_no:
17926
clr.b %d0 # set false
17927
bra.w fscc_done # go finish
17928
fscc_ult_yes:
17929
st %d0 # set true
17930
bra.w fscc_done # go finish
17931
17932
#
17933
# ordered less than:
17934
# _____
17935
# N^(NANvZ)
17936
#
17937
fscc_olt:
17938
fbolt.w fscc_olt_yes # ordered less than?
17939
fscc_olt_no:
17940
clr.b %d0 # set false
17941
bra.w fscc_done # go finish
17942
fscc_olt_yes:
17943
st %d0 # set true
17944
bra.w fscc_done # go finish
17945
17946
#
17947
# unordered or greater or equal:
17948
#
17949
# NANvZvN
17950
#
17951
fscc_uge:
17952
fbuge.w fscc_uge_yes # unordered or greater or equal?
17953
fscc_uge_no:
17954
clr.b %d0 # set false
17955
bra.w fscc_done # go finish
17956
fscc_uge_yes:
17957
st %d0 # set true
17958
bra.w fscc_done # go finish
17959
17960
#
17961
# ordered less than or equal:
17962
# ___
17963
# Zv(N^NAN)
17964
#
17965
fscc_ole:
17966
fbole.w fscc_ole_yes # ordered less than or equal?
17967
fscc_ole_no:
17968
clr.b %d0 # set false
17969
bra.w fscc_done # go finish
17970
fscc_ole_yes:
17971
st %d0 # set true
17972
bra.w fscc_done # go finish
17973
17974
#
17975
# unordered or greater than:
17976
# ___
17977
# NANv(NvZ)
17978
#
17979
fscc_ugt:
17980
fbugt.w fscc_ugt_yes # unordered or greater than?
17981
fscc_ugt_no:
17982
clr.b %d0 # set false
17983
bra.w fscc_done # go finish
17984
fscc_ugt_yes:
17985
st %d0 # set true
17986
bra.w fscc_done # go finish
17987
17988
#
17989
# ordered greater or less than:
17990
# _____
17991
# NANvZ
17992
#
17993
fscc_ogl:
17994
fbogl.w fscc_ogl_yes # ordered greater or less than?
17995
fscc_ogl_no:
17996
clr.b %d0 # set false
17997
bra.w fscc_done # go finish
17998
fscc_ogl_yes:
17999
st %d0 # set true
18000
bra.w fscc_done # go finish
18001
18002
#
18003
# unordered or equal:
18004
#
18005
# NANvZ
18006
#
18007
fscc_ueq:
18008
fbueq.w fscc_ueq_yes # unordered or equal?
18009
fscc_ueq_no:
18010
clr.b %d0 # set false
18011
bra.w fscc_done # go finish
18012
fscc_ueq_yes:
18013
st %d0 # set true
18014
bra.w fscc_done # go finish
18015
18016
#
18017
# ordered:
18018
# ___
18019
# NAN
18020
#
18021
fscc_or:
18022
fbor.w fscc_or_yes # ordered?
18023
fscc_or_no:
18024
clr.b %d0 # set false
18025
bra.w fscc_done # go finish
18026
fscc_or_yes:
18027
st %d0 # set true
18028
bra.w fscc_done # go finish
18029
18030
#
18031
# unordered:
18032
#
18033
# NAN
18034
#
18035
fscc_un:
18036
fbun.w fscc_un_yes # unordered?
18037
fscc_un_no:
18038
clr.b %d0 # set false
18039
bra.w fscc_done # go finish
18040
fscc_un_yes:
18041
st %d0 # set true
18042
bra.w fscc_done # go finish
18043
18044
#######################################################################
18045
18046
#
18047
# the bsun exception bit was set. now, check to see if BSUN
18048
# is enabled. if so, don't store the result; correct the stack frame
18049
# for a bsun exception.
18050
#
18051
fscc_chk_bsun:
18052
btst &bsun_bit,FPCR_ENABLE(%a6) # was BSUN set?
18053
bne.w fscc_bsun
18054
18055
#
18056
# the bsun exception bit was not set.
18057
# the result has been selected.
18058
# now, check to see if the result is to be stored in the data register
18059
# file or in memory.
18060
#
18061
fscc_done:
18062
mov.l %d0,%a0 # save result for a moment
18063
18064
mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
18065
mov.l %d1,%d0 # make a copy
18066
andi.b &0x38,%d1 # extract src mode
18067
18068
bne.b fscc_mem_op # it's a memory operation
18069
18070
mov.l %d0,%d1
18071
andi.w &0x7,%d1 # pass index in d1
18072
mov.l %a0,%d0 # pass result in d0
18073
bsr.l store_dreg_b # save result in regfile
18074
rts
18075
18076
#
18077
# the stacked <ea> is correct with the exception of:
18078
# -> Dn : <ea> is garbage
18079
#
18080
# if the addressing mode is post-increment or pre-decrement,
18081
# then the address registers have not been updated.
18082
#
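#
# in rough C terms (illustration only; the call forms are not literal):
#
#	if (_dmem_write_byte(EXC_EA, result))	/* write faulted?	*/
#		return fscc_err();		/* An is left unchanged	*/
#	if (mode == postincrement)
#		inc_areg(regno, 1);		/* commit the +1 now	*/
#	else if (mode == predecrement)
#		dec_areg(regno, 1);		/* commit the -1 now	*/
#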
18083
fscc_mem_op:
18084
cmpi.b %d1,&0x18 # is <ea> (An)+ ?
18085
beq.b fscc_mem_inc # yes
18086
cmpi.b %d1,&0x20 # is <ea> -(An) ?
18087
beq.b fscc_mem_dec # yes
18088
18089
mov.l %a0,%d0 # pass result in d0
18090
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18091
bsr.l _dmem_write_byte # write result byte
18092
18093
tst.l %d1 # did dstore fail?
18094
bne.w fscc_err # yes
18095
18096
rts
18097
18098
# addressing mode is post-increment. write the result byte. if the write
18099
# fails then don't update the address register. if write passes then
18100
# call inc_areg() to update the address register.
18101
fscc_mem_inc:
18102
mov.l %a0,%d0 # pass result in d0
18103
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18104
bsr.l _dmem_write_byte # write result byte
18105
18106
tst.l %d1 # did dstore fail?
18107
bne.w fscc_err # yes
18108
18109
mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18110
andi.w &0x7,%d1 # pass index in d1
18111
movq.l &0x1,%d0 # pass amt to inc by
18112
bsr.l inc_areg # increment address register
18113
18114
rts
18115
18116
# addressing mode is pre-decrement. write the result byte. if the write
18117
# fails then don't update the address register. if the write passes then
18118
# call dec_areg() to update the address register.
18119
fscc_mem_dec:
18120
mov.l %a0,%d0 # pass result in d0
18121
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18122
bsr.l _dmem_write_byte # write result byte
18123
18124
tst.l %d1 # did dstore fail?
18125
bne.w fscc_err # yes
18126
18127
mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18128
andi.w &0x7,%d1 # pass index in d1
18129
movq.l &0x1,%d0 # pass amt to dec by
18130
bsr.l dec_areg # decrement address register
18131
18132
rts
18133
18134
# the emulation routine set bsun and BSUN was enabled. have to
18135
# fix stack and jump to the bsun handler.
18136
# let the caller of this routine shift the stack frame up to
18137
# eliminate the effective address field.
18138
fscc_bsun:
18139
mov.b &fbsun_flg,SPCOND_FLG(%a6)
18140
rts
18141
18142
# the byte write to memory has failed. pass the failing effective address
18143
# and a FSLW to funimp_dacc().
18144
fscc_err:
18145
mov.w &0x00a1,EXC_VOFF(%a6)
18146
bra.l facc_finish
18147
18148
#########################################################################
18149
# XDEF **************************************************************** #
18150
# fmovm_dynamic(): emulate "fmovm" dynamic instruction #
18151
# #
18152
# XREF **************************************************************** #
18153
# fetch_dreg() - fetch data register #
18154
# {i,d,}mem_read() - fetch data from memory #
18155
# _mem_write() - write data to memory #
18156
# iea_iacc() - instruction memory access error occurred #
18157
# iea_dacc() - data memory access error occurred #
18158
# restore() - restore An index regs if access error occurred #
18159
# #
18160
# INPUT *************************************************************** #
18161
# None #
18162
# #
18163
# OUTPUT ************************************************************** #
18164
# If instr is "fmovm Dn,-(A7)" from supervisor mode, #
18165
# d0 = size of dump #
18166
# d1 = Dn #
18167
# Else if instruction access error, #
18168
# d0 = FSLW #
18169
# Else if data access error, #
18170
# d0 = FSLW #
18171
# a0 = address of fault #
18172
# Else #
18173
# none. #
18174
# #
18175
# ALGORITHM *********************************************************** #
18176
# The effective address must be calculated since this is entered #
18177
# from an "Unimplemented Effective Address" exception handler. So, we #
18178
# have our own fcalc_ea() routine here. If an access error is flagged #
18179
# by a _{i,d,}mem_read() call, we must exit through the special #
18180
# handler. #
18181
# The data register is determined and its value loaded to get the #
18182
# string of FP registers affected. This value is used as an index into #
18183
# a lookup table such that we can determine the number of bytes #
18184
# involved. #
18185
# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
18186
# to read in all FP values. Again, _mem_read() may fail and require a #
18187
# special exit. #
18188
# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18189
# to write all FP values. _mem_write() may also fail. #
18190
# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
18191
# then we return the size of the dump and the string to the caller #
18192
# so that the move can occur outside of this routine. This special #
18193
# case is required so that moves to the system stack are handled #
18194
# correctly. #
18195
# #
18196
# DYNAMIC: #
18197
# fmovm.x dn, <ea> #
18198
# fmovm.x <ea>, dn #
18199
# #
18200
# <WORD 1> <WORD2> #
18201
# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
18202
# #
18203
# & = (0): predecrement addressing mode #
18204
# (1): postincrement or control addressing mode #
18205
# @ = (0): move listed regs from memory to the FPU #
18206
# (1): move listed regs from the FPU to memory #
18207
# $$$ : index of data register holding reg select mask #
18208
# #
18209
# NOTES: #
18210
# If the data register holds a zero, then the #
18211
# instruction is a nop. #
18212
# #
18213
#########################################################################
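#
# Illustration only (not part of the FPSP build): the second-word field
# layout described above can be pulled apart in C roughly as follows.
# This is a minimal sketch; the struct and function names are made up
# for the example.
#
#   #include <stdio.h>
#
#   struct fmovm_dyn {
#       int to_memory;  /* @: 1 = FPU -> memory, 0 = memory -> FPU        */
#       int post_ctrl;  /* &: 1 = postincrement/control, 0 = predecrement */
#       int dreg;       /* $$$: data register holding the select mask     */
#   };
#
#   static struct fmovm_dyn decode_word2(unsigned short w2)
#   {
#       struct fmovm_dyn d;
#       d.to_memory = (w2 >> 13) & 1;   /* @ is bit 13      */
#       d.post_ctrl = (w2 >> 12) & 1;   /* & is bit 12      */
#       d.dreg      = (w2 >> 4) & 7;    /* $$$ is bits 6-4  */
#       return d;
#   }
#
#   int main(void)
#   {
#       struct fmovm_dyn d = decode_word2(0xe820);  /* example word */
#       printf("to_mem=%d post_ctrl=%d dreg=%d\n",
#              d.to_memory, d.post_ctrl, d.dreg);
#       return 0;
#   }
#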
18214
18215
global fmovm_dynamic
18216
fmovm_dynamic:
18217
18218
# extract the data register in which the bit string resides...
18219
mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
18220
andi.w &0x70,%d1 # extract reg bits
18221
lsr.b &0x4,%d1 # shift into lo bits
18222
18223
# fetch the bit string into d0...
18224
bsr.l fetch_dreg # fetch reg string
18225
18226
andi.l &0x000000ff,%d0 # keep only lo byte
18227
18228
mov.l %d0,-(%sp) # save strg
18229
mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
18230
mov.l %d0,-(%sp) # save size
18231
bsr.l fmovm_calc_ea # calculate <ea>
18232
mov.l (%sp)+,%d0 # restore size
18233
mov.l (%sp)+,%d1 # restore strg
18234
18235
# if the bit string is a zero, then the operation is a no-op
18236
# but, make sure that we've calculated ea and advanced the opword pointer
18237
beq.w fmovm_data_done
18238
18239
# separate move ins from move outs...
18240
btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
18241
beq.w fmovm_data_in # it's a move in
18242
18243
#############
18244
# MOVE OUT: #
18245
#############
18246
fmovm_data_out:
18247
btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
18248
bne.w fmovm_out_ctrl # control
18249
18250
############################
18251
fmovm_out_predec:
18252
# for predecrement mode, the bit string is the opposite of both control
18253
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254
# here, we convert it to be just like the others...
18255
mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
18256
18257
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
18258
beq.b fmovm_out_ctrl # user
18259
18260
fmovm_out_predec_s:
18261
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262
bne.b fmovm_out_ctrl
18263
18264
# the operation was, unfortunately, an "fmovm.x dn,-(sp)" issued from
# supervisor mode.
# we're also passing "size" and "strg" back to the calling routine.
18267
rts
18268
18269
############################
18270
fmovm_out_ctrl:
18271
mov.l %a0,%a1 # move <ea> to a1
18272
18273
sub.l %d0,%sp # subtract size of dump
18274
lea (%sp),%a0
18275
18276
tst.b %d1 # should FP0 be moved?
18277
bpl.b fmovm_out_ctrl_fp1 # no
18278
18279
mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
18280
mov.l 0x4+EXC_FP0(%a6),(%a0)+
18281
mov.l 0x8+EXC_FP0(%a6),(%a0)+
18282
18283
fmovm_out_ctrl_fp1:
18284
lsl.b &0x1,%d1 # should FP1 be moved?
18285
bpl.b fmovm_out_ctrl_fp2 # no
18286
18287
mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
18288
mov.l 0x4+EXC_FP1(%a6),(%a0)+
18289
mov.l 0x8+EXC_FP1(%a6),(%a0)+
18290
18291
fmovm_out_ctrl_fp2:
18292
lsl.b &0x1,%d1 # should FP2 be moved?
18293
bpl.b fmovm_out_ctrl_fp3 # no
18294
18295
fmovm.x &0x20,(%a0) # yes
18296
add.l &0xc,%a0
18297
18298
fmovm_out_ctrl_fp3:
18299
lsl.b &0x1,%d1 # should FP3 be moved?
18300
bpl.b fmovm_out_ctrl_fp4 # no
18301
18302
fmovm.x &0x10,(%a0) # yes
18303
add.l &0xc,%a0
18304
18305
fmovm_out_ctrl_fp4:
18306
lsl.b &0x1,%d1 # should FP4 be moved?
18307
bpl.b fmovm_out_ctrl_fp5 # no
18308
18309
fmovm.x &0x08,(%a0) # yes
18310
add.l &0xc,%a0
18311
18312
fmovm_out_ctrl_fp5:
18313
lsl.b &0x1,%d1 # should FP5 be moved?
18314
bpl.b fmovm_out_ctrl_fp6 # no
18315
18316
fmovm.x &0x04,(%a0) # yes
18317
add.l &0xc,%a0
18318
18319
fmovm_out_ctrl_fp6:
18320
lsl.b &0x1,%d1 # should FP6 be moved?
18321
bpl.b fmovm_out_ctrl_fp7 # no
18322
18323
fmovm.x &0x02,(%a0) # yes
18324
add.l &0xc,%a0
18325
18326
fmovm_out_ctrl_fp7:
18327
lsl.b &0x1,%d1 # should FP7 be moved?
18328
bpl.b fmovm_out_ctrl_done # no
18329
18330
fmovm.x &0x01,(%a0) # yes
18331
add.l &0xc,%a0
18332
18333
fmovm_out_ctrl_done:
18334
mov.l %a1,L_SCR1(%a6)
18335
18336
lea (%sp),%a0 # pass: supervisor src
18337
mov.l %d0,-(%sp) # save size
18338
bsr.l _dmem_write # copy data to user mem
18339
18340
mov.l (%sp)+,%d0
18341
add.l %d0,%sp # clear fpreg data from stack
18342
18343
tst.l %d1 # did dstore err?
18344
bne.w fmovm_out_err # yes
18345
18346
rts
18347
18348
############
18349
# MOVE IN: #
18350
############
18351
fmovm_data_in:
18352
mov.l %a0,L_SCR1(%a6)
18353
18354
sub.l %d0,%sp # make room for fpregs
18355
lea (%sp),%a1
18356
18357
mov.l %d1,-(%sp) # save bit string for later
18358
mov.l %d0,-(%sp) # save # of bytes
18359
18360
bsr.l _dmem_read # copy data from user mem
18361
18362
mov.l (%sp)+,%d0 # retrieve # of bytes
18363
18364
tst.l %d1 # did dfetch fail?
18365
bne.w fmovm_in_err # yes
18366
18367
mov.l (%sp)+,%d1 # load bit string
18368
18369
lea (%sp),%a0 # addr of stack
18370
18371
tst.b %d1 # should FP0 be moved?
18372
bpl.b fmovm_data_in_fp1 # no
18373
18374
mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
18375
mov.l (%a0)+,0x4+EXC_FP0(%a6)
18376
mov.l (%a0)+,0x8+EXC_FP0(%a6)
18377
18378
fmovm_data_in_fp1:
18379
lsl.b &0x1,%d1 # should FP1 be moved?
18380
bpl.b fmovm_data_in_fp2 # no
18381
18382
mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
18383
mov.l (%a0)+,0x4+EXC_FP1(%a6)
18384
mov.l (%a0)+,0x8+EXC_FP1(%a6)
18385
18386
fmovm_data_in_fp2:
18387
lsl.b &0x1,%d1 # should FP2 be moved?
18388
bpl.b fmovm_data_in_fp3 # no
18389
18390
fmovm.x (%a0)+,&0x20 # yes
18391
18392
fmovm_data_in_fp3:
18393
lsl.b &0x1,%d1 # should FP3 be moved?
18394
bpl.b fmovm_data_in_fp4 # no
18395
18396
fmovm.x (%a0)+,&0x10 # yes
18397
18398
fmovm_data_in_fp4:
18399
lsl.b &0x1,%d1 # should FP4 be moved?
18400
bpl.b fmovm_data_in_fp5 # no
18401
18402
fmovm.x (%a0)+,&0x08 # yes
18403
18404
fmovm_data_in_fp5:
18405
lsl.b &0x1,%d1 # should FP5 be moved?
18406
bpl.b fmovm_data_in_fp6 # no
18407
18408
fmovm.x (%a0)+,&0x04 # yes
18409
18410
fmovm_data_in_fp6:
18411
lsl.b &0x1,%d1 # should FP6 be moved?
18412
bpl.b fmovm_data_in_fp7 # no
18413
18414
fmovm.x (%a0)+,&0x02 # yes
18415
18416
fmovm_data_in_fp7:
18417
lsl.b &0x1,%d1 # should FP7 be moved?
18418
bpl.b fmovm_data_in_done # no
18419
18420
fmovm.x (%a0)+,&0x01 # yes
18421
18422
fmovm_data_in_done:
18423
add.l %d0,%sp # remove fpregs from stack
18424
rts
18425
18426
#####################################
18427
18428
fmovm_data_done:
18429
rts
18430
18431
##############################################################################
18432
18433
#
18434
# table indexed by the operation's bit string that gives the number
18435
# of bytes that will be moved.
18436
#
18437
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18438
#
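#
# Illustration only (not assembled): each entry of the table below is
# just the number of set bits in its index times 12 (bytes per fpreg),
# so the whole table can be regenerated with a short C program:
#
#   #include <stdio.h>
#
#   static int popcount8(unsigned v)
#   {
#       int n = 0;
#       for (; v; v >>= 1)      /* count the set bits of the index */
#           n += v & 1;
#       return n;
#   }
#
#   int main(void)
#   {
#       for (int i = 0; i < 256; i++)
#           printf("0x%02x%c", popcount8(i) * 12,
#                  (i % 8 == 7) ? '\n' : ',');
#       return 0;
#   }
#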
18439
tbl_fmovm_size:
18440
byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18441
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18442
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18443
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18444
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18445
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18446
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18447
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18448
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18449
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18450
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18451
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18452
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18454
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18455
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18456
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18457
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18458
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18460
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18461
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18462
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18464
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18465
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18466
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18467
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18468
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18470
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18471
byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
18472
18473
#
18474
# table to convert a pre-decrement bit string into a post-increment
18475
# or control bit string.
18476
# ex: 0x00 ==> 0x00
18477
# 0x01 ==> 0x80
18478
# 0x02 ==> 0x40
18479
# .
18480
# .
18481
# 0xfd ==> 0xbf
18482
# 0xfe ==> 0x7f
18483
# 0xff ==> 0xff
18484
#
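#
# Illustration only (not assembled): the conversion table below is an
# 8-bit bit-reversal (bit0 <-> bit7, bit1 <-> bit6, ...). A C sketch
# that regenerates it:
#
#   #include <stdio.h>
#
#   static unsigned reverse8(unsigned x)
#   {
#       unsigned r = 0;
#       for (int i = 0; i < 8; i++)
#           if (x & (1u << i))
#               r |= 1u << (7 - i);     /* mirror each bit */
#       return r;
#   }
#
#   int main(void)
#   {
#       for (int i = 0; i < 256; i++)
#           printf("0x%02x%c", reverse8((unsigned)i),
#                  (i % 8 == 7) ? '\n' : ',');
#       return 0;
#   }
#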
18485
tbl_fmovm_convert:
18486
byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18487
byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18488
byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18489
byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18490
byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18491
byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18492
byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18493
byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18494
byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18495
byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18496
byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18497
byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18498
byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18499
byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18500
byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18501
byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18502
byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18503
byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18504
byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18505
byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18506
byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18507
byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18508
byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18509
byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18510
byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18511
byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18512
byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18513
byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18514
byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18515
byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18516
byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18517
byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
18518
18519
global fmovm_calc_ea
18520
###############################################
18521
# _fmovm_calc_ea: calculate effective address #
18522
###############################################
18523
fmovm_calc_ea:
18524
mov.l %d0,%a0 # move # bytes to a0
18525
18526
# currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527
# easily changed if they were inputs passed in registers.
18528
mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
18529
mov.w %d0,%d1 # make a copy
18530
18531
andi.w &0x3f,%d0 # extract mode and reg fields
18532
andi.l &0x7,%d1 # extract reg field
18533
18534
# jump to the corresponding function for each {MODE,REG} pair.
18535
mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536
jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18537
18538
swbeg &64
18539
tbl_fea_mode:
18540
short tbl_fea_mode - tbl_fea_mode
18541
short tbl_fea_mode - tbl_fea_mode
18542
short tbl_fea_mode - tbl_fea_mode
18543
short tbl_fea_mode - tbl_fea_mode
18544
short tbl_fea_mode - tbl_fea_mode
18545
short tbl_fea_mode - tbl_fea_mode
18546
short tbl_fea_mode - tbl_fea_mode
18547
short tbl_fea_mode - tbl_fea_mode
18548
18549
short tbl_fea_mode - tbl_fea_mode
18550
short tbl_fea_mode - tbl_fea_mode
18551
short tbl_fea_mode - tbl_fea_mode
18552
short tbl_fea_mode - tbl_fea_mode
18553
short tbl_fea_mode - tbl_fea_mode
18554
short tbl_fea_mode - tbl_fea_mode
18555
short tbl_fea_mode - tbl_fea_mode
18556
short tbl_fea_mode - tbl_fea_mode
18557
18558
short faddr_ind_a0 - tbl_fea_mode
18559
short faddr_ind_a1 - tbl_fea_mode
18560
short faddr_ind_a2 - tbl_fea_mode
18561
short faddr_ind_a3 - tbl_fea_mode
18562
short faddr_ind_a4 - tbl_fea_mode
18563
short faddr_ind_a5 - tbl_fea_mode
18564
short faddr_ind_a6 - tbl_fea_mode
18565
short faddr_ind_a7 - tbl_fea_mode
18566
18567
short faddr_ind_p_a0 - tbl_fea_mode
18568
short faddr_ind_p_a1 - tbl_fea_mode
18569
short faddr_ind_p_a2 - tbl_fea_mode
18570
short faddr_ind_p_a3 - tbl_fea_mode
18571
short faddr_ind_p_a4 - tbl_fea_mode
18572
short faddr_ind_p_a5 - tbl_fea_mode
18573
short faddr_ind_p_a6 - tbl_fea_mode
18574
short faddr_ind_p_a7 - tbl_fea_mode
18575
18576
short faddr_ind_m_a0 - tbl_fea_mode
18577
short faddr_ind_m_a1 - tbl_fea_mode
18578
short faddr_ind_m_a2 - tbl_fea_mode
18579
short faddr_ind_m_a3 - tbl_fea_mode
18580
short faddr_ind_m_a4 - tbl_fea_mode
18581
short faddr_ind_m_a5 - tbl_fea_mode
18582
short faddr_ind_m_a6 - tbl_fea_mode
18583
short faddr_ind_m_a7 - tbl_fea_mode
18584
18585
short faddr_ind_disp_a0 - tbl_fea_mode
18586
short faddr_ind_disp_a1 - tbl_fea_mode
18587
short faddr_ind_disp_a2 - tbl_fea_mode
18588
short faddr_ind_disp_a3 - tbl_fea_mode
18589
short faddr_ind_disp_a4 - tbl_fea_mode
18590
short faddr_ind_disp_a5 - tbl_fea_mode
18591
short faddr_ind_disp_a6 - tbl_fea_mode
18592
short faddr_ind_disp_a7 - tbl_fea_mode
18593
18594
short faddr_ind_ext - tbl_fea_mode
18595
short faddr_ind_ext - tbl_fea_mode
18596
short faddr_ind_ext - tbl_fea_mode
18597
short faddr_ind_ext - tbl_fea_mode
18598
short faddr_ind_ext - tbl_fea_mode
18599
short faddr_ind_ext - tbl_fea_mode
18600
short faddr_ind_ext - tbl_fea_mode
18601
short faddr_ind_ext - tbl_fea_mode
18602
18603
short fabs_short - tbl_fea_mode
18604
short fabs_long - tbl_fea_mode
18605
short fpc_ind - tbl_fea_mode
18606
short fpc_ind_ext - tbl_fea_mode
18607
short tbl_fea_mode - tbl_fea_mode
18608
short tbl_fea_mode - tbl_fea_mode
18609
short tbl_fea_mode - tbl_fea_mode
18610
short tbl_fea_mode - tbl_fea_mode
18611
18612
###################################
18613
# Address register indirect: (An) #
18614
###################################
18615
faddr_ind_a0:
18616
mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
18617
rts
18618
18619
faddr_ind_a1:
18620
mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
18621
rts
18622
18623
faddr_ind_a2:
18624
mov.l %a2,%a0 # Get current a2
18625
rts
18626
18627
faddr_ind_a3:
18628
mov.l %a3,%a0 # Get current a3
18629
rts
18630
18631
faddr_ind_a4:
18632
mov.l %a4,%a0 # Get current a4
18633
rts
18634
18635
faddr_ind_a5:
18636
mov.l %a5,%a0 # Get current a5
18637
rts
18638
18639
faddr_ind_a6:
18640
mov.l (%a6),%a0 # Get current a6
18641
rts
18642
18643
faddr_ind_a7:
18644
mov.l EXC_A7(%a6),%a0 # Get current a7
18645
rts
18646
18647
#####################################################
18648
# Address register indirect w/ postincrement: (An)+ #
18649
#####################################################
18650
faddr_ind_p_a0:
18651
mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18652
mov.l %d0,%d1
18653
add.l %a0,%d1 # Increment
18654
mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
18655
mov.l %d0,%a0
18656
rts
18657
18658
faddr_ind_p_a1:
18659
mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18660
mov.l %d0,%d1
18661
add.l %a0,%d1 # Increment
18662
mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
18663
mov.l %d0,%a0
18664
rts
18665
18666
faddr_ind_p_a2:
18667
mov.l %a2,%d0 # Get current a2
18668
mov.l %d0,%d1
18669
add.l %a0,%d1 # Increment
18670
mov.l %d1,%a2 # Save incr value
18671
mov.l %d0,%a0
18672
rts
18673
18674
faddr_ind_p_a3:
18675
mov.l %a3,%d0 # Get current a3
18676
mov.l %d0,%d1
18677
add.l %a0,%d1 # Increment
18678
mov.l %d1,%a3 # Save incr value
18679
mov.l %d0,%a0
18680
rts
18681
18682
faddr_ind_p_a4:
18683
mov.l %a4,%d0 # Get current a4
18684
mov.l %d0,%d1
18685
add.l %a0,%d1 # Increment
18686
mov.l %d1,%a4 # Save incr value
18687
mov.l %d0,%a0
18688
rts
18689
18690
faddr_ind_p_a5:
18691
mov.l %a5,%d0 # Get current a5
18692
mov.l %d0,%d1
18693
add.l %a0,%d1 # Increment
18694
mov.l %d1,%a5 # Save incr value
18695
mov.l %d0,%a0
18696
rts
18697
18698
faddr_ind_p_a6:
18699
mov.l (%a6),%d0 # Get current a6
18700
mov.l %d0,%d1
18701
add.l %a0,%d1 # Increment
18702
mov.l %d1,(%a6) # Save incr value
18703
mov.l %d0,%a0
18704
rts
18705
18706
faddr_ind_p_a7:
18707
mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18708
18709
mov.l EXC_A7(%a6),%d0 # Get current a7
18710
mov.l %d0,%d1
18711
add.l %a0,%d1 # Increment
18712
mov.l %d1,EXC_A7(%a6) # Save incr value
18713
mov.l %d0,%a0
18714
rts
18715
18716
####################################################
18717
# Address register indirect w/ predecrement: -(An) #
18718
####################################################
18719
faddr_ind_m_a0:
18720
mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18721
sub.l %a0,%d0 # Decrement
18722
mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
18723
mov.l %d0,%a0
18724
rts
18725
18726
faddr_ind_m_a1:
18727
mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18728
sub.l %a0,%d0 # Decrement
18729
mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
18730
mov.l %d0,%a0
18731
rts
18732
18733
faddr_ind_m_a2:
18734
mov.l %a2,%d0 # Get current a2
18735
sub.l %a0,%d0 # Decrement
18736
mov.l %d0,%a2 # Save decr value
18737
mov.l %d0,%a0
18738
rts
18739
18740
faddr_ind_m_a3:
18741
mov.l %a3,%d0 # Get current a3
18742
sub.l %a0,%d0 # Decrement
18743
mov.l %d0,%a3 # Save decr value
18744
mov.l %d0,%a0
18745
rts
18746
18747
faddr_ind_m_a4:
18748
mov.l %a4,%d0 # Get current a4
18749
sub.l %a0,%d0 # Decrement
18750
mov.l %d0,%a4 # Save decr value
18751
mov.l %d0,%a0
18752
rts
18753
18754
faddr_ind_m_a5:
18755
mov.l %a5,%d0 # Get current a5
18756
sub.l %a0,%d0 # Decrement
18757
mov.l %d0,%a5 # Save decr value
18758
mov.l %d0,%a0
18759
rts
18760
18761
faddr_ind_m_a6:
18762
mov.l (%a6),%d0 # Get current a6
18763
sub.l %a0,%d0 # Decrement
18764
mov.l %d0,(%a6) # Save decr value
18765
mov.l %d0,%a0
18766
rts
18767
18768
faddr_ind_m_a7:
18769
mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18770
18771
mov.l EXC_A7(%a6),%d0 # Get current a7
18772
sub.l %a0,%d0 # Decrement
18773
mov.l %d0,EXC_A7(%a6) # Save decr value
18774
mov.l %d0,%a0
18775
rts
18776
18777
########################################################
18778
# Address register indirect w/ displacement: (d16, An) #
18779
########################################################
18780
faddr_ind_disp_a0:
18781
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18782
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18783
bsr.l _imem_read_word
18784
18785
tst.l %d1 # did ifetch fail?
18786
bne.l iea_iacc # yes
18787
18788
mov.w %d0,%a0 # sign extend displacement
18789
18790
add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
18791
rts
18792
18793
faddr_ind_disp_a1:
18794
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18795
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18796
bsr.l _imem_read_word
18797
18798
tst.l %d1 # did ifetch fail?
18799
bne.l iea_iacc # yes
18800
18801
mov.w %d0,%a0 # sign extend displacement
18802
18803
add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
18804
rts
18805
18806
faddr_ind_disp_a2:
18807
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18808
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18809
bsr.l _imem_read_word
18810
18811
tst.l %d1 # did ifetch fail?
18812
bne.l iea_iacc # yes
18813
18814
mov.w %d0,%a0 # sign extend displacement
18815
18816
add.l %a2,%a0 # a2 + d16
18817
rts
18818
18819
faddr_ind_disp_a3:
18820
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18821
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18822
bsr.l _imem_read_word
18823
18824
tst.l %d1 # did ifetch fail?
18825
bne.l iea_iacc # yes
18826
18827
mov.w %d0,%a0 # sign extend displacement
18828
18829
add.l %a3,%a0 # a3 + d16
18830
rts
18831
18832
faddr_ind_disp_a4:
18833
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18834
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18835
bsr.l _imem_read_word
18836
18837
tst.l %d1 # did ifetch fail?
18838
bne.l iea_iacc # yes
18839
18840
mov.w %d0,%a0 # sign extend displacement
18841
18842
add.l %a4,%a0 # a4 + d16
18843
rts
18844
18845
faddr_ind_disp_a5:
18846
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18847
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18848
bsr.l _imem_read_word
18849
18850
tst.l %d1 # did ifetch fail?
18851
bne.l iea_iacc # yes
18852
18853
mov.w %d0,%a0 # sign extend displacement
18854
18855
add.l %a5,%a0 # a5 + d16
18856
rts
18857
18858
faddr_ind_disp_a6:
18859
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18860
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18861
bsr.l _imem_read_word
18862
18863
tst.l %d1 # did ifetch fail?
18864
bne.l iea_iacc # yes
18865
18866
mov.w %d0,%a0 # sign extend displacement
18867
18868
add.l (%a6),%a0 # a6 + d16
18869
rts
18870
18871
faddr_ind_disp_a7:
18872
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18873
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18874
bsr.l _imem_read_word
18875
18876
tst.l %d1 # did ifetch fail?
18877
bne.l iea_iacc # yes
18878
18879
mov.w %d0,%a0 # sign extend displacement
18880
18881
add.l EXC_A7(%a6),%a0 # a7 + d16
18882
rts
18883
18884
########################################################################
18885
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886
# " " " w/ " (base displacement): (bd, An, Xn) #
18887
# Memory indirect postindexed: ([bd, An], Xn, od) #
18888
# Memory indirect preindexed: ([bd, An, Xn], od) #
18889
########################################################################
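#
# Illustration only (not assembled): for the brief-format extension word
# handled just below (the full format branches to fcalc_mem_ind), the
# effective address is An + scaled index + sign-extended 8-bit
# displacement. A C sketch of that computation, assuming regs[] holds
# d0-d7 followed by a0-a7 (names made up for the example):
#
#   #include <stdint.h>
#
#   /* ext word: | D/A+reg 15-12 | W/L 11 | scale 10-9 | 0 | d8 7-0 | */
#   uint32_t brief_ext_ea(uint32_t an, uint16_t ext, const uint32_t regs[16])
#   {
#       uint32_t idx = regs[(ext >> 12) & 0xf];     /* index register   */
#       if (!(ext & 0x0800))                        /* W/L clear: word  */
#           idx = (uint32_t)(int16_t)idx;           /* sign-extend word */
#       idx <<= (ext >> 9) & 0x3;                   /* apply scale      */
#       return an + idx + (uint32_t)(int8_t)(ext & 0xff);
#   }
#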
18890
faddr_ind_ext:
18891
addq.l &0x8,%d1
18892
bsr.l fetch_dreg # fetch base areg
18893
mov.l %d0,-(%sp)
18894
18895
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18896
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18897
bsr.l _imem_read_word # fetch extword in d0
18898
18899
tst.l %d1 # did ifetch fail?
18900
bne.l iea_iacc # yes
18901
18902
mov.l (%sp)+,%a0
18903
18904
btst &0x8,%d0
18905
bne.w fcalc_mem_ind
18906
18907
mov.l %d0,L_SCR1(%a6) # hold opword
18908
18909
mov.l %d0,%d1
18910
rol.w &0x4,%d1
18911
andi.w &0xf,%d1 # extract index regno
18912
18913
# count on fetch_dreg() not to alter a0...
18914
bsr.l fetch_dreg # fetch index
18915
18916
mov.l %d2,-(%sp) # save d2
18917
mov.l L_SCR1(%a6),%d2 # fetch opword
18918
18919
btst &0xb,%d2 # is it word or long?
18920
bne.b faii8_long
18921
ext.l %d0 # sign extend word index
18922
faii8_long:
18923
mov.l %d2,%d1
18924
rol.w &0x7,%d1
18925
andi.l &0x3,%d1 # extract scale value
18926
18927
lsl.l %d1,%d0 # shift index by scale
18928
18929
extb.l %d2 # sign extend displacement
18930
add.l %d2,%d0 # index + disp
18931
add.l %d0,%a0 # An + (index + disp)
18932
18933
mov.l (%sp)+,%d2 # restore old d2
18934
rts
18935
18936
###########################
18937
# Absolute short: (XXX).W #
18938
###########################
18939
fabs_short:
18940
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18941
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18942
bsr.l _imem_read_word # fetch short address
18943
18944
tst.l %d1 # did ifetch fail?
18945
bne.l iea_iacc # yes
18946
18947
mov.w %d0,%a0 # return <ea> in a0
18948
rts
18949
18950
##########################
18951
# Absolute long: (XXX).L #
18952
##########################
18953
fabs_long:
18954
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18955
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
18956
bsr.l _imem_read_long # fetch long address
18957
18958
tst.l %d1 # did ifetch fail?
18959
bne.l iea_iacc # yes
18960
18961
mov.l %d0,%a0 # return <ea> in a0
18962
rts
18963
18964
#######################################################
18965
# Program counter indirect w/ displacement: (d16, PC) #
18966
#######################################################
18967
fpc_ind:
18968
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18969
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18970
bsr.l _imem_read_word # fetch word displacement
18971
18972
tst.l %d1 # did ifetch fail?
18973
bne.l iea_iacc # yes
18974
18975
mov.w %d0,%a0 # sign extend displacement
18976
18977
add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
18978
18979
# _imem_read_word() increased the extwptr by 2. need to adjust here.
18980
subq.l &0x2,%a0 # adjust <ea>
18981
rts
18982
18983
##########################################################
18984
# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
# " " w/ " (base displacement): (bd, PC, Xn) #
18986
# PC memory indirect postindexed: ([bd, PC], Xn, od) #
18987
# PC memory indirect preindexed: ([bd, PC, Xn], od) #
18988
##########################################################
18989
fpc_ind_ext:
18990
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18991
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18992
bsr.l _imem_read_word # fetch ext word
18993
18994
tst.l %d1 # did ifetch fail?
18995
bne.l iea_iacc # yes
18996
18997
mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
18998
subq.l &0x2,%a0 # adjust base
18999
19000
btst &0x8,%d0 # is disp only 8 bits?
19001
bne.w fcalc_mem_ind # calc memory indirect
19002
19003
mov.l %d0,L_SCR1(%a6) # store opword
19004
19005
mov.l %d0,%d1 # make extword copy
19006
rol.w &0x4,%d1 # rotate reg num into place
19007
andi.w &0xf,%d1 # extract register number
19008
19009
# count on fetch_dreg() not to alter a0...
19010
bsr.l fetch_dreg # fetch index
19011
19012
mov.l %d2,-(%sp) # save d2
19013
mov.l L_SCR1(%a6),%d2 # fetch opword
19014
19015
btst &0xb,%d2 # is index word or long?
19016
bne.b fpii8_long # long
19017
ext.l %d0 # sign extend word index
19018
fpii8_long:
19019
mov.l %d2,%d1
19020
rol.w &0x7,%d1 # rotate scale value into place
19021
andi.l &0x3,%d1 # extract scale value
19022
19023
lsl.l %d1,%d0 # shift index by scale
19024
19025
extb.l %d2 # sign extend displacement
19026
add.l %d2,%d0 # disp + index
19027
add.l %d0,%a0 # An + (index + disp)
19028
19029
mov.l (%sp)+,%d2 # restore temp register
19030
rts
19031
19032
# d2 = index
19033
# d3 = base
19034
# d4 = od
19035
# d5 = extword
19036
fcalc_mem_ind:
19037
btst &0x6,%d0 # is the index suppressed?
19038
beq.b fcalc_index
19039
19040
movm.l &0x3c00,-(%sp) # save d2-d5
19041
19042
mov.l %d0,%d5 # put extword in d5
19043
mov.l %a0,%d3 # put base in d3
19044
19045
clr.l %d2 # yes, so index = 0
19046
bra.b fbase_supp_ck
19047
19048
# index:
19049
fcalc_index:
19050
mov.l %d0,L_SCR1(%a6) # save d0 (opword)
19051
bfextu %d0{&16:&4},%d1 # fetch dreg index
19052
bsr.l fetch_dreg
19053
19054
movm.l &0x3c00,-(%sp) # save d2-d5
19055
mov.l %d0,%d2 # put index in d2
19056
mov.l L_SCR1(%a6),%d5
19057
mov.l %a0,%d3
19058
19059
btst &0xb,%d5 # is index word or long?
19060
bne.b fno_ext
19061
ext.l %d2
19062
19063
fno_ext:
19064
bfextu %d5{&21:&2},%d0
19065
lsl.l %d0,%d2
19066
19067
# base address (passed as parameter in d3):
19068
# we clear the value here if it should actually be suppressed.
19069
fbase_supp_ck:
19070
btst &0x7,%d5 # is the bd suppressed?
19071
beq.b fno_base_sup
19072
clr.l %d3
19073
19074
# base displacement:
19075
fno_base_sup:
19076
bfextu %d5{&26:&2},%d0 # get bd size
19077
# beq.l fmovm_error # if (size == 0) it's reserved
19078
19079
cmpi.b %d0,&0x2
19080
blt.b fno_bd
19081
beq.b fget_word_bd
19082
19083
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19084
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19085
bsr.l _imem_read_long
19086
19087
tst.l %d1 # did ifetch fail?
19088
bne.l fcea_iacc # yes
19089
19090
bra.b fchk_ind
19091
19092
fget_word_bd:
19093
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19094
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19095
bsr.l _imem_read_word
19096
19097
tst.l %d1 # did ifetch fail?
19098
bne.l fcea_iacc # yes
19099
19100
ext.l %d0 # sign extend bd
19101
19102
fchk_ind:
19103
add.l %d0,%d3 # base += bd
19104
19105
# outer displacement:
19106
fno_bd:
19107
bfextu %d5{&30:&2},%d0 # is od suppressed?
19108
beq.w faii_bd
19109
19110
cmpi.b %d0,&0x2
19111
blt.b fnull_od
19112
beq.b fword_od
19113
19114
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19115
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19116
bsr.l _imem_read_long
19117
19118
tst.l %d1 # did ifetch fail?
19119
bne.l fcea_iacc # yes
19120
19121
bra.b fadd_them
19122
19123
fword_od:
19124
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19125
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19126
bsr.l _imem_read_word
19127
19128
tst.l %d1 # did ifetch fail?
19129
bne.l fcea_iacc # yes
19130
19131
ext.l %d0 # sign extend od
19132
bra.b fadd_them
19133
19134
fnull_od:
19135
clr.l %d0
19136
19137
fadd_them:
19138
mov.l %d0,%d4
19139
19140
btst &0x2,%d5 # pre or post indexing?
19141
beq.b fpre_indexed
19142
19143
mov.l %d3,%a0
19144
bsr.l _dmem_read_long
19145
19146
tst.l %d1 # did dfetch fail?
19147
bne.w fcea_err # yes
19148
19149
add.l %d2,%d0 # <ea> += index
19150
add.l %d4,%d0 # <ea> += od
19151
bra.b fdone_ea
19152
19153
fpre_indexed:
19154
add.l %d2,%d3 # preindexing
19155
mov.l %d3,%a0
19156
bsr.l _dmem_read_long
19157
19158
tst.l %d1 # did dfetch fail?
19159
bne.w fcea_err # yes
19160
19161
add.l %d4,%d0 # ea += od
19162
bra.b fdone_ea
19163
19164
faii_bd:
19165
add.l %d2,%d3 # ea = (base + bd) + index
19166
mov.l %d3,%d0
19167
fdone_ea:
19168
mov.l %d0,%a0
19169
19170
movm.l (%sp)+,&0x003c # restore d2-d5
19171
rts
19172
19173
#########################################################
19174
fcea_err:
19175
mov.l %d3,%a0
19176
19177
movm.l (%sp)+,&0x003c # restore d2-d5
19178
mov.w &0x0101,%d0
19179
bra.l iea_dacc
19180
19181
fcea_iacc:
19182
movm.l (%sp)+,&0x003c # restore d2-d5
19183
bra.l iea_iacc
19184
19185
fmovm_out_err:
19186
bsr.l restore
19187
mov.w &0x00e1,%d0
19188
bra.b fmovm_err
19189
19190
fmovm_in_err:
19191
bsr.l restore
19192
mov.w &0x0161,%d0
19193
19194
fmovm_err:
19195
mov.l L_SCR1(%a6),%a0
19196
bra.l iea_dacc
19197
19198
#########################################################################
19199
# XDEF **************************************************************** #
19200
# fmovm_ctrl(): emulate fmovm.l of control registers instr #
19201
# #
19202
# XREF **************************************************************** #
19203
# _imem_read_long() - read longword from memory #
19204
# iea_iacc() - _imem_read_long() failed; error recovery #
19205
# #
19206
# INPUT *************************************************************** #
19207
# None #
19208
# #
19209
# OUTPUT ************************************************************** #
19210
# If _imem_read_long() doesn't fail: #
19211
# USER_FPCR(a6) = new FPCR value #
19212
# USER_FPSR(a6) = new FPSR value #
19213
# USER_FPIAR(a6) = new FPIAR value #
19214
# #
19215
# ALGORITHM *********************************************************** #
19216
# Decode the instruction type by looking at the extension word #
19217
# in order to see how many control registers to fetch from memory. #
19218
# Fetch them using _imem_read_long(). If this fetch fails, exit through #
19219
# the special access error exit handler iea_iacc(). #
19220
# #
19221
# Instruction word decoding: #
19222
# #
19223
# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
19224
# #
19225
# WORD1 WORD2 #
19226
# 1111 0010 00 111100 100$ $$00 0000 0000 #
19227
# #
19228
# $$$ (100): FPCR #
19229
# (010): FPSR #
19230
# (001): FPIAR #
19231
# (000): FPIAR #
19232
# #
19233
#########################################################################
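#
# Illustration only (not assembled): the $$$ select bits land in the high
# byte of the extension word (the byte compared against 0x9c/0x98/0x94
# below), so membership of each control register can be read off
# directly. A minimal C sketch, with the byte hard-coded for the example:
#
#   #include <stdio.h>
#
#   int main(void)
#   {
#       unsigned ext_hi = 0x9c;             /* example: all three regs */
#       printf("FPCR:%u FPSR:%u FPIAR:%u\n",
#              (ext_hi >> 4) & 1,           /* 0x10 bit -> FPCR  */
#              (ext_hi >> 3) & 1,           /* 0x08 bit -> FPSR  */
#              (ext_hi >> 2) & 1);          /* 0x04 bit -> FPIAR */
#       return 0;
#   }
#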
19234
19235
global fmovm_ctrl
19236
fmovm_ctrl:
19237
mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
19238
cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
19239
beq.w fctrl_in_7 # yes
19240
cmpi.b %d0,&0x98 # fpcr & fpsr ?
19241
beq.w fctrl_in_6 # yes
19242
cmpi.b %d0,&0x94 # fpcr & fpiar ?
19243
beq.b fctrl_in_5 # yes
19244
19245
# fmovem.l #<data>, fpsr/fpiar
19246
fctrl_in_3:
19247
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19248
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19249
bsr.l _imem_read_long # fetch FPSR from mem
19250
19251
tst.l %d1 # did ifetch fail?
19252
bne.l iea_iacc # yes
19253
19254
mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19255
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19256
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19257
bsr.l _imem_read_long # fetch FPIAR from mem
19258
19259
tst.l %d1 # did ifetch fail?
19260
bne.l iea_iacc # yes
19261
19262
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19263
rts
19264
19265
# fmovem.l #<data>, fpcr/fpiar
19266
fctrl_in_5:
19267
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19268
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19269
bsr.l _imem_read_long # fetch FPCR from mem
19270
19271
tst.l %d1 # did ifetch fail?
19272
bne.l iea_iacc # yes
19273
19274
mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19275
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19276
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19277
bsr.l _imem_read_long # fetch FPIAR from mem
19278
19279
tst.l %d1 # did ifetch fail?
19280
bne.l iea_iacc # yes
19281
19282
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19283
rts
19284
19285
# fmovem.l #<data>, fpcr/fpsr
19286
fctrl_in_6:
19287
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19288
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19289
bsr.l _imem_read_long # fetch FPCR from mem
19290
19291
tst.l %d1 # did ifetch fail?
19292
bne.l iea_iacc # yes
19293
19294
mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19295
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19296
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19297
bsr.l _imem_read_long # fetch FPSR from mem
19298
19299
tst.l %d1 # did ifetch fail?
19300
bne.l iea_iacc # yes
19301
19302
mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
19303
rts
19304
19305
# fmovem.l #<data>, fpcr/fpsr/fpiar
19306
fctrl_in_7:
19307
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19308
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19309
bsr.l _imem_read_long # fetch FPCR from mem
19310
19311
tst.l %d1 # did ifetch fail?
19312
bne.l iea_iacc # yes
19313
19314
mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19315
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19316
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19317
bsr.l _imem_read_long # fetch FPSR from mem
19318
19319
tst.l %d1 # did ifetch fail?
19320
bne.l iea_iacc # yes
19321
19322
mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
19323
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19324
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19325
bsr.l _imem_read_long # fetch FPIAR from mem
19326
19327
tst.l %d1 # did ifetch fail?
19328
bne.l iea_iacc # yes
19329
19330
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
19331
rts
19332
19333
#########################################################################
19334
# XDEF **************************************************************** #
19335
# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
19336
# #
19337
# XREF **************************************************************** #
19338
# inc_areg() - increment an address register #
19339
# dec_areg() - decrement an address register #
19340
# #
19341
# INPUT *************************************************************** #
19342
# d0 = number of bytes to adjust <ea> by #
19343
# #
19344
# OUTPUT ************************************************************** #
19345
# None #
19346
# #
19347
# ALGORITHM *********************************************************** #
19348
# "Dummy" CALCulate Effective Address: #
19349
# The stacked <ea> for FP unimplemented instructions and opclass #
19350
# two packed instructions is correct with the exception of... #
19351
# #
19352
# 1) -(An) : The register is not updated regardless of size. #
19353
# Also, for extended precision and packed, the #
19354
# stacked <ea> value is 8 bytes too big #
19355
# 2) (An)+ : The register is not updated. #
19356
# 3) #<data> : The upper longword of the immediate operand is #
# stacked. b, w, l, and s sizes are completely stacked; #
# d, x, and p are not. #
19359
# #
19360
#########################################################################
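#
# Illustration only (not assembled): a C sketch of the <ea> fix-up the
# routine below performs. The immediate-data case is omitted and the
# names are made up for the example; mode values match the opword mode
# field compared below (0x18 = (An)+, 0x20 = -(An)).
#
#   unsigned long dcalc_ea(unsigned long stacked_ea, unsigned long *an,
#                          int mode, int nbytes)
#   {
#       if (mode == 0x18) {             /* (An)+ : we update An          */
#           *an += nbytes;
#           return stacked_ea;
#       }
#       if (mode == 0x20) {             /* -(An) : we update An          */
#           *an -= nbytes;
#           if (nbytes == 12)           /* ext/packed: <ea> 8 bytes big  */
#               return stacked_ea - 8;
#           return stacked_ea;
#       }
#       return stacked_ea;              /* other modes already correct   */
#   }
#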
19361
19362
global _dcalc_ea
19363
_dcalc_ea:
19364
mov.l %d0, %a0 # move # bytes to %a0
19365
19366
mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
19367
mov.l %d0, %d1 # make a copy
19368
19369
andi.w &0x38, %d0 # extract mode field
19370
andi.l &0x7, %d1 # extract reg field
19371
19372
cmpi.b %d0,&0x18 # is mode (An)+ ?
19373
beq.b dcea_pi # yes
19374
19375
cmpi.b %d0,&0x20 # is mode -(An) ?
19376
beq.b dcea_pd # yes
19377
19378
or.w %d1,%d0 # concat mode,reg
19379
cmpi.b %d0,&0x3c # is mode #<data>?
19380
19381
beq.b dcea_imm # yes
19382
19383
mov.l EXC_EA(%a6),%a0 # return <ea>
19384
rts
19385
19386
# need to set immediate data flag here since we'll need to do
19387
# an imem_read to fetch this later.
19388
dcea_imm:
19389
mov.b &immed_flg,SPCOND_FLG(%a6)
19390
lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
19391
rts
19392
19393
# here, the <ea> is stacked correctly. however, we must update the
19394
# address register...
19395
dcea_pi:
19396
mov.l %a0,%d0 # pass amt to inc by
19397
bsr.l inc_areg # inc addr register
19398
19399
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19400
rts
19401
19402
# the <ea> is stacked correctly for all but extended and packed, for
# which the stacked <ea> is 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode, so we don't even worry about that tricky case here : )
19406
dcea_pd:
19407
mov.l %a0,%d0 # pass amt to dec by
19408
bsr.l dec_areg # dec addr register
19409
19410
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19411
19412
cmpi.b %d0,&0xc # is opsize ext or packed?
19413
beq.b dcea_pd2 # yes
19414
rts
19415
dcea_pd2:
19416
sub.l &0x8,%a0 # correct <ea>
19417
mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
19418
rts
19419
19420
#########################################################################
19421
# XDEF **************************************************************** #
19422
# _calc_ea_fout(): calculate correct stacked <ea> for extended #
19423
# and packed data opclass 3 operations. #
19424
# #
19425
# XREF **************************************************************** #
19426
# None #
19427
# #
19428
# INPUT *************************************************************** #
19429
# None #
19430
# #
19431
# OUTPUT ************************************************************** #
19432
# a0 = return correct effective address #
19433
# #
19434
# ALGORITHM *********************************************************** #
19435
# For opclass 3 extended and packed data operations, the <ea> #
19436
# stacked for the exception is incorrect for -(an) and (an)+ addressing #
19437
# modes. Also, while we're at it, the address register itself must be #
# updated. #
# So, for -(an), we subtract 8 from the stacked <ea> value, return #
# that as the correct <ea>, and store the same value in An. #
19441
# For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
19442
# #
19443
#########################################################################
19444
19445
# This calc_ea is currently used to retrieve the correct <ea>
19446
# for fmove outs of type extended and packed.
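#
# Illustration only (not assembled): a C sketch of the fix-up described
# above for the two affected modes (names made up for the example; mode
# values match the opword mode field compared below).
#
#   unsigned long calc_ea_fout(unsigned long *stacked_ea,
#                              unsigned long *an, int mode)
#   {
#       if (mode == 0x18) {             /* (An)+ : <ea> ok, An += 12     */
#           *an += 12;
#       } else if (mode == 0x20) {      /* -(An) : <ea> is 8 too big     */
#           *stacked_ea -= 8;
#           *an = *stacked_ea;          /* An gets the corrected <ea>    */
#       }
#       return *stacked_ea;
#   }
#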
19447
global _calc_ea_fout
19448
_calc_ea_fout:
19449
mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
19450
mov.l %d0,%d1 # make a copy
19451
19452
andi.w &0x38,%d0 # extract mode field
19453
andi.l &0x7,%d1 # extract reg field
19454
19455
cmpi.b %d0,&0x18 # is mode (An)+ ?
19456
beq.b ceaf_pi # yes
19457
19458
cmpi.b %d0,&0x20 # is mode -(An) ?
19459
beq.w ceaf_pd # yes
19460
19461
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19462
rts
19463
19464
# (An)+ : extended and packed fmove out
19465
# : stacked <ea> is correct
19466
# : "An" not updated
19467
ceaf_pi:
19468
mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
19469
mov.l EXC_EA(%a6),%a0
19470
jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
19471
19472
swbeg &0x8
19473
tbl_ceaf_pi:
19474
short ceaf_pi0 - tbl_ceaf_pi
19475
short ceaf_pi1 - tbl_ceaf_pi
19476
short ceaf_pi2 - tbl_ceaf_pi
19477
short ceaf_pi3 - tbl_ceaf_pi
19478
short ceaf_pi4 - tbl_ceaf_pi
19479
short ceaf_pi5 - tbl_ceaf_pi
19480
short ceaf_pi6 - tbl_ceaf_pi
19481
short ceaf_pi7 - tbl_ceaf_pi
19482
19483
ceaf_pi0:
19484
addi.l &0xc,EXC_DREGS+0x8(%a6)
19485
rts
19486
ceaf_pi1:
19487
addi.l &0xc,EXC_DREGS+0xc(%a6)
19488
rts
19489
ceaf_pi2:
19490
add.l &0xc,%a2
19491
rts
19492
ceaf_pi3:
19493
add.l &0xc,%a3
19494
rts
19495
ceaf_pi4:
19496
add.l &0xc,%a4
19497
rts
19498
ceaf_pi5:
19499
add.l &0xc,%a5
19500
rts
19501
ceaf_pi6:
19502
addi.l &0xc,EXC_A6(%a6)
19503
rts
19504
ceaf_pi7:
19505
mov.b &mia7_flg,SPCOND_FLG(%a6)
19506
addi.l &0xc,EXC_A7(%a6)
19507
rts
19508
19509
# -(An) : extended and packed fmove out
19510
# : stacked <ea> = actual <ea> + 8
19511
# : "An" not updated
19512
ceaf_pd:
19513
mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
19514
mov.l EXC_EA(%a6),%a0
19515
sub.l &0x8,%a0
19516
sub.l &0x8,EXC_EA(%a6)
19517
jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
19518
19519
swbeg &0x8
19520
tbl_ceaf_pd:
19521
short ceaf_pd0 - tbl_ceaf_pd
19522
short ceaf_pd1 - tbl_ceaf_pd
19523
short ceaf_pd2 - tbl_ceaf_pd
19524
short ceaf_pd3 - tbl_ceaf_pd
19525
short ceaf_pd4 - tbl_ceaf_pd
19526
short ceaf_pd5 - tbl_ceaf_pd
19527
short ceaf_pd6 - tbl_ceaf_pd
19528
short ceaf_pd7 - tbl_ceaf_pd
19529
19530
ceaf_pd0:
19531
mov.l %a0,EXC_DREGS+0x8(%a6)
19532
rts
19533
ceaf_pd1:
19534
mov.l %a0,EXC_DREGS+0xc(%a6)
19535
rts
19536
ceaf_pd2:
19537
mov.l %a0,%a2
19538
rts
19539
ceaf_pd3:
19540
mov.l %a0,%a3
19541
rts
19542
ceaf_pd4:
19543
mov.l %a0,%a4
19544
rts
19545
ceaf_pd5:
19546
mov.l %a0,%a5
19547
rts
19548
ceaf_pd6:
19549
mov.l %a0,EXC_A6(%a6)
19550
rts
19551
ceaf_pd7:
19552
mov.l %a0,EXC_A7(%a6)
19553
mov.b &mda7_flg,SPCOND_FLG(%a6)
19554
rts
19555
19556
#########################################################################
19557
# XDEF **************************************************************** #
19558
# _load_fop(): load operand for unimplemented FP exception #
19559
# #
19560
# XREF **************************************************************** #
19561
# set_tag_x() - determine ext prec optype tag #
19562
# set_tag_s() - determine sgl prec optype tag #
19563
# set_tag_d() - determine dbl prec optype tag #
19564
# unnorm_fix() - convert normalized number to denorm or zero #
19565
# norm() - normalize a denormalized number #
19566
# get_packed() - fetch a packed operand from memory #
19567
# _dcalc_ea() - calculate <ea>, fixing An in process #
19568
# #
19569
# _imem_read_{word,long}() - read from instruction memory #
19570
# _dmem_read() - read from data memory #
19571
# _dmem_read_{byte,word,long}() - read from data memory #
19572
# #
19573
# facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
19574
# #
19575
# INPUT *************************************************************** #
19576
# None #
19577
# #
19578
# OUTPUT ************************************************************** #
19579
# If memory access doesn't fail: #
19580
# FP_SRC(a6) = source operand in extended precision #
19581
# FP_DST(a6) = destination operand in extended precision #
19582
# #
19583
# ALGORITHM *********************************************************** #
19584
# This is called from the Unimplemented FP exception handler in #
19585
# order to load the source and maybe destination operand into #
19586
# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
19587
# the source and destination from the FP register file. Set the optype #
19588
# tags for both if dyadic, one for monadic. If a number is an UNNORM, #
19589
# convert it to a DENORM or a ZERO. #
19590
# If the instruction is opclass two (memory->reg), then fetch #
19591
# the destination from the register file and the source operand from #
19592
# memory. Tag and fix both as above w/ opclass zero instructions. #
19593
# If the source operand is byte,word,long, or single, it may be #
19594
# in the data register file. If it's actually out in memory, use one of #
19595
# the mem_read() routines to fetch it. If the mem_read() access returns #
19596
# a failing value, exit through the special facc_in() routine which #
19597
# will create an access error exception frame from the current exception #
19598
# frame. #
19599
# Immediate data and regular data accesses are separated because #
19600
# if an immediate data access fails, the resulting fault status #
19601
# longword stacked for the access error exception must have the #
19602
# instruction bit set. #
19603
# #
19604
#########################################################################
19605
19606
global _load_fop
19607
_load_fop:
19608
19609
#    15       13 12       10 9         7 6                0
#   /           \ /          \ /         \ /                \
#   ---------------------------------------------------------
#   |  opclass   |    RX      |    RY     |    EXTENSION     |  (2nd word of general FP instruction)
#   ---------------------------------------------------------
19614
#
19615
19616
# bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19617
# cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
19618
# beq.w op010 # handle <ea> -> fpn
19619
# bgt.w op011 # handle fpn -> <ea>
19620
19621
# we're not using op011 for now...
19622
btst &0x6,EXC_CMDREG(%a6)
19623
bne.b op010
19624
19625
############################
19626
# OPCLASS '000: reg -> reg #
19627
############################
19628
op000:
19629
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
19630
btst &0x5,%d0 # testing extension bits
19631
beq.b op000_src # (bit 5 == 0) => monadic
19632
btst &0x4,%d0 # (bit 5 == 1)
19633
beq.b op000_dst # (bit 4 == 0) => dyadic
19634
and.w &0x007f,%d0 # extract extension bits {6:0}
19635
cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19636
bne.b op000_src # no; not an fcmp, so it's monadic
19637
19638
op000_dst:
19639
bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19640
bsr.l load_fpn2 # fetch dst fpreg into FP_DST
19641
19642
bsr.l set_tag_x # get dst optype tag
19643
19644
cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19645
beq.b op000_dst_unnorm # yes
19646
op000_dst_cont:
19647
mov.b %d0, DTAG(%a6) # store the dst optype tag
19648
19649
op000_src:
19650
bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19651
bsr.l load_fpn1 # fetch src fpreg into FP_SRC
19652
19653
bsr.l set_tag_x # get src optype tag
19654
19655
cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
19656
beq.b op000_src_unnorm # yes
19657
op000_src_cont:
19658
mov.b %d0, STAG(%a6) # store the src optype tag
19659
rts
19660
19661
op000_dst_unnorm:
19662
bsr.l unnorm_fix # fix the dst UNNORM
19663
bra.b op000_dst_cont
19664
op000_src_unnorm:
19665
bsr.l unnorm_fix # fix the src UNNORM
19666
bra.b op000_src_cont
19667
19668
#############################
19669
# OPCLASS '010: <ea> -> reg #
19670
#############################
19671
op010:
19672
mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
19673
btst &0x5,%d0 # testing extension bits
19674
beq.b op010_src # (bit 5 == 0) => monadic
19675
btst &0x4,%d0 # (bit 5 == 1)
19676
beq.b op010_dst # (bit 4 == 0) => dyadic
19677
and.w &0x007f,%d0 # extract extension bits {6:0}
19678
cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19679
bne.b op010_src # no; not an fcmp, so it's monadic
19680
19681
op010_dst:
19682
bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19683
bsr.l load_fpn2 # fetch dst fpreg ptr
19684
19685
bsr.l set_tag_x # get dst type tag
19686
19687
cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19688
beq.b op010_dst_unnorm # yes
19689
op010_dst_cont:
19690
mov.b %d0, DTAG(%a6) # store the dst optype tag
19691
19692
op010_src:
19693
bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19694
19695
bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19696
bne.w fetch_from_mem # src op is in memory
19697
19698
op010_dreg:
19699
clr.b STAG(%a6) # either NORM or ZERO
19700
bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19701
19702
mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19703
jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19704
19705
op010_dst_unnorm:
19706
bsr.l unnorm_fix # fix the dst UNNORM
19707
bra.b op010_dst_cont
19708
19709
swbeg &0x8
19710
tbl_op010_dreg:
19711
short opd_long - tbl_op010_dreg
19712
short opd_sgl - tbl_op010_dreg
19713
short tbl_op010_dreg - tbl_op010_dreg
19714
short tbl_op010_dreg - tbl_op010_dreg
19715
short opd_word - tbl_op010_dreg
19716
short tbl_op010_dreg - tbl_op010_dreg
19717
short opd_byte - tbl_op010_dreg
19718
short tbl_op010_dreg - tbl_op010_dreg
19719
19720
#
19721
# LONG: can be either NORM or ZERO...
19722
#
19723
opd_long:
19724
bsr.l fetch_dreg # fetch long in d0
19725
fmov.l %d0, %fp0 # load a long
19726
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19727
fbeq.w opd_long_zero # long is a ZERO
19728
rts
19729
opd_long_zero:
19730
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19731
rts
19732
19733
#
19734
# WORD: can be either NORM or ZERO...
19735
#
19736
opd_word:
19737
bsr.l fetch_dreg # fetch word in d0
19738
fmov.w %d0, %fp0 # load a word
19739
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19740
fbeq.w opd_word_zero # WORD is a ZERO
19741
rts
19742
opd_word_zero:
19743
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19744
rts
19745
19746
#
19747
# BYTE: can be either NORM or ZERO...
19748
#
19749
opd_byte:
19750
bsr.l fetch_dreg # fetch byte in d0
19751
fmov.b %d0, %fp0 # load a byte
19752
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19753
fbeq.w opd_byte_zero # byte is a ZERO
19754
rts
19755
opd_byte_zero:
19756
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19757
rts
19758
19759
#
19760
# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19761
#
19762
# separate SNANs and DENORMs so they can be loaded w/ special care.
19763
# all others can simply be moved "in" using fmove.
19764
#
19765
opd_sgl:
19766
bsr.l fetch_dreg # fetch sgl in d0
19767
mov.l %d0,L_SCR1(%a6)
19768
19769
lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
19770
bsr.l set_tag_s # determine sgl type
19771
mov.b %d0, STAG(%a6) # save the src tag
19772
19773
cmpi.b %d0, &SNAN # is it an SNAN?
19774
beq.w get_sgl_snan # yes
19775
19776
cmpi.b %d0, &DENORM # is it a DENORM?
19777
beq.w get_sgl_denorm # yes
19778
19779
fmov.s (%a0), %fp0 # no, so can load it regular
19780
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19781
rts
19782
19783
##############################################################################
19784
19785
#########################################################################
19786
# fetch_from_mem(): #
19787
# - src is out in memory. must: #
19788
# (1) calc ea - must read AFTER you know the src type since #
19789
# if the ea is -() or ()+, need to know # of bytes. #
19790
# (2) read it in from either user or supervisor space #
19791
# (3) if (b || w || l) then simply read in #
19792
# if (s || d || x) then check for SNAN,UNNORM,DENORM #
19793
# if (packed) then punt for now #
19794
# INPUT: #
19795
# %d0 : src type field #
19796
#########################################################################
19797
fetch_from_mem:
19798
clr.b STAG(%a6) # either NORM or ZERO
19799
19800
mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19801
jmp (tbl_fp_type.b,%pc,%d0.w*1)
19802
19803
swbeg &0x8
19804
tbl_fp_type:
19805
short load_long - tbl_fp_type
19806
short load_sgl - tbl_fp_type
19807
short load_ext - tbl_fp_type
19808
short load_packed - tbl_fp_type
19809
short load_word - tbl_fp_type
19810
short load_dbl - tbl_fp_type
19811
short load_byte - tbl_fp_type
19812
short tbl_fp_type - tbl_fp_type
19813
19814
#########################################
19815
# load a LONG into %fp0: #
19816
# -number can't fault #
19817
# (1) calc ea #
19818
# (2) read 4 bytes into L_SCR1 #
19819
# (3) fmov.l into %fp0 #
19820
#########################################
19821
load_long:
19822
movq.l &0x4, %d0 # pass: 4 (bytes)
19823
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19824
19825
cmpi.b SPCOND_FLG(%a6),&immed_flg
19826
beq.b load_long_immed
19827
19828
bsr.l _dmem_read_long # fetch src operand from memory
19829
19830
tst.l %d1 # did dfetch fail?
19831
bne.l facc_in_l # yes
19832
19833
load_long_cont:
19834
fmov.l %d0, %fp0 # read into %fp0;convert to xprec
19835
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19836
19837
fbeq.w load_long_zero # src op is a ZERO
19838
rts
19839
load_long_zero:
19840
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19841
rts
19842
19843
load_long_immed:
19844
bsr.l _imem_read_long # fetch src operand immed data
19845
19846
tst.l %d1 # did ifetch fail?
19847
bne.l funimp_iacc # yes
19848
bra.b load_long_cont
19849
19850
#########################################
19851
# load a WORD into %fp0: #
19852
# -number can't fault #
19853
# (1) calc ea #
19854
# (2) read 2 bytes into L_SCR1 #
19855
# (3) fmov.w into %fp0 #
19856
#########################################
19857
load_word:
19858
movq.l &0x2, %d0 # pass: 2 (bytes)
19859
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19860
19861
cmpi.b SPCOND_FLG(%a6),&immed_flg
19862
beq.b load_word_immed
19863
19864
bsr.l _dmem_read_word # fetch src operand from memory
19865
19866
tst.l %d1 # did dfetch fail?
19867
bne.l facc_in_w # yes
19868
19869
load_word_cont:
19870
fmov.w %d0, %fp0 # read into %fp0;convert to xprec
19871
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19872
19873
fbeq.w load_word_zero # src op is a ZERO
19874
rts
19875
load_word_zero:
19876
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19877
rts
19878
19879
load_word_immed:
19880
bsr.l _imem_read_word # fetch src operand immed data
19881
19882
tst.l %d1 # did ifetch fail?
19883
bne.l funimp_iacc # yes
19884
bra.b load_word_cont
19885
19886
#########################################
19887
# load a BYTE into %fp0: #
19888
# -number can't fault #
19889
# (1) calc ea #
19890
# (2) read 1 byte into L_SCR1 #
19891
# (3) fmov.b into %fp0 #
19892
#########################################
19893
load_byte:
19894
movq.l &0x1, %d0 # pass: 1 (byte)
19895
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19896
19897
cmpi.b SPCOND_FLG(%a6),&immed_flg
19898
beq.b load_byte_immed
19899
19900
bsr.l _dmem_read_byte # fetch src operand from memory
19901
19902
tst.l %d1 # did dfetch fail?
19903
bne.l facc_in_b # yes
19904
19905
load_byte_cont:
19906
fmov.b %d0, %fp0 # read into %fp0;convert to xprec
19907
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19908
19909
fbeq.w load_byte_zero # src op is a ZERO
19910
rts
19911
load_byte_zero:
19912
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19913
rts
19914
19915
load_byte_immed:
19916
bsr.l _imem_read_word # fetch src operand immed data
19917
19918
tst.l %d1 # did ifetch fail?
19919
bne.l funimp_iacc # yes
19920
bra.b load_byte_cont
19921
19922
#########################################
19923
# load a SGL into %fp0: #
19924
# -number can't fault #
19925
# (1) calc ea #
19926
# (2) read 4 bytes into L_SCR1 #
19927
# (3) fmov.s into %fp0 #
19928
#########################################
19929
load_sgl:
19930
movq.l &0x4, %d0 # pass: 4 (bytes)
19931
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19932
19933
cmpi.b SPCOND_FLG(%a6),&immed_flg
19934
beq.b load_sgl_immed
19935
19936
bsr.l _dmem_read_long # fetch src operand from memory
19937
mov.l %d0, L_SCR1(%a6) # store src op on stack
19938
19939
tst.l %d1 # did dfetch fail?
19940
bne.l facc_in_l # yes
19941
19942
load_sgl_cont:
19943
lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
19944
bsr.l set_tag_s # determine src type tag
19945
mov.b %d0, STAG(%a6) # save src optype tag on stack
19946
19947
cmpi.b %d0, &DENORM # is it a sgl DENORM?
19948
beq.w get_sgl_denorm # yes
19949
19950
cmpi.b %d0, &SNAN # is it a sgl SNAN?
19951
beq.w get_sgl_snan # yes
19952
19953
fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
19954
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19955
rts
19956
19957
load_sgl_immed:
19958
bsr.l _imem_read_long # fetch src operand immed data
19959
19960
tst.l %d1 # did ifetch fail?
19961
bne.l funimp_iacc # yes
19962
bra.b load_sgl_cont
19963
19964
# must convert sgl denorm format to an Xprec denorm fmt suitable for
19965
# normalization...
19966
# %a0 : points to sgl denorm
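#
# Illustration only (not assembled): the same conversion in C terms.
# The single-precision fraction is placed in the upper mantissa
# longword, normalized, and the extended exponent is 0x3f81 minus the
# shift count (struct and function names are made up for the example).
#
#   #include <stdint.h>
#
#   struct xprec { uint16_t sign_exp; uint32_t man_hi, man_lo; };
#
#   struct xprec sgl_denorm_to_ext(uint32_t s)
#   {
#       struct xprec x = { 0, 0, 0 };
#       uint32_t frac = (s & 0x007fffffu) << 8; /* fraction -> bits 30..8 */
#       int shift = 0;
#
#       if (frac == 0)                          /* true denorms are nonzero */
#           return x;
#       while (!(frac & 0x80000000u)) {         /* normalize: msb to bit 31 */
#           frac <<= 1;
#           shift++;
#       }
#       x.man_hi   = frac;
#       x.sign_exp = (uint16_t)(0x3f81 - shift);
#       if (s & 0x80000000u)
#           x.sign_exp |= 0x8000;               /* copy the sign bit */
#       return x;
#   }
#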
19967
get_sgl_denorm:
19968
clr.w FP_SRC_EX(%a6)
19969
bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa)
19970
lsl.l &0x8, %d0
19971
mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
19972
clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
19973
19974
clr.w FP_SRC_EX(%a6)
19975
btst &0x7, (%a0) # is sgn bit set?
19976
beq.b sgl_dnrm_norm
19977
bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
19978
19979
sgl_dnrm_norm:
19980
lea FP_SRC(%a6), %a0
19981
bsr.l norm # normalize number
19982
mov.w &0x3f81, %d1 # xprec exp = 0x3f81
19983
sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
19984
or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
19985
19986
mov.b &NORM, STAG(%a6) # fix src type tag
19987
rts
19988
19989
# convert sgl to ext SNAN
19990
# %a0 : points to sgl SNAN
19991
get_sgl_snan:
19992
mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
19993
bfextu (%a0){&9:&23}, %d0
19994
lsl.l &0x8, %d0 # extract and insert hi(man)
19995
mov.l %d0, FP_SRC_HI(%a6)
19996
clr.l FP_SRC_LO(%a6)
19997
19998
btst &0x7, (%a0) # see if sign of SNAN is set
19999
beq.b no_sgl_snan_sgn
20000
bset &0x7, FP_SRC_EX(%a6)
20001
no_sgl_snan_sgn:
20002
rts
20003
20004
#########################################
20005
# load a DBL into %fp0: #
20006
# -number can't fault #
20007
# (1) calc ea #
20008
# (2) read 8 bytes into L_SCR(1,2)#
20009
# (3) fmov.d into %fp0 #
20010
#########################################
20011
load_dbl:
20012
movq.l &0x8, %d0 # pass: 8 (bytes)
20013
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
20014
20015
cmpi.b SPCOND_FLG(%a6),&immed_flg
20016
beq.b load_dbl_immed
20017
20018
lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20019
movq.l &0x8, %d0 # pass: # bytes to read
20020
bsr.l _dmem_read # fetch src operand from memory
20021
20022
tst.l %d1 # did dfetch fail?
20023
bne.l facc_in_d # yes
20024
20025
load_dbl_cont:
20026
lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
20027
bsr.l set_tag_d # determine src type tag
20028
mov.b %d0, STAG(%a6) # set src optype tag
20029
20030
cmpi.b %d0, &DENORM # is it a dbl DENORM?
20031
beq.w get_dbl_denorm # yes
20032
20033
cmpi.b %d0, &SNAN # is it a dbl SNAN?
20034
beq.w get_dbl_snan # yes
20035
20036
fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
20037
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
20038
rts
20039
20040
load_dbl_immed:
20041
lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20042
movq.l &0x8, %d0 # pass: # bytes to read
20043
bsr.l _imem_read # fetch src operand from memory
20044
20045
tst.l %d1 # did ifetch fail?
20046
bne.l funimp_iacc # yes
20047
bra.b load_dbl_cont
20048
20049
# must convert dbl denorm format to an Xprec denorm fmt suitable for
20050
# normalization...
20051
# %a0 : loc. of dbl denorm
20052
get_dbl_denorm:
20053
clr.w FP_SRC_EX(%a6)
20054
bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20055
mov.l %d0, FP_SRC_HI(%a6)
20056
bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20057
mov.l &0xb, %d1
20058
lsl.l %d1, %d0
20059
mov.l %d0, FP_SRC_LO(%a6)
20060
20061
btst &0x7, (%a0) # is sgn bit set?
20062
beq.b dbl_dnrm_norm
20063
bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
20064
20065
dbl_dnrm_norm:
20066
lea FP_SRC(%a6), %a0
20067
bsr.l norm # normalize number
20068
mov.w &0x3c01, %d1 # xprec exp = 0x3c01
20069
sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
20070
or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
20071
20072
mov.b &NORM, STAG(%a6) # fix src type tag
20073
rts
20074
20075
# convert dbl to ext SNAN
20076
# %a0 : points to dbl SNAN
20077
get_dbl_snan:
20078
mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20079
20080
bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20081
mov.l %d0, FP_SRC_HI(%a6)
20082
bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20083
mov.l &0xb, %d1
20084
lsl.l %d1, %d0
20085
mov.l %d0, FP_SRC_LO(%a6)
20086
20087
btst &0x7, (%a0) # see if sign of SNAN is set
20088
beq.b no_dbl_snan_sgn
20089
bset &0x7, FP_SRC_EX(%a6)
20090
no_dbl_snan_sgn:
20091
rts
20092
20093
#################################################
20094
# load a Xprec into %fp0: #
20095
# -number can't fault #
20096
# (1) calc ea #
20097
# (2) read 12 bytes into L_SCR(1,2) #
20098
# (3) fmov.x into %fp0 #
20099
#################################################
20100
load_ext:
20101
mov.l &0xc, %d0 # pass: 12 (bytes)
20102
bsr.l _dcalc_ea # calc <ea>
20103
20104
lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
20105
mov.l &0xc, %d0 # pass: # of bytes to read
20106
bsr.l _dmem_read # fetch src operand from memory
20107
20108
tst.l %d1 # did dfetch fail?
20109
bne.l facc_in_x # yes
20110
20111
lea FP_SRC(%a6), %a0 # pass: ptr to src op
20112
bsr.l set_tag_x # determine src type tag
20113
20114
cmpi.b %d0, &UNNORM # is the src op an UNNORM?
20115
beq.b load_ext_unnorm # yes
20116
20117
mov.b %d0, STAG(%a6) # store the src optype tag
20118
rts
20119
20120
load_ext_unnorm:
20121
bsr.l unnorm_fix # fix the src UNNORM
20122
mov.b %d0, STAG(%a6) # store the src optype tag
20123
rts
20124
20125
#################################################
20126
# load a packed into %fp0: #
20127
# -number can't fault #
20128
# (1) calc ea #
20129
# (2) read 12 bytes into L_SCR(1,2,3) #
20130
# (3) fmov.x into %fp0 #
20131
#################################################
20132
load_packed:
20133
bsr.l get_packed
20134
20135
lea FP_SRC(%a6),%a0 # pass ptr to src op
20136
bsr.l set_tag_x # determine src type tag
20137
cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
20138
beq.b load_packed_unnorm # yes
20139
20140
mov.b %d0,STAG(%a6) # store the src optype tag
20141
rts
20142
20143
load_packed_unnorm:
20144
bsr.l unnorm_fix # fix the UNNORM ZERO
20145
mov.b %d0,STAG(%a6) # store the src optype tag
20146
rts
20147
20148
#########################################################################
20149
# XDEF **************************************************************** #
20150
# fout(): move from fp register to memory or data register #
20151
# #
20152
# XREF **************************************************************** #
20153
# _round() - needed to create EXOP for sgl/dbl precision #
20154
# norm() - needed to create EXOP for extended precision #
20155
# ovf_res() - create default overflow result for sgl/dbl precision#
20156
# unf_res() - create default underflow result for sgl/dbl prec. #
20157
# dst_dbl() - create rounded dbl precision result. #
20158
# dst_sgl() - create rounded sgl precision result. #
20159
# fetch_dreg() - fetch dynamic k-factor reg for packed. #
20160
# bindec() - convert FP binary number to packed number. #
20161
# _mem_write() - write data to memory. #
20162
# _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163
# _dmem_write_{byte,word,long}() - write data to memory. #
20164
# store_dreg_{b,w,l}() - store data to data register file. #
20165
# facc_out_{b,w,l,d,x}() - data access error occurred. #
20166
# #
20167
# INPUT *************************************************************** #
20168
# a0 = pointer to extended precision source operand #
20169
# d0 = round prec,mode #
20170
# #
20171
# OUTPUT ************************************************************** #
20172
# fp0 : intermediate underflow or overflow result if #
20173
# OVFL/UNFL occurred for a sgl or dbl operand #
20174
# #
20175
# ALGORITHM *********************************************************** #
20176
# This routine is accessed by many handlers that need to do an #
20177
# opclass three move of an operand out to memory. #
20178
# Decode an fmove out (opclass 3) instruction to determine if #
20179
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
20180
# register or memory. The algorithm uses a standard "fmove" to create #
20181
# the rounded result. Also, since exceptions are disabled, this also #
# creates the correct OPERR default result if appropriate. #
# For sgl or dbl precision, overflow or underflow can occur. If #
# either occurs and is enabled, the EXOP is created. #
20185
# For extended precision, the stacked <ea> must be fixed along #
20186
# w/ the address index register as appropriate w/ _calc_ea_fout(). If #
20187
# the source is a denorm and if underflow is enabled, an EXOP must be #
20188
# created. #
20189
# For packed, the k-factor must be fetched from the instruction #
20190
# word or a data register. The <ea> must be fixed as w/ extended #
20191
# precision. Then, bindec() is called to create the appropriate #
20192
# packed result. #
20193
# If at any time an access error is flagged by one of the move- #
20194
# to-memory routines, then a special exit must be made so that the #
20195
# access error can be handled properly. #
20196
# #
20197
#########################################################################
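#
# clarifying note (not part of the original Motorola sources): the 3-bit
# destination format field extracted below selects 0=long, 1=single,
# 2=extended, 3=packed w/ static k-factor, 4=word, 5=double, 6=byte,
# 7=packed w/ dynamic k-factor, which is why fout_pack appears twice in
# tbl_fout.
#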
20198
20199
global fout
20200
fout:
20201
bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202
mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203
jmp (tbl_fout.b,%pc,%a1) # jump to routine
20204
20205
swbeg &0x8
20206
tbl_fout:
20207
short fout_long - tbl_fout
20208
short fout_sgl - tbl_fout
20209
short fout_ext - tbl_fout
20210
short fout_pack - tbl_fout
20211
short fout_word - tbl_fout
20212
short fout_dbl - tbl_fout
20213
short fout_byte - tbl_fout
20214
short fout_pack - tbl_fout
20215
20216
#################################################################
20217
# fmove.b out ###################################################
20218
#################################################################
20219
20220
# Only "Unimplemented Data Type" exceptions enter here. The operand
20221
# is either a DENORM or a NORM.
20222
fout_byte:
20223
tst.b STAG(%a6) # is operand normalized?
20224
bne.b fout_byte_denorm # no
20225
20226
fmovm.x SRC(%a0),&0x80 # load value
20227
20228
fout_byte_norm:
20229
fmov.l %d0,%fpcr # insert rnd prec,mode
20230
20231
fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
20232
20233
fmov.l &0x0,%fpcr # clear FPCR
20234
fmov.l %fpsr,%d1 # fetch FPSR
20235
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20236
20237
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20238
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20239
beq.b fout_byte_dn # must save to integer regfile
20240
20241
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20242
bsr.l _dmem_write_byte # write byte
20243
20244
tst.l %d1 # did dstore fail?
20245
bne.l facc_out_b # yes
20246
20247
rts
20248
20249
fout_byte_dn:
20250
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20251
andi.w &0x7,%d1
20252
bsr.l store_dreg_b
20253
rts
20254
20255
fout_byte_denorm:
20256
mov.l SRC_EX(%a0),%d1
20257
andi.l &0x80000000,%d1 # keep DENORM sign
20258
ori.l &0x00800000,%d1 # make smallest sgl
20259
fmov.s %d1,%fp0
20260
bra.b fout_byte_norm
20261
20262
#################################################################
20263
# fmove.w out ###################################################
20264
#################################################################
20265
20266
# Only "Unimplemented Data Type" exceptions enter here. The operand
20267
# is either a DENORM or a NORM.
20268
fout_word:
20269
tst.b STAG(%a6) # is operand normalized?
20270
bne.b fout_word_denorm # no
20271
20272
fmovm.x SRC(%a0),&0x80 # load value
20273
20274
fout_word_norm:
20275
fmov.l %d0,%fpcr # insert rnd prec:mode
20276
20277
fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
20278
20279
fmov.l &0x0,%fpcr # clear FPCR
20280
fmov.l %fpsr,%d1 # fetch FPSR
20281
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20282
20283
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20284
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20285
beq.b fout_word_dn # must save to integer regfile
20286
20287
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20288
bsr.l _dmem_write_word # write word
20289
20290
tst.l %d1 # did dstore fail?
20291
bne.l facc_out_w # yes
20292
20293
rts
20294
20295
fout_word_dn:
20296
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20297
andi.w &0x7,%d1
20298
bsr.l store_dreg_w
20299
rts
20300
20301
fout_word_denorm:
20302
mov.l SRC_EX(%a0),%d1
20303
andi.l &0x80000000,%d1 # keep DENORM sign
20304
ori.l &0x00800000,%d1 # make smallest sgl
20305
fmov.s %d1,%fp0
20306
bra.b fout_word_norm
20307
20308
#################################################################
20309
# fmove.l out ###################################################
20310
#################################################################
20311
20312
# Only "Unimplemented Data Type" exceptions enter here. The operand
20313
# is either a DENORM or a NORM.
20314
fout_long:
20315
tst.b STAG(%a6) # is operand normalized?
20316
bne.b fout_long_denorm # no
20317
20318
fmovm.x SRC(%a0),&0x80 # load value
20319
20320
fout_long_norm:
20321
fmov.l %d0,%fpcr # insert rnd prec:mode
20322
20323
fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
20324
20325
fmov.l &0x0,%fpcr # clear FPCR
20326
fmov.l %fpsr,%d1 # fetch FPSR
20327
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20328
20329
fout_long_write:
20330
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20331
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20332
beq.b fout_long_dn # must save to integer regfile
20333
20334
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20335
bsr.l _dmem_write_long # write long
20336
20337
tst.l %d1 # did dstore fail?
20338
bne.l facc_out_l # yes
20339
20340
rts
20341
20342
fout_long_dn:
20343
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20344
andi.w &0x7,%d1
20345
bsr.l store_dreg_l
20346
rts
20347
20348
fout_long_denorm:
20349
mov.l SRC_EX(%a0),%d1
20350
andi.l &0x80000000,%d1 # keep DENORM sign
20351
ori.l &0x00800000,%d1 # make smallest sgl
20352
fmov.s %d1,%fp0
20353
bra.b fout_long_norm
20354
20355
#################################################################
20356
# fmove.x out ###################################################
20357
#################################################################
20358
20359
# Only "Unimplemented Data Type" exceptions enter here. The operand
20360
# is either a DENORM or a NORM.
20361
# The DENORM causes an Underflow exception.
20362
fout_ext:
20363
20364
# we copy the extended precision result to FP_SCR0 so that the reserved
20365
# 16-bit field gets zeroed. we do this since we promise not to disturb
20366
# what's at SRC(a0).
20367
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20368
clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
20369
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20370
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20371
20372
fmovm.x SRC(%a0),&0x80 # return result
20373
20374
bsr.l _calc_ea_fout # fix stacked <ea>
20375
20376
mov.l %a0,%a1 # pass: dst addr
20377
lea FP_SCR0(%a6),%a0 # pass: src addr
20378
mov.l &0xc,%d0 # pass: opsize is 12 bytes
20379
20380
# we must not yet write the extended precision data to the stack
20381
# in the pre-decrement case from supervisor mode or else we'll corrupt
20382
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
20383
cmpi.b SPCOND_FLG(%a6),&mda7_flg
20384
beq.b fout_ext_a7
20385
20386
bsr.l _dmem_write # write ext prec number to memory
20387
20388
tst.l %d1 # did dstore fail?
20389
bne.w fout_ext_err # yes
20390
20391
tst.b STAG(%a6) # is operand normalized?
20392
bne.b fout_ext_denorm # no
20393
rts
20394
20395
# the number is a DENORM. must set the underflow exception bit
20396
fout_ext_denorm:
20397
bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20398
20399
mov.b FPCR_ENABLE(%a6),%d0
20400
andi.b &0x0a,%d0 # is UNFL or INEX enabled?
20401
bne.b fout_ext_exc # yes
20402
rts
20403
20404
# we don't want to do the write if the exception occurred in supervisor mode
20405
# so _mem_write2() handles this for us.
20406
fout_ext_a7:
20407
bsr.l _mem_write2 # write ext prec number to memory
20408
20409
tst.l %d1 # did dstore fail?
20410
bne.w fout_ext_err # yes
20411
20412
tst.b STAG(%a6) # is operand normalized?
20413
bne.b fout_ext_denorm # no
20414
rts
20415
20416
fout_ext_exc:
20417
lea FP_SCR0(%a6),%a0
20418
bsr.l norm # normalize the mantissa
20419
neg.w %d0 # new exp = -(shft amt)
20420
andi.w &0x7fff,%d0
20421
andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20422
or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
20423
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20424
rts
20425
20426
fout_ext_err:
20427
mov.l EXC_A6(%a6),(%a6) # fix stacked a6
20428
bra.l facc_out_x
20429
20430
#########################################################################
20431
# fmove.s out ###########################################################
20432
#########################################################################
20433
fout_sgl:
20434
andi.b &0x30,%d0 # clear rnd prec
20435
ori.b &s_mode*0x10,%d0 # insert sgl prec
20436
mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20437
20438
#
20439
# operand is a normalized number. first, we check to see if the move out
20440
# would cause either an underflow or overflow. these cases are handled
20441
# separately. otherwise, set the FPCR to the proper rounding mode and
20442
# execute the move.
20443
#
20444
mov.w SRC_EX(%a0),%d0 # extract exponent
20445
andi.w &0x7fff,%d0 # strip sign
20446
20447
cmpi.w %d0,&SGL_HI # will operand overflow?
20448
bgt.w fout_sgl_ovfl # yes; go handle OVFL
20449
beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
20450
cmpi.w %d0,&SGL_LO # will operand underflow?
20451
blt.w fout_sgl_unfl # yes; go handle underflow
20452
20453
#
20454
# NORMs(in range) can be stored out by a simple "fmov.s"
20455
# Unnormalized inputs can come through this point.
20456
#
20457
fout_sgl_exg:
20458
fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20459
20460
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20461
fmov.l &0x0,%fpsr # clear FPSR
20462
20463
fmov.s %fp0,%d0 # store does convert and round
20464
20465
fmov.l &0x0,%fpcr # clear FPCR
20466
fmov.l %fpsr,%d1 # save FPSR
20467
20468
or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
20469
20470
fout_sgl_exg_write:
20471
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20472
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20473
beq.b fout_sgl_exg_write_dn # must save to integer regfile
20474
20475
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20476
bsr.l _dmem_write_long # write long
20477
20478
tst.l %d1 # did dstore fail?
20479
bne.l facc_out_l # yes
20480
20481
rts
20482
20483
fout_sgl_exg_write_dn:
20484
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20485
andi.w &0x7,%d1
20486
bsr.l store_dreg_l
20487
rts
20488
20489
#
20490
# here, we know that the operand would UNFL if moved out to single prec,
20491
# so, denorm and round and then use generic store single routine to
20492
# write the value to memory.
20493
#
20494
fout_sgl_unfl:
20495
bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20496
20497
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20498
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20499
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20500
mov.l %a0,-(%sp)
20501
20502
clr.l %d0 # pass: S.F. = 0
20503
20504
cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20505
bne.b fout_sgl_unfl_cont # let DENORMs fall through
20506
20507
lea FP_SCR0(%a6),%a0
20508
bsr.l norm # normalize the DENORM
20509
20510
fout_sgl_unfl_cont:
20511
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20512
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20513
bsr.l unf_res # calc default underflow result
20514
20515
lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20516
bsr.l dst_sgl # convert to single prec
20517
20518
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20519
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20520
beq.b fout_sgl_unfl_dn # must save to integer regfile
20521
20522
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20523
bsr.l _dmem_write_long # write long
20524
20525
tst.l %d1 # did dstore fail?
20526
bne.l facc_out_l # yes
20527
20528
bra.b fout_sgl_unfl_chkexc
20529
20530
fout_sgl_unfl_dn:
20531
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20532
andi.w &0x7,%d1
20533
bsr.l store_dreg_l
20534
20535
fout_sgl_unfl_chkexc:
20536
mov.b FPCR_ENABLE(%a6),%d1
20537
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20538
bne.w fout_sd_exc_unfl # yes
20539
addq.l &0x4,%sp
20540
rts
20541
20542
#
20543
# it's definitely an overflow so call ovf_res to get the correct answer
20544
#
20545
fout_sgl_ovfl:
20546
tst.b 3+SRC_HI(%a0) # is result inexact?
20547
bne.b fout_sgl_ovfl_inex2
20548
tst.l SRC_LO(%a0) # is result inexact?
20549
bne.b fout_sgl_ovfl_inex2
20550
ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20551
bra.b fout_sgl_ovfl_cont
20552
fout_sgl_ovfl_inex2:
20553
ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20554
20555
fout_sgl_ovfl_cont:
20556
mov.l %a0,-(%sp)
20557
20558
# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20559
# overflow result. DON'T save the returned ccodes from ovf_res() since
20560
# fmove out doesn't alter them.
20561
tst.b SRC_EX(%a0) # is operand negative?
20562
smi %d1 # set if so
20563
mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
20564
bsr.l ovf_res # calc OVFL result
20565
fmovm.x (%a0),&0x80 # load default overflow result
20566
fmov.s %fp0,%d0 # store to single
20567
20568
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20569
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20570
beq.b fout_sgl_ovfl_dn # must save to integer regfile
20571
20572
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20573
bsr.l _dmem_write_long # write long
20574
20575
tst.l %d1 # did dstore fail?
20576
bne.l facc_out_l # yes
20577
20578
bra.b fout_sgl_ovfl_chkexc
20579
20580
fout_sgl_ovfl_dn:
20581
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20582
andi.w &0x7,%d1
20583
bsr.l store_dreg_l
20584
20585
fout_sgl_ovfl_chkexc:
20586
mov.b FPCR_ENABLE(%a6),%d1
20587
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20588
bne.w fout_sd_exc_ovfl # yes
20589
addq.l &0x4,%sp
20590
rts
20591
20592
#
20593
# move out MAY overflow:
20594
# (1) force the exp to 0x3fff
20595
# (2) do a move w/ appropriate rnd mode
20596
# (3) if exp still equals zero, then insert original exponent
20597
# for the correct result.
20598
# if exp now equals one, then it overflowed so call ovf_res.
20599
#
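#
# illustrative note (not part of the original sources): SGL_HI corresponds
# to the largest exponent representable in single precision. scaling the
# exponent to 0x3fff puts the operand in [1.0, 2.0); if rounding to single
# carries the mantissa up to 2.0, the true result would need an exponent
# one larger than SGL_HI, so the fcmp against 2.0 below branches to
# fout_sgl_ovfl. otherwise the operand fits and fout_sgl_exg stores it.
#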
20600
fout_sgl_may_ovfl:
20601
mov.w SRC_EX(%a0),%d1 # fetch current sign
20602
andi.w &0x8000,%d1 # keep it,clear exp
20603
ori.w &0x3fff,%d1 # insert exp = 0
20604
mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20605
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20607
20608
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20609
20610
fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20611
fmov.l &0x0,%fpcr # clear FPCR
20612
20613
fabs.x %fp0 # need absolute value
20614
fcmp.b %fp0,&0x2 # did exponent increase?
20615
fblt.w fout_sgl_exg # no; go finish NORM
20616
bra.w fout_sgl_ovfl # yes; go handle overflow
20617
20618
################
20619
20620
fout_sd_exc_unfl:
20621
mov.l (%sp)+,%a0
20622
20623
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20624
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20625
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20626
20627
cmpi.b STAG(%a6),&DENORM # was src a DENORM?
20628
bne.b fout_sd_exc_cont # no
20629
20630
lea FP_SCR0(%a6),%a0
20631
bsr.l norm
20632
neg.l %d0
20633
andi.w &0x7fff,%d0
20634
bfins %d0,FP_SCR0_EX(%a6){&1:&15}
20635
bra.b fout_sd_exc_cont
20636
20637
fout_sd_exc:
20638
fout_sd_exc_ovfl:
20639
mov.l (%sp)+,%a0 # restore a0
20640
20641
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20642
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20643
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20644
20645
fout_sd_exc_cont:
20646
bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
20647
sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
20648
lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
20649
20650
mov.b 3+L_SCR3(%a6),%d1
20651
lsr.b &0x4,%d1
20652
andi.w &0x0c,%d1
20653
swap %d1
20654
mov.b 3+L_SCR3(%a6),%d1
20655
lsr.b &0x4,%d1
20656
andi.w &0x03,%d1
20657
clr.l %d0 # pass: zero g,r,s
20658
bsr.l _round # round the DENORM
20659
20660
tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
20661
beq.b fout_sd_exc_done # no
20662
bset &0x7,FP_SCR0_EX(%a6) # yes
20663
20664
fout_sd_exc_done:
20665
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20666
rts
20667
20668
#################################################################
20669
# fmove.d out ###################################################
20670
#################################################################
20671
fout_dbl:
20672
andi.b &0x30,%d0 # clear rnd prec
20673
ori.b &d_mode*0x10,%d0 # insert dbl prec
20674
mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20675
20676
#
20677
# operand is a normalized number. first, we check to see if the move out
20678
# would cause either an underflow or overflow. these cases are handled
20679
# separately. otherwise, set the FPCR to the proper rounding mode and
20680
# execute the move.
20681
#
20682
mov.w SRC_EX(%a0),%d0 # extract exponent
20683
andi.w &0x7fff,%d0 # strip sign
20684
20685
cmpi.w %d0,&DBL_HI # will operand overflow?
20686
bgt.w fout_dbl_ovfl # yes; go handle OVFL
20687
beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
20688
cmpi.w %d0,&DBL_LO # will operand underflow?
20689
blt.w fout_dbl_unfl # yes; go handle underflow
20690
20691
#
20692
# NORMs(in range) can be stored out by a simple "fmov.d"
20693
# Unnormalized inputs can come through this point.
20694
#
20695
fout_dbl_exg:
20696
fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20697
20698
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20699
fmov.l &0x0,%fpsr # clear FPSR
20700
20701
fmov.d %fp0,L_SCR1(%a6) # store does convert and round
20702
20703
fmov.l &0x0,%fpcr # clear FPCR
20704
fmov.l %fpsr,%d0 # save FPSR
20705
20706
or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
20707
20708
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20709
lea L_SCR1(%a6),%a0 # pass: src addr
20710
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20711
bsr.l _dmem_write # store dbl fop to memory
20712
20713
tst.l %d1 # did dstore fail?
20714
bne.l facc_out_d # yes
20715
20716
rts # no; so we're finished
20717
20718
#
20719
# here, we know that the operand would UNFL if moved out to double prec,
20720
# so, denorm and round and then use generic store double routine to
20721
# write the value to memory.
20722
#
20723
fout_dbl_unfl:
20724
bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20725
20726
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20727
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20728
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20729
mov.l %a0,-(%sp)
20730
20731
clr.l %d0 # pass: S.F. = 0
20732
20733
cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20734
bne.b fout_dbl_unfl_cont # let DENORMs fall through
20735
20736
lea FP_SCR0(%a6),%a0
20737
bsr.l norm # normalize the DENORM
20738
20739
fout_dbl_unfl_cont:
20740
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20741
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20742
bsr.l unf_res # calc default underflow result
20743
20744
lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20745
bsr.l dst_dbl # convert to double prec
20746
mov.l %d0,L_SCR1(%a6)
20747
mov.l %d1,L_SCR2(%a6)
20748
20749
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20750
lea L_SCR1(%a6),%a0 # pass: src addr
20751
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20752
bsr.l _dmem_write # store dbl fop to memory
20753
20754
tst.l %d1 # did dstore fail?
20755
bne.l facc_out_d # yes
20756
20757
mov.b FPCR_ENABLE(%a6),%d1
20758
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20759
bne.w fout_sd_exc_unfl # yes
20760
addq.l &0x4,%sp
20761
rts
20762
20763
#
20764
# it's definitely an overflow so call ovf_res to get the correct answer
20765
#
20766
fout_dbl_ovfl:
20767
mov.w 2+SRC_LO(%a0),%d0
20768
andi.w &0x7ff,%d0
20769
bne.b fout_dbl_ovfl_inex2
20770
20771
ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772
bra.b fout_dbl_ovfl_cont
20773
fout_dbl_ovfl_inex2:
20774
ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20775
20776
fout_dbl_ovfl_cont:
20777
mov.l %a0,-(%sp)
20778
20779
# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780
# overflow result. DON'T save the returned ccodes from ovf_res() since
20781
# fmove out doesn't alter them.
20782
tst.b SRC_EX(%a0) # is operand negative?
20783
smi %d1 # set if so
20784
mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
20785
bsr.l ovf_res # calc OVFL result
20786
fmovm.x (%a0),&0x80 # load default overflow result
20787
fmov.d %fp0,L_SCR1(%a6) # store to double
20788
20789
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20790
lea L_SCR1(%a6),%a0 # pass: src addr
20791
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20792
bsr.l _dmem_write # store dbl fop to memory
20793
20794
tst.l %d1 # did dstore fail?
20795
bne.l facc_out_d # yes
20796
20797
mov.b FPCR_ENABLE(%a6),%d1
20798
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20799
bne.w fout_sd_exc_ovfl # yes
20800
addq.l &0x4,%sp
20801
rts
20802
20803
#
20804
# move out MAY overflow:
20805
# (1) force the exp to 0x3fff
20806
# (2) do a move w/ appropriate rnd mode
20807
# (3) if exp still equals zero, then insert original exponent
20808
# for the correct result.
20809
# if exp now equals one, then it overflowed so call ovf_res.
20810
#
20811
fout_dbl_may_ovfl:
20812
mov.w SRC_EX(%a0),%d1 # fetch current sign
20813
andi.w &0x8000,%d1 # keep it,clear exp
20814
ori.w &0x3fff,%d1 # insert exp = 0
20815
mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20816
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20818
20819
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20820
20821
fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20822
fmov.l &0x0,%fpcr # clear FPCR
20823
20824
fabs.x %fp0 # need absolute value
20825
fcmp.b %fp0,&0x2 # did exponent increase?
20826
fblt.w fout_dbl_exg # no; go finish NORM
20827
bra.w fout_dbl_ovfl # yes; go handle overflow
20828
20829
#########################################################################
20830
# XDEF **************************************************************** #
20831
# dst_dbl(): create double precision value from extended prec. #
20832
# #
20833
# XREF **************************************************************** #
20834
# None #
20835
# #
20836
# INPUT *************************************************************** #
20837
# a0 = pointer to source operand in extended precision #
20838
# #
20839
# OUTPUT ************************************************************** #
20840
# d0 = hi(double precision result) #
20841
# d1 = lo(double precision result) #
20842
# #
20843
# ALGORITHM *********************************************************** #
20844
# #
20845
# Changes extended precision to double precision. #
20846
# Note: no attempt is made to round the extended value to double. #
20847
# dbl_sign = ext_sign #
20848
# dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
20849
# get rid of ext integer bit #
20850
# dbl_mant = ext_mant{62:11} #
20851
# #
20852
# --------------- --------------- --------------- #
20853
# extended -> |s| exp | |1| ms mant | | ls mant | #
20854
# --------------- --------------- --------------- #
20855
# 95 64 63 62 32 31 11 0 #
20856
# | | #
20857
# | | #
20858
# | | #
20859
# v v #
20860
# --------------- --------------- #
20861
# double -> |s|exp| mant | | mant | #
20862
# --------------- --------------- #
20863
# 63 51 32 31 0 #
20864
# #
20865
#########################################################################
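#
# worked example (illustrative; not part of the original sources): for the
# value 1.0, ext_exp = $3fff, so dbl_exp = $3fff - $3fff + $3ff = $3ff,
# the biased double precision exponent of 1.0. the explicit integer bit
# of the extended mantissa is dropped and the next 52 fraction bits are
# returned in d0/d1 as the double mantissa.
#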
20866
20867
dst_dbl:
20868
clr.l %d0 # clear d0
20869
mov.w FTEMP_EX(%a0),%d0 # get exponent
20870
subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20871
addi.w &DBL_BIAS,%d0 # add double precision bias
20872
tst.b FTEMP_HI(%a0) # is number a denorm?
20873
bmi.b dst_get_dupper # no
20874
subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
20875
dst_get_dupper:
20876
swap %d0 # d0 now in upper word
20877
lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
20878
tst.b FTEMP_EX(%a0) # test sign
20879
bpl.b dst_get_dman # if positive, go process mantissa
20880
bset &0x1f,%d0 # if negative, set sign
20881
dst_get_dman:
20882
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20883
bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
20884
or.l %d1,%d0 # put these bits in ms word of double
20885
mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
20886
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20887
mov.l &21,%d0 # load shift count
20888
lsl.l %d0,%d1 # put lower 11 bits in upper bits
20889
mov.l %d1,L_SCR2(%a6) # build lower lword in memory
20890
mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
20891
bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
20892
mov.l L_SCR2(%a6),%d1
20893
or.l %d0,%d1 # put them in double result
20894
mov.l L_SCR1(%a6),%d0
20895
rts
20896
20897
#########################################################################
20898
# XDEF **************************************************************** #
20899
# dst_sgl(): create single precision value from extended prec #
20900
# #
20901
# XREF **************************************************************** #
20902
# #
20903
# INPUT *************************************************************** #
20904
# a0 = pointer to source operand in extended precision #
20905
# #
20906
# OUTPUT ************************************************************** #
20907
# d0 = single precision result #
20908
# #
20909
# ALGORITHM *********************************************************** #
20910
# #
20911
# Changes extended precision to single precision. #
20912
# sgl_sign = ext_sign #
20913
# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
20914
# get rid of ext integer bit #
20915
# sgl_mant = ext_mant{62:40} #
20916
# #
20917
# --------------- --------------- --------------- #
20918
# extended -> |s| exp | |1| ms mant | | ls mant | #
20919
# --------------- --------------- --------------- #
20920
# 95 64 63 62 40 32 31 12 0 #
20921
# | | #
20922
# | | #
20923
# | | #
20924
# v v #
20925
# --------------- #
20926
# single -> |s|exp| mant | #
20927
# --------------- #
20928
# 31 22 0 #
20929
# #
20930
#########################################################################
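#
# worked example (illustrative; not part of the original sources): for the
# value 1.0, ext_exp = $3fff, so sgl_exp = $3fff - $3fff + $7f = $7f, the
# biased single precision exponent of 1.0. the top 23 fraction bits of
# the extended mantissa become the single mantissa; as in dst_dbl(), no
# rounding is performed here.
#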
20931
20932
dst_sgl:
20933
clr.l %d0
20934
mov.w FTEMP_EX(%a0),%d0 # get exponent
20935
subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20936
addi.w &SGL_BIAS,%d0 # add single precision bias
20937
tst.b FTEMP_HI(%a0) # is number a denorm?
20938
bmi.b dst_get_supper # no
20939
subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
20940
dst_get_supper:
20941
swap %d0 # put exp in upper word of d0
20942
lsl.l &0x7,%d0 # shift it into single exp bits
20943
tst.b FTEMP_EX(%a0) # test sign
20944
bpl.b dst_get_sman # if positive, continue
20945
bset &0x1f,%d0 # if negative, put in sign first
20946
dst_get_sman:
20947
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20948
andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
20949
lsr.l &0x8,%d1 # and put them flush right
20950
or.l %d1,%d0 # put these bits in ms word of single
20951
rts
20952
20953
##############################################################################
20954
fout_pack:
20955
bsr.l _calc_ea_fout # fetch the <ea>
20956
mov.l %a0,-(%sp)
20957
20958
mov.b STAG(%a6),%d0 # fetch input type
20959
bne.w fout_pack_not_norm # input is not NORM
20960
20961
fout_pack_norm:
20962
btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
20963
beq.b fout_pack_s # static
20964
20965
fout_pack_d:
20966
mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
20967
lsr.b &0x4,%d1
20968
andi.w &0x7,%d1
20969
20970
bsr.l fetch_dreg # fetch Dn w/ k-factor
20971
20972
bra.b fout_pack_type
20973
fout_pack_s:
20974
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
20975
20976
fout_pack_type:
20977
bfexts %d0{&25:&7},%d0 # extract k-factor
20978
mov.l %d0,-(%sp)
20979
20980
lea FP_SRC(%a6),%a0 # pass: ptr to input
20981
20982
# bindec is currently scrambling FP_SRC for denorm inputs.
20983
# we'll have to change this, but for now, tough luck!!!
20984
bsr.l bindec # convert xprec to packed
20985
20986
# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20987
andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
20988
20989
mov.l (%sp)+,%d0
20990
20991
tst.b 3+FP_SCR0_EX(%a6)
20992
bne.b fout_pack_set
20993
tst.l FP_SCR0_HI(%a6)
20994
bne.b fout_pack_set
20995
tst.l FP_SCR0_LO(%a6)
20996
bne.b fout_pack_set
20997
20998
# add the extra condition that only if the k-factor was zero, too, should
20999
# we zero the exponent
21000
tst.l %d0
21001
bne.b fout_pack_set
21002
# "mantissa" is all zero which means that the answer is zero. but, the '040
21003
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
21004
# if the mantissa is zero, I will zero the exponent, too.
21005
# the question now is whether the exponents sign bit is allowed to be non-zero
21006
# for a zero, also...
21007
andi.w &0xf000,FP_SCR0(%a6)
21008
21009
fout_pack_set:
21010
21011
lea FP_SCR0(%a6),%a0 # pass: src addr
21012
21013
fout_pack_write:
21014
mov.l (%sp)+,%a1 # pass: dst addr
21015
mov.l &0xc,%d0 # pass: opsize is 12 bytes
21016
21017
cmpi.b SPCOND_FLG(%a6),&mda7_flg
21018
beq.b fout_pack_a7
21019
21020
bsr.l _dmem_write # write ext prec number to memory
21021
21022
tst.l %d1 # did dstore fail?
21023
bne.w fout_ext_err # yes
21024
21025
rts
21026
21027
# we don't want to do the write if the exception occurred in supervisor mode
21028
# so _mem_write2() handles this for us.
21029
fout_pack_a7:
21030
bsr.l _mem_write2 # write ext prec number to memory
21031
21032
tst.l %d1 # did dstore fail?
21033
bne.w fout_ext_err # yes
21034
21035
rts
21036
21037
fout_pack_not_norm:
21038
cmpi.b %d0,&DENORM # is it a DENORM?
21039
beq.w fout_pack_norm # yes
21040
lea FP_SRC(%a6),%a0
21041
clr.w 2+FP_SRC_EX(%a6)
21042
cmpi.b %d0,&SNAN # is it an SNAN?
21043
beq.b fout_pack_snan # yes
21044
bra.b fout_pack_write # no
21045
21046
fout_pack_snan:
21047
ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21048
bset &0x6,FP_SRC_HI(%a6) # set snan bit
21049
bra.b fout_pack_write
21050
21051
#########################################################################
21052
# XDEF **************************************************************** #
21053
# fetch_dreg(): fetch register according to index in d1 #
21054
# #
21055
# XREF **************************************************************** #
21056
# None #
21057
# #
21058
# INPUT *************************************************************** #
21059
# d1 = index of register to fetch from #
21060
# #
21061
# OUTPUT ************************************************************** #
21062
# d0 = value of register fetched #
21063
# #
21064
# ALGORITHM *********************************************************** #
21065
# According to the index value in d1 which can range from zero #
21066
# to fifteen, load the corresponding register file value (where #
21067
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
21068
# stack. The rest should still be in their original places. #
21069
# #
21070
#########################################################################
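#
# clarifying note (not part of the original sources): indexes 0-7 select
# d0-d7 and indexes 8-15 select a0-a7, so callers pass 8+n for an address
# register. d0/d1/a0/a1 are read from their saved copies in EXC_DREGS,
# a6 from the saved frame pointer at (%a6), a7 from EXC_A7, and the
# remaining registers are read directly since they are still live.
#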
21071
21072
# this routine leaves d1 intact for subsequent store_dreg calls.
21073
global fetch_dreg
21074
fetch_dreg:
21075
mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
21076
jmp (tbl_fdreg.b,%pc,%d0.w*1)
21077
21078
tbl_fdreg:
21079
short fdreg0 - tbl_fdreg
21080
short fdreg1 - tbl_fdreg
21081
short fdreg2 - tbl_fdreg
21082
short fdreg3 - tbl_fdreg
21083
short fdreg4 - tbl_fdreg
21084
short fdreg5 - tbl_fdreg
21085
short fdreg6 - tbl_fdreg
21086
short fdreg7 - tbl_fdreg
21087
short fdreg8 - tbl_fdreg
21088
short fdreg9 - tbl_fdreg
21089
short fdrega - tbl_fdreg
21090
short fdregb - tbl_fdreg
21091
short fdregc - tbl_fdreg
21092
short fdregd - tbl_fdreg
21093
short fdrege - tbl_fdreg
21094
short fdregf - tbl_fdreg
21095
21096
fdreg0:
21097
mov.l EXC_DREGS+0x0(%a6),%d0
21098
rts
21099
fdreg1:
21100
mov.l EXC_DREGS+0x4(%a6),%d0
21101
rts
21102
fdreg2:
21103
mov.l %d2,%d0
21104
rts
21105
fdreg3:
21106
mov.l %d3,%d0
21107
rts
21108
fdreg4:
21109
mov.l %d4,%d0
21110
rts
21111
fdreg5:
21112
mov.l %d5,%d0
21113
rts
21114
fdreg6:
21115
mov.l %d6,%d0
21116
rts
21117
fdreg7:
21118
mov.l %d7,%d0
21119
rts
21120
fdreg8:
21121
mov.l EXC_DREGS+0x8(%a6),%d0
21122
rts
21123
fdreg9:
21124
mov.l EXC_DREGS+0xc(%a6),%d0
21125
rts
21126
fdrega:
21127
mov.l %a2,%d0
21128
rts
21129
fdregb:
21130
mov.l %a3,%d0
21131
rts
21132
fdregc:
21133
mov.l %a4,%d0
21134
rts
21135
fdregd:
21136
mov.l %a5,%d0
21137
rts
21138
fdrege:
21139
mov.l (%a6),%d0
21140
rts
21141
fdregf:
21142
mov.l EXC_A7(%a6),%d0
21143
rts
21144
21145
#########################################################################
21146
# XDEF **************************************************************** #
21147
# store_dreg_l(): store longword to data register specified by d1 #
21148
# #
21149
# XREF **************************************************************** #
21150
# None #
21151
# #
21152
# INPUT *************************************************************** #
21153
# d0 = longword value to store #
21154
# d1 = index of data register to store to #
21155
# #
21156
# OUTPUT ************************************************************** #
21157
# (data register is updated) #
21158
# #
21159
# ALGORITHM *********************************************************** #
21160
# According to the index value in d1, store the longword value #
21161
# in d0 to the corresponding data register. D0/D1 are on the stack #
21162
# while the rest are in their initial places. #
21163
# #
21164
#########################################################################
21165
21166
global store_dreg_l
21167
store_dreg_l:
21168
mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
21169
jmp (tbl_sdregl.b,%pc,%d1.w*1)
21170
21171
tbl_sdregl:
21172
short sdregl0 - tbl_sdregl
21173
short sdregl1 - tbl_sdregl
21174
short sdregl2 - tbl_sdregl
21175
short sdregl3 - tbl_sdregl
21176
short sdregl4 - tbl_sdregl
21177
short sdregl5 - tbl_sdregl
21178
short sdregl6 - tbl_sdregl
21179
short sdregl7 - tbl_sdregl
21180
21181
sdregl0:
21182
mov.l %d0,EXC_DREGS+0x0(%a6)
21183
rts
21184
sdregl1:
21185
mov.l %d0,EXC_DREGS+0x4(%a6)
21186
rts
21187
sdregl2:
21188
mov.l %d0,%d2
21189
rts
21190
sdregl3:
21191
mov.l %d0,%d3
21192
rts
21193
sdregl4:
21194
mov.l %d0,%d4
21195
rts
21196
sdregl5:
21197
mov.l %d0,%d5
21198
rts
21199
sdregl6:
21200
mov.l %d0,%d6
21201
rts
21202
sdregl7:
21203
mov.l %d0,%d7
21204
rts
21205
21206
#########################################################################
21207
# XDEF **************************************************************** #
21208
# store_dreg_w(): store word to data register specified by d1 #
21209
# #
21210
# XREF **************************************************************** #
21211
# None #
21212
# #
21213
# INPUT *************************************************************** #
21214
# d0 = word value to store #
21215
# d1 = index of data register to store to #
21216
# #
21217
# OUTPUT ************************************************************** #
21218
# (data register is updated) #
21219
# #
21220
# ALGORITHM *********************************************************** #
21221
# According to the index value in d1, store the word value #
21222
# in d0 to the corresponding data register. D0/D1 are on the stack #
21223
# while the rest are in their initial places. #
21224
# #
21225
#########################################################################
21226
21227
global store_dreg_w
21228
store_dreg_w:
21229
mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1
21230
jmp (tbl_sdregw.b,%pc,%d1.w*1)
21231
21232
tbl_sdregw:
21233
short sdregw0 - tbl_sdregw
21234
short sdregw1 - tbl_sdregw
21235
short sdregw2 - tbl_sdregw
21236
short sdregw3 - tbl_sdregw
21237
short sdregw4 - tbl_sdregw
21238
short sdregw5 - tbl_sdregw
21239
short sdregw6 - tbl_sdregw
21240
short sdregw7 - tbl_sdregw
21241
21242
sdregw0:
21243
mov.w %d0,2+EXC_DREGS+0x0(%a6)
21244
rts
21245
sdregw1:
21246
mov.w %d0,2+EXC_DREGS+0x4(%a6)
21247
rts
21248
sdregw2:
21249
mov.w %d0,%d2
21250
rts
21251
sdregw3:
21252
mov.w %d0,%d3
21253
rts
21254
sdregw4:
21255
mov.w %d0,%d4
21256
rts
21257
sdregw5:
21258
mov.w %d0,%d5
21259
rts
21260
sdregw6:
21261
mov.w %d0,%d6
21262
rts
21263
sdregw7:
21264
mov.w %d0,%d7
21265
rts
21266
21267
#########################################################################
21268
# XDEF **************************************************************** #
21269
# store_dreg_b(): store byte to data register specified by d1 #
21270
# #
21271
# XREF **************************************************************** #
21272
# None #
21273
# #
21274
# INPUT *************************************************************** #
21275
# d0 = byte value to store #
21276
# d1 = index of data register to store to #
21277
# #
21278
# OUTPUT ************************************************************** #
21279
# (data register is updated) #
21280
# #
21281
# ALGORITHM *********************************************************** #
21282
# According to the index value in d1, store the byte value #
21283
# in d0 to the corresponding data register. D0/D1 are on the stack #
21284
# while the rest are in their initial places. #
21285
# #
21286
#########################################################################
21287
21288
global store_dreg_b
21289
store_dreg_b:
21290
mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1
21291
jmp (tbl_sdregb.b,%pc,%d1.w*1)
21292
21293
tbl_sdregb:
21294
short sdregb0 - tbl_sdregb
21295
short sdregb1 - tbl_sdregb
21296
short sdregb2 - tbl_sdregb
21297
short sdregb3 - tbl_sdregb
21298
short sdregb4 - tbl_sdregb
21299
short sdregb5 - tbl_sdregb
21300
short sdregb6 - tbl_sdregb
21301
short sdregb7 - tbl_sdregb
21302
21303
sdregb0:
21304
mov.b %d0,3+EXC_DREGS+0x0(%a6)
21305
rts
21306
sdregb1:
21307
mov.b %d0,3+EXC_DREGS+0x4(%a6)
21308
rts
21309
sdregb2:
21310
mov.b %d0,%d2
21311
rts
21312
sdregb3:
21313
mov.b %d0,%d3
21314
rts
21315
sdregb4:
21316
mov.b %d0,%d4
21317
rts
21318
sdregb5:
21319
mov.b %d0,%d5
21320
rts
21321
sdregb6:
21322
mov.b %d0,%d6
21323
rts
21324
sdregb7:
21325
mov.b %d0,%d7
21326
rts
21327
21328
#########################################################################
21329
# XDEF **************************************************************** #
21330
# inc_areg(): increment an address register by the value in d0 #
21331
# #
21332
# XREF **************************************************************** #
21333
# None #
21334
# #
21335
# INPUT *************************************************************** #
21336
# d0 = amount to increment by #
21337
# d1 = index of address register to increment #
21338
# #
21339
# OUTPUT ************************************************************** #
21340
# (address register is updated) #
21341
# #
21342
# ALGORITHM *********************************************************** #
21343
# Typically used for an instruction w/ a post-increment <ea>, #
21344
# this routine adds the increment value in d0 to the address register #
21345
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21346
# in their original places. #
21347
# For a7, if the increment amount is one, then we have to #
21348
# increment by two. For any a7 update, set the mia7_flag so that if #
21349
# an access error exception occurs later in emulation, this address #
21350
# register update can be undone. #
21351
# #
21352
#########################################################################
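#
# usage sketch (illustrative; not part of the original sources): emulating
# "fmove.b %fp0,(%a2)+" would call inc_areg with d0 = 1 and d1 = 2,
# advancing a2 by one. the same call with d1 = 7 takes the iareg7b path
# and bumps a7 by two so the stack pointer stays word aligned.
#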
21353
21354
global inc_areg
21355
inc_areg:
21356
mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1
21357
jmp (tbl_iareg.b,%pc,%d1.w*1)
21358
21359
tbl_iareg:
21360
short iareg0 - tbl_iareg
21361
short iareg1 - tbl_iareg
21362
short iareg2 - tbl_iareg
21363
short iareg3 - tbl_iareg
21364
short iareg4 - tbl_iareg
21365
short iareg5 - tbl_iareg
21366
short iareg6 - tbl_iareg
21367
short iareg7 - tbl_iareg
21368
21369
iareg0: add.l %d0,EXC_DREGS+0x8(%a6)
21370
rts
21371
iareg1: add.l %d0,EXC_DREGS+0xc(%a6)
21372
rts
21373
iareg2: add.l %d0,%a2
21374
rts
21375
iareg3: add.l %d0,%a3
21376
rts
21377
iareg4: add.l %d0,%a4
21378
rts
21379
iareg5: add.l %d0,%a5
21380
rts
21381
iareg6: add.l %d0,(%a6)
21382
rts
21383
iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
21384
cmpi.b %d0,&0x1
21385
beq.b iareg7b
21386
add.l %d0,EXC_A7(%a6)
21387
rts
21388
iareg7b:
21389
addq.l &0x2,EXC_A7(%a6)
21390
rts
21391
21392
#########################################################################
21393
# XDEF **************************************************************** #
21394
# dec_areg(): decrement an address register by the value in d0 #
21395
# #
21396
# XREF **************************************************************** #
21397
# None #
21398
# #
21399
# INPUT *************************************************************** #
21400
# d0 = amount to decrement by #
21401
# d1 = index of address register to decrement #
21402
# #
21403
# OUTPUT ************************************************************** #
21404
# (address register is updated) #
21405
# #
21406
# ALGORITHM *********************************************************** #
21407
# Typically used for an instruction w/ a pre-decrement <ea>, #
21408
# this routine subtracts the decrement value in d0 from the address register #
21409
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21410
# in their original places. #
21411
# For a7, if the decrement amount is one, then we have to #
21412
# decrement by two. For any a7 update, set the mda7_flag so that if #
21413
# an access error exception occurs later in emulation, this address #
21414
# register update can be undone. #
21415
# #
21416
#########################################################################
21417
21418
global dec_areg
21419
dec_areg:
21420
mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1
21421
jmp (tbl_dareg.b,%pc,%d1.w*1)
21422
21423
tbl_dareg:
21424
short dareg0 - tbl_dareg
21425
short dareg1 - tbl_dareg
21426
short dareg2 - tbl_dareg
21427
short dareg3 - tbl_dareg
21428
short dareg4 - tbl_dareg
21429
short dareg5 - tbl_dareg
21430
short dareg6 - tbl_dareg
21431
short dareg7 - tbl_dareg
21432
21433
dareg0: sub.l %d0,EXC_DREGS+0x8(%a6)
21434
rts
21435
dareg1: sub.l %d0,EXC_DREGS+0xc(%a6)
21436
rts
21437
dareg2: sub.l %d0,%a2
21438
rts
21439
dareg3: sub.l %d0,%a3
21440
rts
21441
dareg4: sub.l %d0,%a4
21442
rts
21443
dareg5: sub.l %d0,%a5
21444
rts
21445
dareg6: sub.l %d0,(%a6)
21446
rts
21447
dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
21448
cmpi.b %d0,&0x1
21449
beq.b dareg7b
21450
sub.l %d0,EXC_A7(%a6)
21451
rts
21452
dareg7b:
21453
subq.l &0x2,EXC_A7(%a6)
21454
rts
21455
21456
##############################################################################
21457
21458
#########################################################################
21459
# XDEF **************************************************************** #
21460
# load_fpn1(): load FP register value into FP_SRC(a6). #
21461
# #
21462
# XREF **************************************************************** #
21463
# None #
21464
# #
21465
# INPUT *************************************************************** #
21466
# d0 = index of FP register to load #
21467
# #
21468
# OUTPUT ************************************************************** #
21469
# FP_SRC(a6) = value loaded from FP register file #
21470
# #
21471
# ALGORITHM *********************************************************** #
21472
# Using the index in d0, load FP_SRC(a6) with a number from the #
21473
# FP register file. #
21474
# #
21475
#########################################################################
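#
# clarifying note (not part of the original sources): fp0 and fp1 were
# saved to the exception save area (EXC_FP0/EXC_FP1) on entry, so they
# are copied from memory; fp2-fp7 are still live in the FPU and are
# captured with fmovm.x directly into FP_SRC. every path returns with
# a0 pointing at FP_SRC(%a6).
#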
21476
21477
global load_fpn1
21478
load_fpn1:
21479
mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
21480
jmp (tbl_load_fpn1.b,%pc,%d0.w*1)
21481
21482
tbl_load_fpn1:
21483
short load_fpn1_0 - tbl_load_fpn1
21484
short load_fpn1_1 - tbl_load_fpn1
21485
short load_fpn1_2 - tbl_load_fpn1
21486
short load_fpn1_3 - tbl_load_fpn1
21487
short load_fpn1_4 - tbl_load_fpn1
21488
short load_fpn1_5 - tbl_load_fpn1
21489
short load_fpn1_6 - tbl_load_fpn1
21490
short load_fpn1_7 - tbl_load_fpn1
21491
21492
load_fpn1_0:
21493
mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21494
mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21495
mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21496
lea FP_SRC(%a6), %a0
21497
rts
21498
load_fpn1_1:
21499
mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21500
mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21501
mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21502
lea FP_SRC(%a6), %a0
21503
rts
21504
load_fpn1_2:
21505
fmovm.x &0x20, FP_SRC(%a6)
21506
lea FP_SRC(%a6), %a0
21507
rts
21508
load_fpn1_3:
21509
fmovm.x &0x10, FP_SRC(%a6)
21510
lea FP_SRC(%a6), %a0
21511
rts
21512
load_fpn1_4:
21513
fmovm.x &0x08, FP_SRC(%a6)
21514
lea FP_SRC(%a6), %a0
21515
rts
21516
load_fpn1_5:
21517
fmovm.x &0x04, FP_SRC(%a6)
21518
lea FP_SRC(%a6), %a0
21519
rts
21520
load_fpn1_6:
21521
fmovm.x &0x02, FP_SRC(%a6)
21522
lea FP_SRC(%a6), %a0
21523
rts
21524
load_fpn1_7:
21525
fmovm.x &0x01, FP_SRC(%a6)
21526
lea FP_SRC(%a6), %a0
21527
rts
21528
21529
#############################################################################
21530
21531
#########################################################################
21532
# XDEF **************************************************************** #
21533
# load_fpn2(): load FP register value into FP_DST(a6). #
21534
# #
21535
# XREF **************************************************************** #
21536
# None #
21537
# #
21538
# INPUT *************************************************************** #
21539
# d0 = index of FP register to load #
21540
# #
21541
# OUTPUT ************************************************************** #
21542
# FP_DST(a6) = value loaded from FP register file #
21543
# #
21544
# ALGORITHM *********************************************************** #
21545
# Using the index in d0, load FP_DST(a6) with a number from the #
21546
# FP register file. #
21547
# #
21548
#########################################################################
21549
21550
global load_fpn2
21551
load_fpn2:
21552
mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
21553
jmp (tbl_load_fpn2.b,%pc,%d0.w*1)
21554
21555
tbl_load_fpn2:
21556
short load_fpn2_0 - tbl_load_fpn2
21557
short load_fpn2_1 - tbl_load_fpn2
21558
short load_fpn2_2 - tbl_load_fpn2
21559
short load_fpn2_3 - tbl_load_fpn2
21560
short load_fpn2_4 - tbl_load_fpn2
21561
short load_fpn2_5 - tbl_load_fpn2
21562
short load_fpn2_6 - tbl_load_fpn2
21563
short load_fpn2_7 - tbl_load_fpn2
21564
21565
load_fpn2_0:
21566
mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
21567
mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
21568
mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
21569
lea FP_DST(%a6), %a0
21570
rts
21571
load_fpn2_1:
21572
mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
21573
mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
21574
mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
21575
lea FP_DST(%a6), %a0
21576
rts
21577
load_fpn2_2:
21578
fmovm.x &0x20, FP_DST(%a6)
21579
lea FP_DST(%a6), %a0
21580
rts
21581
load_fpn2_3:
21582
fmovm.x &0x10, FP_DST(%a6)
21583
lea FP_DST(%a6), %a0
21584
rts
21585
load_fpn2_4:
21586
fmovm.x &0x08, FP_DST(%a6)
21587
lea FP_DST(%a6), %a0
21588
rts
21589
load_fpn2_5:
21590
fmovm.x &0x04, FP_DST(%a6)
21591
lea FP_DST(%a6), %a0
21592
rts
21593
load_fpn2_6:
21594
fmovm.x &0x02, FP_DST(%a6)
21595
lea FP_DST(%a6), %a0
21596
rts
21597
load_fpn2_7:
21598
fmovm.x &0x01, FP_DST(%a6)
21599
lea FP_DST(%a6), %a0
21600
rts
21601
21602
#############################################################################
21603
21604
#########################################################################
21605
# XDEF **************************************************************** #
21606
# store_fpreg(): store an fp value to the fpreg designated d0. #
21607
# #
21608
# XREF **************************************************************** #
21609
# None #
21610
# #
21611
# INPUT *************************************************************** #
21612
# fp0 = extended precision value to store #
21613
# d0 = index of floating-point register #
21614
# #
21615
# OUTPUT ************************************************************** #
21616
# None #
21617
# #
21618
# ALGORITHM *********************************************************** #
21619
# Store the value in fp0 to the FP register designated by the #
21620
# value in d0. The FP number can be DENORM or SNAN so we have to be #
21621
# careful that we don't take an exception here. #
21622
# #
21623
#########################################################################
21624
21625
global store_fpreg
21626
store_fpreg:
21627
mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
21628
jmp (tbl_store_fpreg.b,%pc,%d0.w*1)
21629
21630
tbl_store_fpreg:
21631
short store_fpreg_0 - tbl_store_fpreg
21632
short store_fpreg_1 - tbl_store_fpreg
21633
short store_fpreg_2 - tbl_store_fpreg
21634
short store_fpreg_3 - tbl_store_fpreg
21635
short store_fpreg_4 - tbl_store_fpreg
21636
short store_fpreg_5 - tbl_store_fpreg
21637
short store_fpreg_6 - tbl_store_fpreg
21638
short store_fpreg_7 - tbl_store_fpreg
21639
21640
store_fpreg_0:
21641
fmovm.x &0x80, EXC_FP0(%a6)
21642
rts
21643
store_fpreg_1:
21644
fmovm.x &0x80, EXC_FP1(%a6)
21645
rts
21646
store_fpreg_2:
21647
fmovm.x &0x01, -(%sp)
21648
fmovm.x (%sp)+, &0x20
21649
rts
21650
store_fpreg_3:
21651
fmovm.x &0x01, -(%sp)
21652
fmovm.x (%sp)+, &0x10
21653
rts
21654
store_fpreg_4:
21655
fmovm.x &0x01, -(%sp)
21656
fmovm.x (%sp)+, &0x08
21657
rts
21658
store_fpreg_5:
21659
fmovm.x &0x01, -(%sp)
21660
fmovm.x (%sp)+, &0x04
21661
rts
21662
store_fpreg_6:
21663
fmovm.x &0x01, -(%sp)
21664
fmovm.x (%sp)+, &0x02
21665
rts
21666
store_fpreg_7:
21667
fmovm.x &0x01, -(%sp)
21668
fmovm.x (%sp)+, &0x01
21669
rts
21670
21671
#########################################################################
21672
# XDEF **************************************************************** #
21673
# _denorm(): denormalize an intermediate result #
21674
# #
21675
# XREF **************************************************************** #
21676
# None #
21677
# #
21678
# INPUT *************************************************************** #
21679
# a0 = points to the operand to be denormalized #
21680
# (in the internal extended format) #
21681
# #
21682
# d0 = rounding precision #
21683
# #
21684
# OUTPUT ************************************************************** #
21685
# a0 = pointer to the denormalized result #
21686
# (in the internal extended format) #
21687
# #
21688
# d0 = guard,round,sticky #
21689
# #
21690
# ALGORITHM *********************************************************** #
21691
# According to the exponent underflow threshold for the given #
21692
# precision, shift the mantissa bits to the right in order to raise the #
21693
# exponent of the operand to the threshold value. While shifting the #
21694
# mantissa bits right, maintain the value of the guard, round, and #
21695
# sticky bits. #
21696
# other notes: #
21697
# (1) _denorm() is called by the underflow routines #
21698
# (2) _denorm() does NOT affect the status register #
21699
# #
21700
#########################################################################
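#
# The C sketch below is illustrative only and is not part of this package;
# it restates the denormalization idea described above: shift the 64-bit
# mantissa right until the exponent is raised to the threshold, folding the
# shifted-off bits into guard/round/sticky (returned in the d0{31:29}
# layout).  The name denorm_sketch() is hypothetical.
#
#	#include <stdint.h>
#
#	static uint64_t denorm_sketch(uint64_t man, int exp, int threshold,
#	                              uint32_t *grs)
#	{
#	    int shift = threshold - exp;   /* places the exponent must rise   */
#	    uint32_t g = 0, r = 0, s = 0;
#
#	    while (shift-- > 0) {
#	        s |= r;                    /* old round bit folds into sticky */
#	        r = g;                     /* old guard bit becomes round     */
#	        g = (uint32_t)(man & 1);   /* shifted-off bit becomes guard   */
#	        man >>= 1;
#	    }
#	    *grs = (g << 31) | (r << 30) | (s << 29);
#	    return man;                    /* mantissa aligned to threshold   */
#	}
#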
21701
21702
#
21703
# table of exponent threshold values for each precision
21704
#
21705
tbl_thresh:
21706
short 0x0
21707
short sgl_thresh
21708
short dbl_thresh
21709
21710
global _denorm
21711
_denorm:
21712
#
21713
# Load the exponent threshold for the precision selected and check
21714
# to see if (threshold - exponent) is > 65 in which case we can
21715
# simply calculate the sticky bit and zero the mantissa. otherwise
21716
# we have to call the denormalization routine.
21717
#
21718
lsr.b &0x2, %d0 # shift prec to lo bits
21719
mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21720
mov.w %d1, %d0 # copy d1 into d0
21721
sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
21722
cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
21723
bpl.b denorm_set_stky # yes; just calc sticky
21724
21725
clr.l %d0 # clear g,r,s
21726
btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
21727
beq.b denorm_call # no; don't change anything
21728
bset &29, %d0 # yes; set sticky bit
21729
21730
denorm_call:
21731
bsr.l dnrm_lp # denormalize the number
21732
rts
21733
21734
#
21735
# all bits would have been shifted off during the denorm so simply
21736
# calculate if the sticky should be set and clear the entire mantissa.
21737
#
21738
denorm_set_stky:
21739
mov.l &0x20000000, %d0 # set sticky bit in return value
21740
mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
21741
clr.l FTEMP_HI(%a0) # clear ms mantissa
21742
clr.l FTEMP_LO(%a0) # clear ls mantissa
21743
rts
21744
21745
# #
21746
# dnrm_lp(): normalize exponent/mantissa to specified threshold #
21747
# #
21748
# INPUT: #
21749
# %a0 : points to the operand to be denormalized #
21750
# %d0{31:29} : initial guard,round,sticky #
21751
# %d1{15:0} : denormalization threshold #
21752
# OUTPUT: #
21753
# %a0 : points to the denormalized operand #
21754
# %d0{31:29} : final guard,round,sticky #
21755
# #
21756
21757
# *** Local Equates *** #
21758
set GRS, L_SCR2 # g,r,s temp storage
21759
set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
21760
21761
global dnrm_lp
21762
dnrm_lp:
21763
21764
#
21765
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
21766
# in memory so as to make the bitfield extraction for denormalization easier.
21767
#
21768
mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21769
mov.l %d0, GRS(%a6) # place g,r,s after it
21770
21771
#
21772
# check to see how much less than the underflow threshold the operand
21773
# exponent is.
21774
#
21775
mov.l %d1, %d0 # copy the denorm threshold
21776
sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
21777
ble.b dnrm_no_lp # d1 <= 0
21778
cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
21779
blt.b case_1 # yes
21780
cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
21781
blt.b case_2 # yes
21782
bra.w case_3 # (d1 >= 64)
21783
21784
#
21785
# No normalization necessary
21786
#
21787
dnrm_no_lp:
21788
mov.l GRS(%a6), %d0 # restore original g,r,s
21789
rts
21790
21791
#
21792
# case (0<d1<32)
21793
#
21794
# %d0 = denorm threshold
21795
# %d1 = "n" = amt to shift
21796
#
21797
# ---------------------------------------------------------
21798
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21799
# ---------------------------------------------------------
21800
# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21801
# \ \ \ \
21802
# \ \ \ \
21803
# \ \ \ \
21804
# \ \ \ \
21805
# \ \ \ \
21806
# \ \ \ \
21807
# \ \ \ \
21808
# \ \ \ \
21809
# <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810
# ---------------------------------------------------------
21811
# |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
21812
# ---------------------------------------------------------
21813
#
21814
case_1:
21815
mov.l %d2, -(%sp) # create temp storage
21816
21817
mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21818
mov.l &32, %d0
21819
sub.w %d1, %d0 # %d0 = 32 - %d1
21820
21821
cmpi.w %d1, &29 # is shft amt >= 29
21822
blt.b case1_extract # no; no fix needed
21823
mov.b GRS(%a6), %d2
21824
or.b %d2, 3+FTEMP_LO2(%a6)
21825
21826
case1_extract:
21827
bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21828
bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21829
bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21830
21831
mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
21832
mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
21833
21834
bftst %d0{&2:&30} # were bits shifted off?
21835
beq.b case1_sticky_clear # no; go finish
21836
bset &rnd_stky_bit, %d0 # yes; set sticky bit
21837
21838
case1_sticky_clear:
21839
and.l &0xe0000000, %d0 # clear all but G,R,S
21840
mov.l (%sp)+, %d2 # restore temp register
21841
rts
21842
21843
#
21844
# case (32<=d1<64)
21845
#
21846
# %d0 = denorm threshold
21847
# %d1 = "n" = amt to shift
21848
#
21849
# ---------------------------------------------------------
21850
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21851
# ---------------------------------------------------------
21852
# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21853
# \ \ \
21854
# \ \ \
21855
# \ \ -------------------
21856
# \ -------------------- \
21857
# ------------------- \ \
21858
# \ \ \
21859
# \ \ \
21860
# \ \ \
21861
# <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862
# ---------------------------------------------------------
21863
# |0...............0|0....0| NEW_LO |grs |
21864
# ---------------------------------------------------------
21865
#
21866
case_2:
21867
mov.l %d2, -(%sp) # create temp storage
21868
21869
mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21870
subi.w &0x20, %d1 # %d1 now between 0 and 32
21871
mov.l &0x20, %d0
21872
sub.w %d1, %d0 # %d0 = 32 - %d1
21873
21874
# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21875
# the number of bits to check for the sticky detect.
21876
# it only plays a role in shift amounts of 61-63.
21877
mov.b GRS(%a6), %d2
21878
or.b %d2, 3+FTEMP_LO2(%a6)
21879
21880
bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21881
bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21882
21883
bftst %d1{&2:&30} # were any bits shifted off?
21884
bne.b case2_set_sticky # yes; set sticky bit
21885
bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
21886
bne.b case2_set_sticky # yes; set sticky bit
21887
21888
mov.l %d1, %d0 # move new G,R,S to %d0
21889
bra.b case2_end
21890
21891
case2_set_sticky:
21892
mov.l %d1, %d0 # move new G,R,S to %d0
21893
bset &rnd_stky_bit, %d0 # set sticky bit
21894
21895
case2_end:
21896
clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
21897
mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
21898
and.l &0xe0000000, %d0 # clear all but G,R,S
21899
21900
mov.l (%sp)+,%d2 # restore temp register
21901
rts
21902
21903
#
21904
# case (d1>=64)
21905
#
21906
# %d0 = denorm threshold
21907
# %d1 = amt to shift
21908
#
21909
case_3:
21910
mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
21911
21912
cmpi.w %d1, &65 # is shift amt > 65?
21913
blt.b case3_64 # no; it's == 64
21914
beq.b case3_65 # no; it's == 65
21915
21916
#
21917
# case (d1>65)
21918
#
21919
# Shift value is > 65 and out of range. All bits are shifted off.
21920
# Return a zero mantissa with the sticky bit set
21921
#
21922
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21923
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21924
mov.l &0x20000000, %d0 # set sticky bit
21925
rts
21926
21927
#
21928
# case (d1 == 64)
21929
#
21930
# ---------------------------------------------------------
21931
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21932
# ---------------------------------------------------------
21933
# <-------(32)------>
21934
# \ \
21935
# \ \
21936
# \ \
21937
# \ ------------------------------
21938
# ------------------------------- \
21939
# \ \
21940
# \ \
21941
# \ \
21942
# <-------(32)------>
21943
# ---------------------------------------------------------
21944
# |0...............0|0................0|grs |
21945
# ---------------------------------------------------------
21946
#
21947
case3_64:
21948
mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21949
mov.l %d0, %d1 # make a copy
21950
and.l &0xc0000000, %d0 # extract G,R
21951
and.l &0x3fffffff, %d1 # extract other bits
21952
21953
bra.b case3_complete
21954
21955
#
21956
# case (d1 == 65)
21957
#
21958
# ---------------------------------------------------------
21959
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21960
# ---------------------------------------------------------
21961
# <-------(32)------>
21962
# \ \
21963
# \ \
21964
# \ \
21965
# \ ------------------------------
21966
# -------------------------------- \
21967
# \ \
21968
# \ \
21969
# \ \
21970
# <-------(31)----->
21971
# ---------------------------------------------------------
21972
# |0...............0|0................0|0rs |
21973
# ---------------------------------------------------------
21974
#
21975
case3_65:
21976
mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
mov.l %d0, %d1 # make a copy
21977
and.l &0x80000000, %d0 # extract R bit
21978
lsr.l &0x1, %d0 # shift high bit into R bit
21979
and.l &0x7fffffff, %d1 # extract other bits
21980
21981
case3_complete:
21982
# last operation done was an "and" of the bits shifted off so the condition
21983
# codes are already set so branch accordingly.
21984
bne.b case3_set_sticky # yes; go set new sticky
21985
tst.l FTEMP_LO(%a0) # were any bits shifted off?
21986
bne.b case3_set_sticky # yes; go set new sticky
21987
tst.b GRS(%a6) # were any bits shifted off?
21988
bne.b case3_set_sticky # yes; go set new sticky
21989
21990
#
21991
# no bits were shifted off so don't set the sticky bit.
21992
# the guard and round bits in %d0 are left as computed above;
21993
# the entire mantissa is zero.
21994
#
21995
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21996
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21997
rts
21998
21999
#
22000
# some bits were shifted off so set the sticky bit.
22001
# the entire mantissa is zero.
22002
#
22003
case3_set_sticky:
22004
bset &rnd_stky_bit,%d0 # set new sticky bit
22005
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22006
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22007
rts
22008
22009
#########################################################################
22010
# XDEF **************************************************************** #
22011
# _round(): round result according to precision/mode #
22012
# #
22013
# XREF **************************************************************** #
22014
# None #
22015
# #
22016
# INPUT *************************************************************** #
22017
# a0 = ptr to input operand in internal extended format #
22018
# d1(hi) = contains rounding precision: #
22019
# ext = $0000xxxx #
22020
# sgl = $0004xxxx #
22021
# dbl = $0008xxxx #
22022
# d1(lo) = contains rounding mode: #
22023
# RN = $xxxx0000 #
22024
# RZ = $xxxx0001 #
22025
# RM = $xxxx0002 #
22026
# RP = $xxxx0003 #
22027
# d0{31:29} = contains the g,r,s bits (extended) #
22028
# #
22029
# OUTPUT ************************************************************** #
22030
# a0 = pointer to rounded result #
22031
# #
22032
# ALGORITHM *********************************************************** #
22033
# On return the value pointed to by a0 is correctly rounded, #
22034
# a0 is preserved and the g-r-s bits in d0 are cleared. #
22035
# The result is not typed - the tag field is invalid. The #
22036
# result is still in the internal extended format. #
22037
# #
22038
# The INEX bit of USER_FPSR will be set if the rounded result was #
22039
# inexact (i.e. if any of the g-r-s bits were set). #
22040
# #
22041
#########################################################################
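#
# Illustrative only (not part of this package): the C sketch below shows the
# round-up decision that the mode handlers below implement by adding 1 to the
# l-bit and, for RN ties, clearing it again when r = s = 0.  The name
# round_up_sketch() is hypothetical.
#
#	enum rnd_mode { RN, RZ, RM, RP };
#
#	static int round_up_sketch(enum rnd_mode mode, int negative,
#	                           int g, int r, int s, int lsb)
#	{
#	    if (!(g | r | s))                      /* exact: nothing to do      */
#	        return 0;
#	    switch (mode) {
#	    case RZ: return 0;                     /* always truncate           */
#	    case RP: return !negative;             /* round toward +infinity    */
#	    case RM: return negative;              /* round toward -infinity    */
#	    case RN: return g && (r || s || lsb);  /* nearest, ties go to even  */
#	    }
#	    return 0;
#	}
#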
22042
22043
global _round
22044
_round:
22045
#
22046
# ext_grs() looks at the rounding precision and sets the appropriate
22047
# G,R,S bits.
22048
# If (G,R,S == 0) then result is exact and round is done, else set
22049
# the inex flag in status reg and continue.
22050
#
22051
bsr.l ext_grs # extract G,R,S
22052
22053
tst.l %d0 # are G,R,S zero?
22054
beq.w truncate # yes; round is complete
22055
22056
or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22057
22058
#
22059
# Use rounding mode as an index into a jump table for these modes.
22060
# All of the following assumes grs != 0.
22061
#
22062
mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
22063
jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
22064
22065
tbl_mode:
22066
short rnd_near - tbl_mode
22067
short truncate - tbl_mode # RZ always truncates
22068
short rnd_mnus - tbl_mode
22069
short rnd_plus - tbl_mode
22070
22071
#################################################################
22072
# ROUND PLUS INFINITY #
22073
# #
22074
# If sign of fp number = 0 (positive), then add 1 to l. #
22075
#################################################################
22076
rnd_plus:
22077
tst.b FTEMP_SGN(%a0) # check for sign
22078
bmi.w truncate # if negative then truncate
22079
22080
mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22081
swap %d1 # set up d1 for round prec.
22082
22083
cmpi.b %d1, &s_mode # is prec = sgl?
22084
beq.w add_sgl # yes
22085
bgt.w add_dbl # no; it's dbl
22086
bra.w add_ext # no; it's ext
22087
22088
#################################################################
22089
# ROUND MINUS INFINITY #
22090
# #
22091
# If sign of fp number = 1 (negative), then add 1 to l. #
22092
#################################################################
22093
rnd_mnus:
22094
tst.b FTEMP_SGN(%a0) # check for sign
22095
bpl.w truncate # if positive then truncate
22096
22097
mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22098
swap %d1 # set up d1 for round prec.
22099
22100
cmpi.b %d1, &s_mode # is prec = sgl?
22101
beq.w add_sgl # yes
22102
bgt.w add_dbl # no; it's dbl
22103
bra.w add_ext # no; it's ext
22104
22105
#################################################################
22106
# ROUND NEAREST #
22107
# #
22108
# If (g=1), then add 1 to l and if (r=s=0), then clear l #
22109
# Note that this will round to even in case of a tie. #
22110
#################################################################
22111
rnd_near:
22112
asl.l &0x1, %d0 # shift g-bit to c-bit
22113
bcc.w truncate # if (g=0) then truncate
22114
22115
swap %d1 # set up d1 for round prec.
22116
22117
cmpi.b %d1, &s_mode # is prec = sgl?
22118
beq.w add_sgl # yes
22119
bgt.w add_dbl # no; it's dbl
22120
bra.w add_ext # no; it's ext
22121
22122
# *** LOCAL EQUATES ***
22123
set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
22124
set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
22125
22126
#########################
22127
# ADD SINGLE #
22128
#########################
22129
add_sgl:
22130
add.l &ad_1_sgl, FTEMP_HI(%a0)
22131
bcc.b scc_clr # no mantissa overflow
22132
roxr.w FTEMP_HI(%a0) # shift v-bit back in
22133
roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
22134
add.w &0x1, FTEMP_EX(%a0) # and incr exponent
22135
scc_clr:
22136
tst.l %d0 # test for rs = 0
22137
bne.b sgl_done
22138
and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22139
sgl_done:
22140
and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22141
clr.l FTEMP_LO(%a0) # clear d2
22142
rts
22143
22144
#########################
22145
# ADD EXTENDED #
22146
#########################
22147
add_ext:
22148
addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
22149
bcc.b xcc_clr # test for carry out
22150
addq.l &1,FTEMP_HI(%a0) # propagate carry
22151
bcc.b xcc_clr
22152
roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22153
roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22154
roxr.w FTEMP_LO(%a0)
22155
roxr.w FTEMP_LO+2(%a0)
22156
add.w &0x1,FTEMP_EX(%a0) # and inc exp
22157
xcc_clr:
22158
tst.l %d0 # test rs = 0
22159
bne.b add_ext_done
22160
and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
22161
add_ext_done:
22162
rts
22163
22164
#########################
22165
# ADD DOUBLE #
22166
#########################
22167
add_dbl:
22168
add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22169
bcc.b dcc_clr # no carry
22170
addq.l &0x1, FTEMP_HI(%a0) # propagate carry
22171
bcc.b dcc_clr # no carry
22172
22173
roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22174
roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22175
roxr.w FTEMP_LO(%a0)
22176
roxr.w FTEMP_LO+2(%a0)
22177
addq.w &0x1, FTEMP_EX(%a0) # incr exponent
22178
dcc_clr:
22179
tst.l %d0 # test for rs = 0
22180
bne.b dbl_done
22181
and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22182
22183
dbl_done:
22184
and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22185
rts
22186
22187
###########################
22188
# Truncate all other bits #
22189
###########################
22190
truncate:
22191
swap %d1 # select rnd prec
22192
22193
cmpi.b %d1, &s_mode # is prec sgl?
22194
beq.w sgl_done # yes
22195
bgt.b dbl_done # no; it's dbl
22196
rts # no; it's ext
22197
22198
22199
#
22200
# ext_grs(): extract guard, round and sticky bits according to
22201
# rounding precision.
22202
#
22203
# INPUT
22204
# d0 = extended precision g,r,s (in d0{31:29})
22205
# d1 = {PREC,ROUND}
22206
# OUTPUT
22207
# d0{31:29} = guard, round, sticky
22208
#
22209
# ext_grs() extracts the guard/round/sticky bits according to the
22210
# selected rounding precision. It is called by the round subroutine
22211
# only. All registers except d0 are kept intact. d0 becomes an
22212
# updated guard,round,sticky in d0{31:29}
22213
#
22214
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22215
# prior to usage, and needs to restore d1 to original. this
22216
# routine is tightly tied to the round routine and not meant to
22217
# uphold standard subroutine calling practices.
22218
#
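#
# Illustrative only (not part of this package): ext_grs_sketch() below shows,
# in C, which mantissa bits become guard/round/sticky for single and double
# rounding precision; the bit positions match the bfextu fields used below.
# The name is hypothetical.
#
#	#include <stdint.h>
#
#	static uint32_t ext_grs_sketch(uint32_t hi, uint32_t lo,
#	                               uint32_t old_grs, int dbl)
#	{
#	    uint32_t g, r, s;
#
#	    if (!dbl) {                      /* sgl: l-bit is bit 8 of hi(man)  */
#	        g = (hi >> 7) & 1;
#	        r = (hi >> 6) & 1;
#	        s = ((hi & 0x3f) | lo | old_grs) != 0;
#	    } else {                         /* dbl: l-bit is bit 11 of lo(man) */
#	        g = (lo >> 10) & 1;
#	        r = (lo >> 9) & 1;
#	        s = ((lo & 0x1ff) | old_grs) != 0;
#	    }
#	    return (g << 31) | (r << 30) | (s << 29);
#	}
#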
22219
22220
ext_grs:
22221
swap %d1 # have d1.w point to round precision
22222
tst.b %d1 # is rnd prec = extended?
22223
bne.b ext_grs_not_ext # no; go handle sgl or dbl
22224
22225
#
22226
# %d0 actually already holds g,r,s since _round() had it before calling
22227
# this function. so, as long as we don't disturb it, we are "returning" it.
22228
#
22229
ext_grs_ext:
22230
swap %d1 # yes; return to correct positions
22231
rts
22232
22233
ext_grs_not_ext:
22234
movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
22235
22236
cmpi.b %d1, &s_mode # is rnd prec = sgl?
22237
bne.b ext_grs_dbl # no; go handle dbl
22238
22239
#
22240
# sgl:
22241
# 96 64 40 32 0
22242
# -----------------------------------------------------
22243
# | EXP |XXXXXXX| |xx | |grs|
22244
# -----------------------------------------------------
22245
# <--(24)--->nn\ /
22246
# ee ---------------------
22247
# ww |
22248
# v
22249
# gr new sticky
22250
#
22251
ext_grs_sgl:
22252
bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22253
mov.l &30, %d2 # of the sgl prec. limits
22254
lsl.l %d2, %d3 # shift g-r bits to MSB of d3
22255
mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
22256
and.l &0x0000003f, %d2 # s bit is the or of all other
22257
bne.b ext_grs_st_stky # bits to the right of g-r
22258
tst.l FTEMP_LO(%a0) # test lower mantissa
22259
bne.b ext_grs_st_stky # if any are set, set sticky
22260
tst.l %d0 # test original g,r,s
22261
bne.b ext_grs_st_stky # if any are set, set sticky
22262
bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
22263
22264
#
22265
# dbl:
22266
# 96 64 32 11 0
22267
# -----------------------------------------------------
22268
# | EXP |XXXXXXX| | |xx |grs|
22269
# -----------------------------------------------------
22270
# nn\ /
22271
# ee -------
22272
# ww |
22273
# v
22274
# gr new sticky
22275
#
22276
ext_grs_dbl:
22277
bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22278
mov.l &30, %d2 # of the dbl prec. limits
22279
lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
22280
mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
22281
and.l &0x000001ff, %d2 # s bit is the or-ing of all
22282
bne.b ext_grs_st_stky # other bits to the right of g-r
22283
tst.l %d0 # test word original g,r,s
22284
bne.b ext_grs_st_stky # if any are set, set sticky
22285
bra.b ext_grs_end_sd # if clear, exit
22286
22287
ext_grs_st_stky:
22288
bset &rnd_stky_bit, %d3 # set sticky bit
22289
ext_grs_end_sd:
22290
mov.l %d3, %d0 # return grs to d0
22291
22292
movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
22293
22294
swap %d1 # restore d1 to original
22295
rts
22296
22297
#########################################################################
22298
# norm(): normalize the mantissa of an extended precision input. the #
22299
# input operand should not be normalized already. #
22300
# #
22301
# XDEF **************************************************************** #
22302
# norm() #
22303
# #
22304
# XREF **************************************************************** #
22305
# none #
22306
# #
22307
# INPUT *************************************************************** #
22308
# a0 = pointer fp extended precision operand to normalize #
22309
# #
22310
# OUTPUT ************************************************************** #
22311
# d0 = number of bit positions the mantissa was shifted #
22312
# a0 = the input operand's mantissa is normalized; the exponent #
22313
# is unchanged. #
22314
# #
22315
#########################################################################
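#
# Illustrative only (not part of this package): norm_sketch() below shows in C
# what norm() does with bfffo and lsl: shift the 64-bit mantissa left until
# bit 63 is set and return the shift count (the caller adjusts the exponent).
# The name is hypothetical.
#
#	#include <stdint.h>
#
#	static int norm_sketch(uint32_t *hi, uint32_t *lo)
#	{
#	    uint64_t man = ((uint64_t)*hi << 32) | *lo;
#	    int shift = 0;
#
#	    while (man && !(man & (1ULL << 63))) {  /* find the leading one */
#	        man <<= 1;
#	        shift++;
#	    }
#	    *hi = (uint32_t)(man >> 32);
#	    *lo = (uint32_t)man;
#	    return shift;
#	}
#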
22316
global norm
22317
norm:
22318
mov.l %d2, -(%sp) # create some temp regs
22319
mov.l %d3, -(%sp)
22320
22321
mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
22322
mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
22323
22324
bfffo %d0{&0:&32}, %d2 # how many places to shift?
22325
beq.b norm_lo # hi(man) is all zeroes!
22326
22327
norm_hi:
22328
lsl.l %d2, %d0 # left shift hi(man)
22329
bfextu %d1{&0:%d2}, %d3 # extract lo bits
22330
22331
or.l %d3, %d0 # create hi(man)
22332
lsl.l %d2, %d1 # create lo(man)
22333
22334
mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22335
mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
22336
22337
mov.l %d2, %d0 # return shift amount
22338
22339
mov.l (%sp)+, %d3 # restore temp regs
22340
mov.l (%sp)+, %d2
22341
22342
rts
22343
22344
norm_lo:
22345
bfffo %d1{&0:&32}, %d2 # how many places to shift?
22346
lsl.l %d2, %d1 # shift lo(man)
22347
add.l &32, %d2 # add 32 to shft amount
22348
22349
mov.l %d1, FTEMP_HI(%a0) # store hi(man)
22350
clr.l FTEMP_LO(%a0) # lo(man) is now zero
22351
22352
mov.l %d2, %d0 # return shift amount
22353
22354
mov.l (%sp)+, %d3 # restore temp regs
22355
mov.l (%sp)+, %d2
22356
22357
rts
22358
22359
#########################################################################
22360
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
22361
# - returns corresponding optype tag #
22362
# #
22363
# XDEF **************************************************************** #
22364
# unnorm_fix() #
22365
# #
22366
# XREF **************************************************************** #
22367
# norm() - normalize the mantissa #
22368
# #
22369
# INPUT *************************************************************** #
22370
# a0 = pointer to unnormalized extended precision number #
22371
# #
22372
# OUTPUT ************************************************************** #
22373
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
22374
# a0 = input operand has been converted to a norm, denorm, or #
22375
# zero; both the exponent and mantissa are changed. #
22376
# #
22377
#########################################################################
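#
# Illustrative only (not part of this package): the C sketch below shows the
# decision unnorm_fix() makes from the leading-zero count of the mantissa and
# the biased exponent (sign stripped).  unnorm_fix_sketch() is hypothetical.
#
#	#include <stdint.h>
#
#	static const char *unnorm_fix_sketch(uint64_t man, unsigned exp)
#	{
#	    unsigned lz = 0;
#
#	    if (man == 0)
#	        return "ZERO";              /* whole mantissa is zero           */
#	    while (!(man & (1ULL << 63))) { /* count leading zeroes             */
#	        man <<= 1;
#	        lz++;
#	    }
#	    if (lz > exp)
#	        return "DENORM";            /* shift only by exp; exp becomes 0 */
#	    return "NORM";                  /* normalize fully; exp stays >= 0  */
#	}
#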
22378
22379
global unnorm_fix
22380
unnorm_fix:
22381
bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22382
bne.b unnorm_shift # hi(man) is not all zeroes
22383
22384
#
22385
# hi(man) is all zeroes so see if any bits in lo(man) are set
22386
#
22387
unnorm_chk_lo:
22388
bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22389
beq.w unnorm_zero # yes
22390
22391
add.w &32, %d0 # no; fix shift distance
22392
22393
#
22394
# d0 = # shifts needed for complete normalization
22395
#
22396
unnorm_shift:
22397
clr.l %d1 # clear top word
22398
mov.w FTEMP_EX(%a0), %d1 # extract exponent
22399
and.w &0x7fff, %d1 # strip off sgn
22400
22401
cmp.w %d0, %d1 # will denorm push exp < 0?
22402
bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
22403
22404
#
22405
# exponent would not go < 0. Therefore, number stays normalized
22406
#
22407
sub.w %d0, %d1 # shift exponent value
22408
mov.w FTEMP_EX(%a0), %d0 # load old exponent
22409
and.w &0x8000, %d0 # save old sign
22410
or.w %d0, %d1 # {sgn,new exp}
22411
mov.w %d1, FTEMP_EX(%a0) # insert new exponent
22412
22413
bsr.l norm # normalize UNNORM
22414
22415
mov.b &NORM, %d0 # return new optype tag
22416
rts
22417
22418
#
22419
# exponent would go < 0, so only denormalize until exp = 0
22420
#
22421
unnorm_nrm_zero:
22422
cmp.b %d1, &32 # is exp <= 32?
22423
bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
22424
22425
bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22426
mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
22427
22428
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22429
lsl.l %d1, %d0 # extract new lo(man)
22430
mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
22431
22432
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22433
22434
mov.b &DENORM, %d0 # return new optype tag
22435
rts
22436
22437
#
22438
# only mantissa bits set are in lo(man)
22439
#
22440
unnorm_nrm_zero_lrg:
22441
sub.w &32, %d1 # adjust shft amt by 32
22442
22443
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22444
lsl.l %d1, %d0 # left shift lo(man)
22445
22446
mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22447
clr.l FTEMP_LO(%a0) # lo(man) = 0
22448
22449
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22450
22451
mov.b &DENORM, %d0 # return new optype tag
22452
rts
22453
22454
#
22455
# whole mantissa is zero so this UNNORM is actually a zero
22456
#
22457
unnorm_zero:
22458
and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
22459
22460
mov.b &ZERO, %d0 # fix optype tag
22461
rts
22462
22463
#########################################################################
22464
# XDEF **************************************************************** #
22465
# set_tag_x(): return the optype of the input ext fp number #
22466
# #
22467
# XREF **************************************************************** #
22468
# None #
22469
# #
22470
# INPUT *************************************************************** #
22471
# a0 = pointer to extended precision operand #
22472
# #
22473
# OUTPUT ************************************************************** #
22474
# d0 = value of type tag #
22475
# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
22476
# #
22477
# ALGORITHM *********************************************************** #
22478
# Simply test the exponent, j-bit, and mantissa values to #
22479
# determine the type of operand. #
22480
# If it's an unnormalized zero, alter the operand and force it #
22481
# to be a normal zero. #
22482
# #
22483
#########################################################################
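#
# Illustrative only (not part of this package): tag_x_sketch() below walks the
# same decision tree in C; sgn_exp is the sign/exponent word and man the full
# 64-bit mantissa (j-bit in bit 63).  The name is hypothetical.
#
#	#include <stdint.h>
#
#	enum optype { NORM, ZERO, INF, QNAN, SNAN, DENORM, UNNORM };
#
#	static enum optype tag_x_sketch(uint16_t sgn_exp, uint64_t man)
#	{
#	    uint16_t exp = sgn_exp & 0x7fff;
#
#	    if (exp == 0x7fff) {             /* max exponent: INF or NAN        */
#	        if ((man << 1) == 0)         /* msb of mantissa is a don't care */
#	            return INF;
#	        return (man & (1ULL << 62)) ? QNAN : SNAN;
#	    }
#	    if (man & (1ULL << 63))          /* j-bit set                       */
#	        return NORM;
#	    if (exp != 0)                    /* j-bit clear, exponent nonzero   */
#	        return man ? UNNORM : ZERO;  /* unnormalized zero -> ZERO       */
#	    return man ? DENORM : ZERO;
#	}
#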
22484
22485
global set_tag_x
22486
set_tag_x:
22487
mov.w FTEMP_EX(%a0), %d0 # extract exponent
22488
andi.w &0x7fff, %d0 # strip off sign
22489
cmpi.w %d0, &0x7fff # is (EXP == MAX)?
22490
beq.b inf_or_nan_x
22491
not_inf_or_nan_x:
22492
btst &0x7,FTEMP_HI(%a0)
22493
beq.b not_norm_x
22494
is_norm_x:
22495
mov.b &NORM, %d0
22496
rts
22497
not_norm_x:
22498
tst.w %d0 # is exponent = 0?
22499
bne.b is_unnorm_x
22500
not_unnorm_x:
22501
tst.l FTEMP_HI(%a0)
22502
bne.b is_denorm_x
22503
tst.l FTEMP_LO(%a0)
22504
bne.b is_denorm_x
22505
is_zero_x:
22506
mov.b &ZERO, %d0
22507
rts
22508
is_denorm_x:
22509
mov.b &DENORM, %d0
22510
rts
22511
# we must now distinguish "unnormalized zeroes", which we
22512
# must convert to zero.
22513
is_unnorm_x:
22514
tst.l FTEMP_HI(%a0)
22515
bne.b is_unnorm_reg_x
22516
tst.l FTEMP_LO(%a0)
22517
bne.b is_unnorm_reg_x
22518
# it's an "unnormalized zero". let's convert it to an actual zero...
22519
andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
22520
mov.b &ZERO, %d0
22521
rts
22522
is_unnorm_reg_x:
22523
mov.b &UNNORM, %d0
22524
rts
22525
inf_or_nan_x:
22526
tst.l FTEMP_LO(%a0)
22527
bne.b is_nan_x
22528
mov.l FTEMP_HI(%a0), %d0
22529
and.l &0x7fffffff, %d0 # msb is a don't care!
22530
bne.b is_nan_x
22531
is_inf_x:
22532
mov.b &INF, %d0
22533
rts
22534
is_nan_x:
22535
btst &0x6, FTEMP_HI(%a0)
22536
beq.b is_snan_x
22537
mov.b &QNAN, %d0
22538
rts
22539
is_snan_x:
22540
mov.b &SNAN, %d0
22541
rts
22542
22543
#########################################################################
22544
# XDEF **************************************************************** #
22545
# set_tag_d(): return the optype of the input dbl fp number #
22546
# #
22547
# XREF **************************************************************** #
22548
# None #
22549
# #
22550
# INPUT *************************************************************** #
22551
# a0 = points to double precision operand #
22552
# #
22553
# OUTPUT ************************************************************** #
22554
# d0 = value of type tag #
22555
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22556
# #
22557
# ALGORITHM *********************************************************** #
22558
# Simply test the exponent, j-bit, and mantissa values to #
22559
# determine the type of operand. #
22560
# #
22561
#########################################################################
22562
22563
global set_tag_d
22564
set_tag_d:
22565
mov.l FTEMP(%a0), %d0
22566
mov.l %d0, %d1
22567
22568
andi.l &0x7ff00000, %d0
22569
beq.b zero_or_denorm_d
22570
22571
cmpi.l %d0, &0x7ff00000
22572
beq.b inf_or_nan_d
22573
22574
is_norm_d:
22575
mov.b &NORM, %d0
22576
rts
22577
zero_or_denorm_d:
22578
and.l &0x000fffff, %d1
22579
bne is_denorm_d
22580
tst.l 4+FTEMP(%a0)
22581
bne is_denorm_d
22582
is_zero_d:
22583
mov.b &ZERO, %d0
22584
rts
22585
is_denorm_d:
22586
mov.b &DENORM, %d0
22587
rts
22588
inf_or_nan_d:
22589
and.l &0x000fffff, %d1
22590
bne is_nan_d
22591
tst.l 4+FTEMP(%a0)
22592
bne is_nan_d
22593
is_inf_d:
22594
mov.b &INF, %d0
22595
rts
22596
is_nan_d:
22597
btst &19, %d1
22598
bne is_qnan_d
22599
is_snan_d:
22600
mov.b &SNAN, %d0
22601
rts
22602
is_qnan_d:
22603
mov.b &QNAN, %d0
22604
rts
22605
22606
#########################################################################
22607
# XDEF **************************************************************** #
22608
# set_tag_s(): return the optype of the input sgl fp number #
22609
# #
22610
# XREF **************************************************************** #
22611
# None #
22612
# #
22613
# INPUT *************************************************************** #
22614
# a0 = pointer to single precision operand #
22615
# #
22616
# OUTPUT ************************************************************** #
22617
# d0 = value of type tag #
22618
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22619
# #
22620
# ALGORITHM *********************************************************** #
22621
# Simply test the exponent, j-bit, and mantissa values to #
22622
# determine the type of operand. #
22623
# #
22624
#########################################################################
22625
22626
global set_tag_s
22627
set_tag_s:
22628
mov.l FTEMP(%a0), %d0
22629
mov.l %d0, %d1
22630
22631
andi.l &0x7f800000, %d0
22632
beq.b zero_or_denorm_s
22633
22634
cmpi.l %d0, &0x7f800000
22635
beq.b inf_or_nan_s
22636
22637
is_norm_s:
22638
mov.b &NORM, %d0
22639
rts
22640
zero_or_denorm_s:
22641
and.l &0x007fffff, %d1
22642
bne is_denorm_s
22643
is_zero_s:
22644
mov.b &ZERO, %d0
22645
rts
22646
is_denorm_s:
22647
mov.b &DENORM, %d0
22648
rts
22649
inf_or_nan_s:
22650
and.l &0x007fffff, %d1
22651
bne is_nan_s
22652
is_inf_s:
22653
mov.b &INF, %d0
22654
rts
22655
is_nan_s:
22656
btst &22, %d1
22657
bne is_qnan_s
22658
is_snan_s:
22659
mov.b &SNAN, %d0
22660
rts
22661
is_qnan_s:
22662
mov.b &QNAN, %d0
22663
rts
22664
22665
#########################################################################
22666
# XDEF **************************************************************** #
22667
# unf_res(): routine to produce default underflow result of a #
22668
# scaled extended precision number; this is used by #
22669
# fadd/fdiv/fmul/etc. emulation routines. #
22670
# unf_res4(): same as above but for fsglmul/fsgldiv which use #
22671
# single round prec and extended prec mode. #
22672
# #
22673
# XREF **************************************************************** #
22674
# _denorm() - denormalize according to scale factor #
22675
# _round() - round denormalized number according to rnd prec #
22676
# #
22677
# INPUT *************************************************************** #
22678
# a0 = pointer to extended precision operand #
22679
# d0 = scale factor #
22680
# d1 = rounding precision/mode #
22681
# #
22682
# OUTPUT ************************************************************** #
22683
# a0 = pointer to default underflow result in extended precision #
22684
# d0.b = result FPSR_cc which caller may or may not want to save #
22685
# #
22686
# ALGORITHM *********************************************************** #
22687
# Convert the input operand to "internal format" which means the #
22688
# exponent is extended to 16 bits and the sign is stored in the unused #
22689
# portion of the extended precision operand. Denormalize the number #
22690
# according to the scale factor passed in d0. Then, round the #
22691
# denormalized result. #
22692
# Set the FPSR_exc bits as appropriate but return the cc bits in #
22693
# d0 in case the caller doesn't want to save them (as is the case for #
22694
# fmove out). #
22695
# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
22696
# precision and the rounding mode to single. #
22697
# #
22698
#########################################################################
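#
# Illustrative only (not part of this package): the C sketch below shows the
# "internal format" step unf_res() performs before calling _denorm(): widen
# the biased exponent to 16 bits, park the sign in a separate byte, and
# subtract the scale factor.  All names here are hypothetical.
#
#	#include <stdint.h>
#
#	struct internal_x {
#	    int16_t  exp;     /* 16-bit exponent; may go negative after scaling */
#	    uint8_t  sgn;     /* 0xff if the operand is negative, else 0x00     */
#	    uint32_t man_hi;
#	    uint32_t man_lo;
#	};
#
#	static void to_internal_sketch(struct internal_x *ix, uint16_t sgn_exp,
#	                               uint32_t hi, uint32_t lo, int16_t scale)
#	{
#	    ix->sgn    = (sgn_exp & 0x8000) ? 0xff : 0x00;
#	    ix->exp    = (int16_t)((sgn_exp & 0x7fff) - scale);
#	    ix->man_hi = hi;
#	    ix->man_lo = lo;
#	}
#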
22699
global unf_res
22700
unf_res:
22701
mov.l %d1, -(%sp) # save rnd prec,mode on stack
22702
22703
btst &0x7, FTEMP_EX(%a0) # make "internal" format
22704
sne FTEMP_SGN(%a0)
22705
22706
mov.w FTEMP_EX(%a0), %d1 # extract exponent
22707
and.w &0x7fff, %d1
22708
sub.w %d0, %d1
22709
mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
22710
22711
mov.l %a0, -(%sp) # save operand ptr during calls
22712
22713
mov.l 0x4(%sp),%d0 # pass rnd prec.
22714
andi.w &0x00c0,%d0
22715
lsr.w &0x4,%d0
22716
bsr.l _denorm # denorm result
22717
22718
mov.l (%sp),%a0
22719
mov.w 0x6(%sp),%d1 # load prec:mode into %d1
22720
andi.w &0xc0,%d1 # extract rnd prec
22721
lsr.w &0x4,%d1
22722
swap %d1
22723
mov.w 0x6(%sp),%d1
22724
andi.w &0x30,%d1
22725
lsr.w &0x4,%d1
22726
bsr.l _round # round the denorm
22727
22728
mov.l (%sp)+, %a0
22729
22730
# result is now rounded properly. convert back to normal format
22731
bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
22732
tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22733
beq.b unf_res_chkifzero # no; result is positive
22734
bset &0x7, FTEMP_EX(%a0) # set result sgn
22735
clr.b FTEMP_SGN(%a0) # clear temp sign
22736
22737
# the number may have become zero after rounding. set ccodes accordingly.
22738
unf_res_chkifzero:
22739
clr.l %d0
22740
tst.l FTEMP_HI(%a0) # is value now a zero?
22741
bne.b unf_res_cont # no
22742
tst.l FTEMP_LO(%a0)
22743
bne.b unf_res_cont # no
22744
# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
22745
bset &z_bit, %d0 # yes; set zero ccode bit
22746
22747
unf_res_cont:
22748
22749
#
22750
# can inex1 also be set along with unfl and inex2???
22751
#
22752
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22753
#
22754
btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22755
beq.b unf_res_end # no
22756
bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22757
22758
unf_res_end:
22759
add.l &0x4, %sp # clear stack
22760
rts
22761
22762
# unf_res() for fsglmul() and fsgldiv().
22763
global unf_res4
22764
unf_res4:
22765
mov.l %d1,-(%sp) # save rnd prec,mode on stack
22766
22767
btst &0x7,FTEMP_EX(%a0) # make "internal" format
22768
sne FTEMP_SGN(%a0)
22769
22770
mov.w FTEMP_EX(%a0),%d1 # extract exponent
22771
and.w &0x7fff,%d1
22772
sub.w %d0,%d1
22773
mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
22774
22775
mov.l %a0,-(%sp) # save operand ptr during calls
22776
22777
clr.l %d0 # force rnd prec = ext
22778
bsr.l _denorm # denorm result
22779
22780
mov.l (%sp),%a0
22781
mov.w &s_mode,%d1 # force rnd prec = sgl
22782
swap %d1
22783
mov.w 0x6(%sp),%d1 # load rnd mode
22784
andi.w &0x30,%d1 # extract rnd prec
22785
lsr.w &0x4,%d1
22786
bsr.l _round # round the denorm
22787
22788
mov.l (%sp)+,%a0
22789
22790
# result is now rounded properly. convert back to normal format
22791
bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
22792
tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22793
beq.b unf_res4_chkifzero # no; result is positive
22794
bset &0x7,FTEMP_EX(%a0) # set result sgn
22795
clr.b FTEMP_SGN(%a0) # clear temp sign
22796
22797
# the number may have become zero after rounding. set ccodes accordingly.
22798
unf_res4_chkifzero:
22799
clr.l %d0
22800
tst.l FTEMP_HI(%a0) # is value now a zero?
22801
bne.b unf_res4_cont # no
22802
tst.l FTEMP_LO(%a0)
22803
bne.b unf_res4_cont # no
22804
# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
22805
bset &z_bit,%d0 # yes; set zero ccode bit
22806
22807
unf_res4_cont:
22808
22809
#
22810
# can inex1 also be set along with unfl and inex2???
22811
#
22812
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22813
#
22814
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22815
beq.b unf_res4_end # no
22816
bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22817
22818
unf_res4_end:
22819
add.l &0x4,%sp # clear stack
22820
rts
22821
22822
#########################################################################
22823
# XDEF **************************************************************** #
22824
# ovf_res(): routine to produce the default overflow result of #
22825
# an overflowing number. #
22826
# ovf_res2(): same as above but the rnd mode/prec are passed #
22827
# differently. #
22828
# #
22829
# XREF **************************************************************** #
22830
# none #
22831
# #
22832
# INPUT *************************************************************** #
22833
# d1.b = '-1' => (-); '0' => (+) #
22834
# ovf_res(): #
22835
# d0 = rnd mode/prec #
22836
# ovf_res2(): #
22837
# hi(d0) = rnd prec #
22838
# lo(d0) = rnd mode #
22839
# #
22840
# OUTPUT ************************************************************** #
22841
# a0 = points to extended precision result #
22842
# d0.b = condition code bits #
22843
# #
22844
# ALGORITHM *********************************************************** #
22845
# The default overflow result can be determined by the sign of #
22846
# the result and the rounding mode/prec in effect. These bits are #
22847
# concatenated together to create an index into the default result #
22848
# table. A pointer to the correct result is returned in a0. The #
22849
# resulting condition codes are returned in d0 in case the caller #
22850
# doesn't want FPSR_cc altered (as is the case for fmove out). #
22851
# #
22852
#########################################################################
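#
# Illustrative only (not part of this package): the index built by ovf_res()
# is simply {sign, rounding precision, rounding mode} packed into one byte;
# the sketch below shows the layout in C.  ovf_index_sketch() is hypothetical.
#
#	/* prec: 0 = ext, 1 = sgl, 2 = dbl; mode: 0 = RN, 1 = RZ, 2 = RM, 3 = RP */
#	static unsigned ovf_index_sketch(int negative, unsigned prec, unsigned mode)
#	{
#	    unsigned idx = (negative ? 0x10 : 0) | (prec << 2) | mode;
#
#	    return idx;       /* byte offset into tbl_ovfl_cc is idx;            */
#	                      /* offset into tbl_ovfl_result is idx * 16 bytes   */
#	}
#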
22853
22854
global ovf_res
22855
ovf_res:
22856
andi.w &0x10,%d1 # keep result sign
22857
lsr.b &0x4,%d0 # shift prec/mode
22858
or.b %d0,%d1 # concat the two
22859
mov.w %d1,%d0 # make a copy
22860
lsl.b &0x1,%d1 # multiply d1 by 2
22861
bra.b ovf_res_load
22862
22863
global ovf_res2
22864
ovf_res2:
22865
and.w &0x10, %d1 # keep result sign
22866
or.b %d0, %d1 # insert rnd mode
22867
swap %d0
22868
or.b %d0, %d1 # insert rnd prec
22869
mov.w %d1, %d0 # make a copy
22870
lsl.b &0x1, %d1 # shift left by 1
22871
22872
#
22873
# use the rounding mode, precision, and result sign as an index into the
22874
# two tables below to fetch the default result and the result ccodes.
22875
#
22876
ovf_res_load:
22877
mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22878
lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22879
22880
rts
22881
22882
tbl_ovfl_cc:
22883
byte 0x2, 0x0, 0x0, 0x2
22884
byte 0x2, 0x0, 0x0, 0x2
22885
byte 0x2, 0x0, 0x0, 0x2
22886
byte 0x0, 0x0, 0x0, 0x0
22887
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22888
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22889
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22890
22891
tbl_ovfl_result:
22892
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22893
long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22894
long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22895
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22896
22897
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22898
long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22899
long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22900
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22901
22902
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22903
long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22904
long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22905
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906
22907
long 0x00000000,0x00000000,0x00000000,0x00000000
22908
long 0x00000000,0x00000000,0x00000000,0x00000000
22909
long 0x00000000,0x00000000,0x00000000,0x00000000
22910
long 0x00000000,0x00000000,0x00000000,0x00000000
22911
22912
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22913
long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22914
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22915
long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22916
22917
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22918
long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22919
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22920
long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22921
22922
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22923
long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22924
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22925
long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22926
22927
#########################################################################
22928
# XDEF **************************************************************** #
22929
# get_packed(): fetch a packed operand from memory and then #
22930
# convert it to a floating-point binary number. #
22931
# #
22932
# XREF **************************************************************** #
22933
# _dcalc_ea() - calculate the correct <ea> #
22934
# _mem_read() - fetch the packed operand from memory #
22935
# facc_in_x() - the fetch failed so jump to special exit code #
22936
# decbin() - convert packed to binary extended precision #
22937
# #
22938
# INPUT *************************************************************** #
22939
# None #
22940
# #
22941
# OUTPUT ************************************************************** #
22942
# If no failure on _mem_read(): #
22943
# FP_SRC(a6) = packed operand now as a binary FP number #
22944
# #
22945
# ALGORITHM *********************************************************** #
22946
# Get the correct <ea> which is the value on the exception stack #
22947
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
22948
# Then, fetch the operand from memory. If the fetch fails, exit #
22949
# through facc_in_x(). #
22950
# If the packed operand is a ZERO, NAN, or INF, convert it to #
22951
# its binary representation here. Else, call decbin() which will #
22952
# convert the packed value to an extended precision binary value. #
22953
# #
22954
#########################################################################
22955
22956
# the stacked <ea> for packed is correct except for -(An).
22957
# the base reg must be updated for both -(An) and (An)+.
22958
global get_packed
22959
get_packed:
22960
mov.l &0xc,%d0 # packed is 12 bytes
22961
bsr.l _dcalc_ea # fetch <ea>; correct An
22962
22963
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
22964
mov.l &0xc,%d0 # pass: 12 bytes
22965
bsr.l _dmem_read # read packed operand
22966
22967
tst.l %d1 # did dfetch fail?
22968
bne.l facc_in_x # yes
22969
22970
# The packed operand is an INF or a NAN if the exponent field is all ones.
22971
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
22972
cmpi.w %d0,&0x7fff # INF or NAN?
22973
bne.b gp_try_zero # no
22974
rts # operand is an INF or NAN
22975
22976
# The packed operand is a zero if the mantissa is all zero, else it's
22977
# a normal packed op.
22978
gp_try_zero:
22979
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
22980
andi.b &0x0f,%d0 # clear all but last nybble
22981
bne.b gp_not_spec # not a zero
22982
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
22983
bne.b gp_not_spec # not a zero
22984
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
22985
bne.b gp_not_spec # not a zero
22986
rts # operand is a ZERO
22987
gp_not_spec:
22988
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
22989
bsr.l decbin # convert to extended
22990
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
22991
rts
22992
22993
#########################################################################
22994
# decbin(): Converts normalized packed bcd value pointed to by register #
22995
# a0 to extended-precision value in fp0. #
22996
# #
22997
# INPUT *************************************************************** #
22998
# a0 = pointer to normalized packed bcd value #
22999
# #
23000
# OUTPUT ************************************************************** #
23001
# fp0 = exact fp representation of the packed bcd value. #
23002
# #
23003
# ALGORITHM *********************************************************** #
23004
# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
23005
# and NaN operands are dispatched without entering this routine) #
23006
# value in 68881/882 format at location (a0). #
23007
# #
23008
# A1. Convert the bcd exponent to binary by successive adds and #
23009
# muls. Set the sign according to SE. Subtract 16 to compensate #
23010
# for the mantissa which is to be interpreted as 17 integer #
23011
# digits, rather than 1 integer and 16 fraction digits. #
23012
# Note: this operation can never overflow. #
23013
# #
23014
# A2. Convert the bcd mantissa to binary by successive #
23015
# adds and muls in FP0. Set the sign according to SM. #
23016
# The mantissa digits will be converted with the decimal point #
23017
# assumed following the least-significant digit. #
23018
# Note: this operation can never overflow. #
23019
# #
23020
# A3. Count the number of leading/trailing zeros in the #
23021
# bcd string. If SE is positive, count the leading zeros; #
23022
# if negative, count the trailing zeros. Set the adjusted #
23023
# exponent equal to the exponent from A1 and the zero count #
23024
# added if SM = 1 and subtracted if SM = 0. Scale the #
23025
# mantissa the equivalent of forcing in the bcd value: #
23026
# #
23027
# SM = 0 a non-zero digit in the integer position #
23028
# SM = 1 a non-zero digit in Mant0, lsd of the fraction #
23029
# #
23030
# this will ensure that any value, regardless of its #
23031
# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
23032
# consistently. #
23033
# #
23034
# A4. Calculate the factor 10^exp in FP1 using a table of #
23035
# 10^(2^n) values. To reduce the error in forming factors #
23036
# greater than 10^27, a directed rounding scheme is used with #
23037
# tables rounded to RN, RM, and RP, according to the table #
23038
# in the comments of the pwrten section. #
23039
# #
23040
# A5. Form the final binary number by scaling the mantissa by #
23041
# the exponent factor. This is done by multiplying the #
23042
# mantissa in FP0 by the factor in FP1 if the adjusted #
23043
# exponent sign is positive, and dividing FP0 by FP1 if #
23044
# it is negative. #
23045
# #
23046
# Clean up and return. Check if the final mul or div was inexact. #
23047
# If so, set INEX1 in USER_FPSR. #
23048
# #
23049
#########################################################################
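#
# Illustrative only (not part of this package): decbin_sketch() below restates
# steps A1, A2 and A5 in C on an operand already split into its BCD digits;
# the zero strip/append step (A3) and the directed rounding of the power-of-
# ten factors (A4) are omitted.  All names here are hypothetical.
#
#	static double decbin_sketch(int se, int sm, const int edig[3],
#	                            const int mdig[17])
#	{
#	    long exp = 0;
#	    double man = 0.0;
#	    int i;
#
#	    for (i = 0; i < 3; i++)       /* A1: BCD exponent -> binary         */
#	        exp = exp * 10 + edig[i];
#	    if (se)
#	        exp = -exp;
#	    exp -= 16;                    /* mantissa read as 17 integer digits */
#
#	    for (i = 0; i < 17; i++)      /* A2: BCD mantissa -> binary         */
#	        man = man * 10.0 + mdig[i];
#	    if (sm)
#	        man = -man;
#
#	    while (exp > 0) { man *= 10.0; exp--; }   /* A5: scale by 10^exp    */
#	    while (exp < 0) { man /= 10.0; exp++; }
#	    return man;
#	}
#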
23050
23051
#
23052
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053
# to nearest, minus, and plus, respectively. The tables include
23054
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
23055
# is required until the power is greater than 27, however, all
23056
# tables include the first 5 for ease of indexing.
23057
#
23058
RTABLE:
23059
byte 0,0,0,0
23060
byte 2,3,2,3
23061
byte 2,3,3,2
23062
byte 3,2,2,3
23063
23064
set FNIBS,7
23065
set FSTRT,0
23066
23067
set ESTRT,4
23068
set EDIGITS,2
23069
23070
global decbin
23071
decbin:
23072
mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073
mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074
mov.l 0x8(%a0),FP_SCR0_LO(%a6)
23075
23076
lea FP_SCR0(%a6),%a0
23077
23078
movm.l &0x3c00,-(%sp) # save d2-d5
23079
fmovm.x &0x1,-(%sp) # save fp1
23080
#
23081
# Calculate exponent:
23082
# 1. Copy bcd value in memory for use as a working copy.
23083
# 2. Calculate absolute value of exponent in d1 by mul and add.
23084
# 3. Correct for exponent sign.
23085
# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23086
# (i.e., all digits assumed left of the decimal point.)
23087
#
23088
# Register usage:
23089
#
23090
# calc_e:
23091
# (*) d0: temp digit storage
23092
# (*) d1: accumulator for binary exponent
23093
# (*) d2: digit count
23094
# (*) d3: offset pointer
23095
# ( ) d4: first word of bcd
23096
# ( ) a0: pointer to working bcd value
23097
# ( ) a6: pointer to original bcd value
23098
# (*) FP_SCR1: working copy of original bcd value
23099
# (*) L_SCR1: copy of original exponent word
23100
#
23101
calc_e:
23102
mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
23103
mov.l &ESTRT,%d3 # counter to pick up digits
23104
mov.l (%a0),%d4 # get first word of bcd
23105
clr.l %d1 # zero d1 for accumulator
23106
e_gd:
23107
mulu.l &0xa,%d1 # mul partial product by one digit place
23108
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
23109
add.l %d0,%d1 # d1 = d1 + d0
23110
addq.b &4,%d3 # advance d3 to the next digit
23111
dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
23112
btst &30,%d4 # get SE
23113
beq.b e_pos # don't negate if pos
23114
neg.l %d1 # negate before subtracting
23115
e_pos:
23116
sub.l &16,%d1 # sub to compensate for shift of mant
23117
bge.b e_save # if still pos, do not neg
23118
neg.l %d1 # now negative, make pos and set SE
23119
or.l &0x40000000,%d4 # set SE in d4,
23120
or.l &0x40000000,(%a0) # and in working bcd
23121
e_save:
23122
mov.l %d1,-(%sp) # save exp on stack
23123
#
23124
#
23125
# Calculate mantissa:
23126
# 1. Calculate absolute value of mantissa in fp0 by mul and add.
23127
# 2. Correct for mantissa sign.
23128
# (i.e., all digits assumed left of the decimal point.)
23129
#
23130
# Register usage:
23131
#
23132
# calc_m:
23133
# (*) d0: temp digit storage
23134
# (*) d1: lword counter
23135
# (*) d2: digit count
23136
# (*) d3: offset pointer
23137
# ( ) d4: words 2 and 3 of bcd
23138
# ( ) a0: pointer to working bcd value
23139
# ( ) a6: pointer to original bcd value
23140
# (*) fp0: mantissa accumulator
23141
# ( ) FP_SCR1: working copy of original bcd value
23142
# ( ) L_SCR1: copy of original exponent word
23143
#
23144
calc_m:
23145
mov.l &1,%d1 # word counter, init to 1
23146
fmov.s &0x00000000,%fp0 # accumulator
23147
#
23148
#
23149
# Since the packed number has a long word between the first & second parts,
23150
# get the integer digit then skip down & get the rest of the
23151
# mantissa. We will unroll the loop once.
23152
#
23153
bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
23154
fadd.b %d0,%fp0 # add digit to sum in fp0
23155
#
23156
#
23157
# Get the rest of the mantissa.
23158
#
23159
loadlw:
23160
mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
23161
mov.l &FSTRT,%d3 # counter to pick up digits
23162
mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
23163
md2b:
23164
fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
23165
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
23166
fadd.b %d0,%fp0 # fp0 = fp0 + digit
23167
#
23168
#
23169
# If all the digits (8) in that long word have been converted (d2=0),
23170
# then inc d1 (=2) to point to the next long word and reset d3 to 0
23171
# to initialize the digit offset, and set d2 to 7 for the digit count;
23172
# else continue with this long word.
23173
#
23174
addq.b &4,%d3 # advance d3 to the next digit
23175
dbf.w %d2,md2b # check for last digit in this lw
23176
nextlw:
23177
addq.l &1,%d1 # inc lw pointer in mantissa
23178
cmp.l %d1,&2 # test for last lw
23179
ble.b loadlw # if not, get last one
23180
#
23181
# Check the sign of the mant and make the value in fp0 the same sign.
23182
#
23183
m_sign:
23184
btst &31,(%a0) # test sign of the mantissa
23185
beq.b ap_st_z # if clear, go to append/strip zeros
23186
fneg.x %fp0 # if set, negate fp0
23187
#
23188
# Append/strip zeros:
23189
#
23190
# For adjusted exponents which have an absolute value greater than 27*,
23191
# this routine calculates the amount needed to normalize the mantissa
23192
# for the adjusted exponent. That number is subtracted from the exp
23193
# if the exp was positive, and added if it was negative. The purpose
23194
# of this is to reduce the value of the exponent and the possibility
23195
# of error in calculation of pwrten.
23196
#
23197
# 1. Branch on the sign of the adjusted exponent.
23198
# 2p.(positive exp)
23199
# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
23200
# 3. Add one for each zero encountered until a non-zero digit.
23201
# 4. Subtract the count from the exp.
23202
# 5. Check if the exp has crossed zero in #3 above; make the exp abs
23203
# and set SE.
23204
# 6. Multiply the mantissa by 10**count.
23205
# 2n.(negative exp)
23206
# 2. Check the digits in lwords 3 and 2 in descending order.
23207
# 3. Add one for each zero encountered until a non-zero digit.
23208
# 4. Add the count to the exp.
23209
# 5. Check if the exp has crossed zero in #3 above; clear SE.
23210
# 6. Divide the mantissa by 10**count.
23211
#
23212
# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
23213
# any adjustment due to append/strip zeros will drive the resultant
23214
# exponent towards zero. Since all pwrten constants with a power
23215
# of 27 or less are exact, there is no need to use this routine to
23216
# attempt to lessen the resultant exponent.
23217
#
23218
# Register usage:
23219
#
23220
# ap_st_z:
23221
# (*) d0: temp digit storage
23222
# (*) d1: zero count
23223
# (*) d2: digit count
23224
# (*) d3: offset pointer
23225
# ( ) d4: first word of bcd
23226
# (*) d5: lword counter
23227
# ( ) a0: pointer to working bcd value
23228
# ( ) FP_SCR1: working copy of original bcd value
23229
# ( ) L_SCR1: copy of original exponent word
23230
#
23231
#
23232
# First check the absolute value of the exponent to see if this
23233
# routine is necessary. If so, then check the sign of the exponent
23234
# and do append (+) or strip (-) zeros accordingly.
23235
# This section handles a positive adjusted exponent.
23236
#
23237
ap_st_z:
23238
mov.l (%sp),%d1 # load expA for range test
23239
cmp.l %d1,&27 # compare expA with 27
23240
ble.w pwrten # if abs(expA) <28, skip ap/st zeros
23241
btst &30,(%a0) # check sign of exp
23242
bne.b ap_st_n # if neg, go to neg side
23243
clr.l %d1 # zero count reg
23244
mov.l (%a0),%d4 # load lword 1 to d4
23245
bfextu %d4{&28:&4},%d0 # get M16 in d0
23246
bne.b ap_p_fx # if M16 is non-zero, go fix exp
23247
addq.l &1,%d1 # inc zero count
23248
mov.l &1,%d5 # init lword counter
23249
mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
23250
bne.b ap_p_cl # if lw 2 is non-zero, go check its digits
23251
addq.l &8,%d1 # and inc count by 8
23252
addq.l &1,%d5 # inc lword counter
23253
mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
23254
ap_p_cl:
23255
clr.l %d3 # init offset reg
23256
mov.l &7,%d2 # init digit counter
23257
ap_p_gd:
23258
bfextu %d4{%d3:&4},%d0 # get digit
23259
bne.b ap_p_fx # if non-zero, go to fix exp
23260
addq.l &4,%d3 # point to next digit
23261
addq.l &1,%d1 # inc digit counter
23262
dbf.w %d2,ap_p_gd # get next digit
23263
ap_p_fx:
23264
mov.l %d1,%d0 # copy counter to d2
23265
mov.l (%sp),%d1 # get adjusted exp from memory
23266
sub.l %d0,%d1 # subtract count from exp
23267
bge.b ap_p_fm # if still pos, go to pwrten
23268
neg.l %d1 # now its neg; get abs
23269
mov.l (%a0),%d4 # load lword 1 to d4
23270
or.l &0x40000000,%d4 # and set SE in d4
23271
or.l &0x40000000,(%a0) # and in memory
23272
#
23273
# Calculate the mantissa multiplier to compensate for the striping of
23274
# zeros from the mantissa.
23275
#
23276
ap_p_fm:
23277
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23278
clr.l %d3 # init table index
23279
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23280
mov.l &3,%d2 # init d2 to count bits in counter
23281
ap_p_el:
23282
asr.l &1,%d0 # shift lsb into carry
23283
bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
23284
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23285
ap_p_en:
23286
add.l &12,%d3 # inc d3 to next rtable entry
23287
tst.l %d0 # check if d0 is zero
23288
bne.b ap_p_el # if not, get next bit
23289
fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
23290
bra.b pwrten # go calc pwrten
23291
#
23292
# This section handles a negative adjusted exponent.
23293
#
23294
ap_st_n:
23295
clr.l %d1 # clr counter
23296
mov.l &2,%d5 # set up d5 to point to lword 3
23297
mov.l (%a0,%d5.L*4),%d4 # get lword 3
23298
bne.b ap_n_cl # if not zero, check digits
23299
sub.l &1,%d5 # dec d5 to point to lword 2
23300
addq.l &8,%d1 # inc counter by 8
23301
mov.l (%a0,%d5.L*4),%d4 # get lword 2
23302
ap_n_cl:
23303
mov.l &28,%d3 # point to last digit
23304
mov.l &7,%d2 # init digit counter
23305
ap_n_gd:
23306
bfextu %d4{%d3:&4},%d0 # get digit
23307
bne.b ap_n_fx # if non-zero, go to exp fix
23308
subq.l &4,%d3 # point to previous digit
23309
addq.l &1,%d1 # inc digit counter
23310
dbf.w %d2,ap_n_gd # get next digit
23311
ap_n_fx:
23312
mov.l %d1,%d0 # copy counter to d0
23313
mov.l (%sp),%d1 # get adjusted exp from memory
23314
sub.l %d0,%d1 # subtract count from exp
23315
bgt.b ap_n_fm # if still pos, go fix mantissa
23316
neg.l %d1 # take abs of exp and clr SE
23317
mov.l (%a0),%d4 # load lword 1 to d4
23318
and.l &0xbfffffff,%d4 # and clr SE in d4
23319
and.l &0xbfffffff,(%a0) # and in memory
23320
#
23321
# Calculate the mantissa multiplier to compensate for the appending of
23322
# zeros to the mantissa.
23323
#
23324
ap_n_fm:
23325
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23326
clr.l %d3 # init table index
23327
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23328
mov.l &3,%d2 # init d2 to count bits in counter
23329
ap_n_el:
23330
asr.l &1,%d0 # shift lsb into carry
23331
bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
23332
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23333
ap_n_en:
23334
add.l &12,%d3 # inc d3 to next rtable entry
23335
tst.l %d0 # check if d0 is zero
23336
bne.b ap_n_el # if not, get next bit
23337
fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
23338
#
23339
#
23340
# Calculate power-of-ten factor from adjusted and shifted exponent.
23341
#
23342
# Register usage:
23343
#
23344
# pwrten:
23345
# (*) d0: temp
23346
# ( ) d1: exponent
23347
# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23348
# (*) d3: FPCR work copy
23349
# ( ) d4: first word of bcd
23350
# (*) a1: RTABLE pointer
23351
# calc_p:
23352
# (*) d0: temp
23353
# ( ) d1: exponent
23354
# (*) d3: PWRTxx table index
23355
# ( ) a0: pointer to working copy of bcd
23356
# (*) a1: PWRTxx pointer
23357
# (*) fp1: power-of-ten accumulator
23358
#
23359
# Pwrten calculates the exponent factor in the selected rounding mode
23360
# according to the following table:
23361
#
23362
# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
23363
#
23364
# ANY ANY RN RN
23365
#
23366
# + + RP RP
23367
# - + RP RM
23368
# + - RP RM
23369
# - - RP RP
23370
#
23371
# + + RM RM
23372
# - + RM RP
23373
# + - RM RP
23374
# - - RM RM
23375
#
23376
# + + RZ RM
23377
# - + RZ RM
23378
# + - RZ RP
23379
# - - RZ RP
23380
#
23381
#
23382
pwrten:
23383
mov.l USER_FPCR(%a6),%d3 # get user's FPCR
23384
bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
23385
mov.l (%a0),%d4 # reload 1st bcd word to d4
23386
asl.l &2,%d2 # format d2 to be
23387
bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
23388
add.l %d0,%d2 # in d2 as index into RTABLE
23389
lea.l RTABLE(%pc),%a1 # load rtable base
23390
mov.b (%a1,%d2),%d0 # load new rounding bits from table
23391
clr.l %d3 # clear d3 to force no exc and extended
23392
bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
23393
fmov.l %d3,%fpcr # write new FPCR
23394
asr.l &1,%d0 # write correct PTENxx table
23395
bcc.b not_rp # to a1
23396
lea.l PTENRP(%pc),%a1 # it is RP
23397
bra.b calc_p # go to init section
23398
not_rp:
23399
asr.l &1,%d0 # keep checking
23400
bcc.b not_rm
23401
lea.l PTENRM(%pc),%a1 # it is RM
23402
bra.b calc_p # go to init section
23403
not_rm:
23404
lea.l PTENRN(%pc),%a1 # it is RN
23405
calc_p:
23406
mov.l %d1,%d0 # copy exp to d0;use d0
23407
bpl.b no_neg # if exp is negative,
23408
neg.l %d0 # invert it
23409
or.l &0x40000000,(%a0) # and set SE bit
23410
no_neg:
23411
clr.l %d3 # table index
23412
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23413
e_loop:
23414
asr.l &1,%d0 # shift next bit into carry
23415
bcc.b e_next # if zero, skip the mul
23416
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23417
e_next:
23418
add.l &12,%d3 # inc d3 to next rtable entry
23419
tst.l %d0 # check if d0 is zero
23420
bne.b e_loop # not zero, continue shifting
23421
#
23422
#
23423
# Check the sign of the adjusted exp and make the value in fp0 the
23424
# same sign. If the exp was pos then multiply fp1*fp0;
23425
# else divide fp0/fp1.
23426
#
23427
# Register Usage:
23428
# norm:
23429
# ( ) a0: pointer to working bcd value
23430
# (*) fp0: mantissa accumulator
23431
# ( ) fp1: scaling factor - 10**(abs(exp))
23432
#
23433
pnorm:
23434
btst &30,(%a0) # test the sign of the exponent
23435
beq.b mul # if clear, go to multiply
23436
div:
23437
fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
23438
bra.b end_dec
23439
mul:
23440
fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
23441
#
23442
#
23443
# Clean up and return with result in fp0.
23444
#
23445
# If the final mul/div in decbin incurred an inex exception,
23446
# it will be inex2, but will be reported as inex1 by get_op.
23447
#
23448
end_dec:
23449
fmov.l %fpsr,%d0 # get status register
23450
bclr &inex2_bit+8,%d0 # test for inex2 and clear it
23451
beq.b no_exc # skip this if no exc
23452
ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23453
no_exc:
23454
add.l &0x4,%sp # clear 1 lw param
23455
fmovm.x (%sp)+,&0x40 # restore fp1
23456
movm.l (%sp)+,&0x3c # restore d2-d5
23457
fmov.l &0x0,%fpcr
23458
fmov.l &0x0,%fpsr
23459
rts
23460
23461
#########################################################################
23462
# bindec(): Converts an input in extended precision format to bcd format#
23463
# #
23464
# INPUT *************************************************************** #
23465
# a0 = pointer to the input extended precision value in memory. #
23466
# the input may be either normalized, unnormalized, or #
23467
# denormalized. #
23468
# d0 = contains the k-factor sign-extended to 32-bits. #
23469
# #
23470
# OUTPUT ************************************************************** #
23471
# FP_SCR0(a6) = bcd format result on the stack. #
23472
# #
23473
# ALGORITHM *********************************************************** #
23474
# #
23475
# A1. Set RM and size ext; Set SIGMA = sign of input. #
23476
# The k-factor is saved for use in d7. Clear the #
23477
# BINDEC_FLG for separating normalized/denormalized #
23478
# input. If input is unnormalized or denormalized, #
23479
# normalize it. #
23480
# #
23481
# A2. Set X = abs(input). #
23482
# #
23483
# A3. Compute ILOG. #
23484
# ILOG is the log base 10 of the input value. It is #
23485
# approximated by adding e + 0.f when the original #
23486
# value is viewed as 2^^e * 1.f in extended precision. #
23487
# This value is stored in d6. #
23488
# #
23489
# A4. Clr INEX bit. #
23490
# The operation in A3 above may have set INEX2. #
23491
# #
23492
# A5. Set ICTR = 0; #
23493
# ICTR is a flag used in A13. It must be set before the #
23494
# loop entry A6. #
23495
# #
23496
# A6. Calculate LEN. #
23497
# LEN is the number of digits to be displayed. The #
23498
# k-factor can dictate either the total number of digits, #
23499
# if it is a positive number, or the number of digits #
23500
# after the decimal point which are to be included as #
23501
# significant. See the 68882 manual for examples. #
23502
# If LEN is computed to be greater than 17, set OPERR in #
23503
# USER_FPSR. LEN is stored in d4. #
23504
# #
23505
# A7. Calculate SCALE. #
23506
# SCALE is equal to 10^ISCALE, where ISCALE is the number #
23507
# of decimal places needed to insure LEN integer digits #
23508
# in the output before conversion to bcd. LAMBDA is the #
23509
# sign of ISCALE, used in A9. Fp1 contains #
23510
# 10^^(abs(ISCALE)) using a rounding mode which is a #
23511
# function of the original rounding mode and the signs #
23512
# of ISCALE and X. A table is given in the code. #
23513
# #
23514
# A8. Clr INEX; Force RZ. #
23515
# The operation in A3 above may have set INEX2. #
23516
# RZ mode is forced for the scaling operation to insure #
23517
# only one rounding error. The grs bits are collected in #
23518
# the INEX flag for use in A10. #
23519
# #
23520
# A9. Scale X -> Y. #
23521
# The mantissa is scaled to the desired number of #
23522
# significant digits. The excess digits are collected #
23523
# in INEX2. #
23524
# #
23525
# A10. Or in INEX. #
23526
# If INEX is set, round error occurred. This is #
23527
# compensated for by 'or-ing' in the INEX2 flag to #
23528
# the lsb of Y. #
23529
# #
23530
# A11. Restore original FPCR; set size ext. #
23531
# Perform FINT operation in the user's rounding mode. #
23532
# Keep the size to extended. #
23533
# #
23534
# A12. Calculate YINT = FINT(Y) according to user's rounding #
23535
# mode. The FPSP routine sintd0 is used. The output #
23536
# is in fp0. #
23537
# #
23538
# A13. Check for LEN digits. #
23539
# If the int operation results in more than LEN digits, #
23540
# or less than LEN -1 digits, adjust ILOG and repeat from #
23541
# A6. This test occurs only on the first pass. If the #
23542
# result is exactly 10^LEN, decrement ILOG and divide #
23543
# the mantissa by 10. #
23544
# #
23545
# A14. Convert the mantissa to bcd. #
23546
# The binstr routine is used to convert the LEN digit #
23547
# mantissa to bcd in memory. The input to binstr is #
23548
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
23549
# such that the decimal point is to the left of bit 63. #
23550
# The bcd digits are stored in the correct position in #
23551
# the final string area in memory. #
23552
# #
23553
# A15. Convert the exponent to bcd. #
23554
# As in A14 above, the exp is converted to bcd and the #
23555
# digits are stored in the final string. #
23556
# Test the length of the final exponent string. If the #
23557
# length is 4, set operr. #
23558
# #
23559
# A16. Write sign bits to final string. #
23560
# #
23561
#########################################################################
23562
23563
set BINDEC_FLG, EXC_TEMP # DENORM flag
23564
23565
# Constants in extended precision
23566
PLOG2:
23567
long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23568
PLOG2UP1:
23569
long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23570
23571
# Constants in single precision
23572
FONE:
23573
long 0x3F800000,0x00000000,0x00000000,0x00000000
23574
FTWO:
23575
long 0x40000000,0x00000000,0x00000000,0x00000000
23576
FTEN:
23577
long 0x41200000,0x00000000,0x00000000,0x00000000
23578
F4933:
23579
long 0x459A2800,0x00000000,0x00000000,0x00000000
23580
23581
RBDTBL:
23582
byte 0,0,0,0
23583
byte 3,3,2,2
23584
byte 3,2,2,3
23585
byte 2,3,3,2
23586
23587
# Implementation Notes:
23588
#
23589
# The registers are used as follows:
23590
#
23591
# d0: scratch; LEN input to binstr
23592
# d1: scratch
23593
# d2: upper 32-bits of mantissa for binstr
23594
# d3: scratch;lower 32-bits of mantissa for binstr
23595
# d4: LEN
23596
# d5: LAMBDA/ICTR
23597
# d6: ILOG
23598
# d7: k-factor
23599
# a0: ptr for original operand/final result
23600
# a1: scratch pointer
23601
# a2: pointer to FP_X; abs(original value) in ext
23602
# fp0: scratch
23603
# fp1: scratch
23604
# fp2: scratch
23605
# F_SCR1:
23606
# F_SCR2:
23607
# L_SCR1:
23608
# L_SCR2:
23609
23610
global bindec
23611
bindec:
23612
movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
23613
fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
23614
23615
# A1. Set RM and size ext. Set SIGMA = sign input;
23616
# The k-factor is saved for use in d7. Clear BINDEC_FLG for
23617
# separating normalized/denormalized input. If the input
23618
# is a denormalized number, set the BINDEC_FLG memory word
23619
# to signal denorm. If the input is unnormalized, normalize
23620
# the input and test for denormalized result.
23621
#
23622
fmov.l &rm_mode*0x10,%fpcr # set RM and ext
23623
mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
23624
mov.l %d0,%d7 # move k-factor to d7
23625
23626
clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
23627
cmpi.b STAG(%a6),&DENORM # is input a DENORM?
23628
bne.w A2_str # no; input is a NORM
23629
23630
#
23631
# Normalize the denorm
23632
#
23633
un_de_norm:
23634
mov.w (%a0),%d0
23635
and.w &0x7fff,%d0 # strip sign of normalized exp
23636
mov.l 4(%a0),%d1
23637
mov.l 8(%a0),%d2
23638
norm_loop:
23639
sub.w &1,%d0
23640
lsl.l &1,%d2
23641
roxl.l &1,%d1
23642
tst.l %d1
23643
bge.b norm_loop
23644
#
23645
# Test if the normalized input is denormalized
23646
#
23647
tst.w %d0
23648
bgt.b pos_exp # if greater than zero, it is a norm
23649
st BINDEC_FLG(%a6) # set flag for denorm
23650
pos_exp:
23651
and.w &0x7fff,%d0 # strip sign of normalized exp
23652
mov.w %d0,(%a0)
23653
mov.l %d1,4(%a0)
23654
mov.l %d2,8(%a0)
23655
23656
# A2. Set X = abs(input).
23657
#
23658
A2_str:
23659
mov.l (%a0),FP_SCR1(%a6) # move input to work space
23660
mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
23661
mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
23662
and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
23663
23664
# A3. Compute ILOG.
23665
# ILOG is the log base 10 of the input value. It is approx-
23666
# imated by adding e + 0.f when the original value is viewed
23667
# as 2^^e * 1.f in extended precision. This value is stored
23668
# in d6.
23669
#
23670
# Register usage:
23671
# Input/Output
23672
# d0: k-factor/exponent
23673
# d2: x/x
23674
# d3: x/x
23675
# d4: x/x
23676
# d5: x/x
23677
# d6: x/ILOG
23678
# d7: k-factor/Unchanged
23679
# a0: ptr for original operand/final result
23680
# a1: x/x
23681
# a2: x/x
23682
# fp0: x/float(ILOG)
23683
# fp1: x/x
23684
# fp2: x/x
23685
# F_SCR1:x/x
23686
# F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23687
# L_SCR1:x/x
23688
# L_SCR2:first word of X packed/Unchanged
23689
23690
tst.b BINDEC_FLG(%a6) # check for denorm
23691
beq.b A3_cont # if clr, continue with norm
23692
mov.l &-4933,%d6 # force ILOG = -4933
23693
bra.b A4_str
23694
A3_cont:
23695
mov.w FP_SCR1(%a6),%d0 # move exp to d0
23696
mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
23697
fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
23698
sub.w &0x3fff,%d0 # strip off bias
23699
fadd.w %d0,%fp0 # add in exp
23700
fsub.s FONE(%pc),%fp0 # subtract off 1.0
23701
fbge.w pos_res # if pos, branch
23702
fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
23703
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23704
bra.b A4_str # go move out ILOG
23705
pos_res:
23706
fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
23707
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23708
23709
23710
# A4. Clr INEX bit.
23711
# The operation in A3 above may have set INEX2.
23712
23713
A4_str:
23714
fmov.l &0,%fpsr # zero all of fpsr - nothing needed
23715
23716
23717
# A5. Set ICTR = 0;
23718
# ICTR is a flag used in A13. It must be set before the
23719
# loop entry A6. The lower word of d5 is used for ICTR.
23720
23721
clr.w %d5 # clear ICTR
23722
23723
# A6. Calculate LEN.
23724
# LEN is the number of digits to be displayed. The k-factor
23725
# can dictate either the total number of digits, if it is
23726
# a positive number, or the number of digits after the
23727
# original decimal point which are to be included as
23728
# significant. See the 68882 manual for examples.
23729
# If LEN is computed to be greater than 17, set OPERR in
23730
# USER_FPSR. LEN is stored in d4.
23731
#
23732
# Register usage:
23733
# Input/Output
23734
# d0: exponent/Unchanged
23735
# d2: x/x/scratch
23736
# d3: x/x
23737
# d4: exc picture/LEN
23738
# d5: ICTR/Unchanged
23739
# d6: ILOG/Unchanged
23740
# d7: k-factor/Unchanged
23741
# a0: ptr for original operand/final result
23742
# a1: x/x
23743
# a2: x/x
23744
# fp0: float(ILOG)/Unchanged
23745
# fp1: x/x
23746
# fp2: x/x
23747
# F_SCR1:x/x
23748
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23749
# L_SCR1:x/x
23750
# L_SCR2:first word of X packed/Unchanged
23751
23752
A6_str:
23753
tst.l %d7 # branch on sign of k
23754
ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
23755
mov.l %d7,%d4 # if k > 0, LEN = k
23756
bra.b len_ck # skip to LEN check
23757
k_neg:
23758
mov.l %d6,%d4 # first load ILOG to d4
23759
sub.l %d7,%d4 # subtract off k
23760
addq.l &1,%d4 # add in the 1
23761
len_ck:
23762
tst.l %d4 # LEN check: branch on sign of LEN
23763
ble.b LEN_ng # if neg, set LEN = 1
23764
cmp.l %d4,&17 # test if LEN > 17
23765
ble.b A7_str # if not, forget it
23766
mov.l &17,%d4 # set max LEN = 17
23767
tst.l %d7 # if negative, never set OPERR
23768
ble.b A7_str # if positive, continue
23769
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
23770
bra.b A7_str # finished here
23771
LEN_ng:
23772
mov.l &1,%d4 # min LEN is 1
23773
23774
23775
# A7. Calculate SCALE.
23776
# SCALE is equal to 10^ISCALE, where ISCALE is the number
23777
# of decimal places needed to insure LEN integer digits
23778
# in the output before conversion to bcd. LAMBDA is the sign
23779
# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
23780
# the rounding mode as given in the following table (see
23781
# Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782
# of opposite sign in bindec.sa from Coonen).
23783
#
23784
# Initial USE
23785
# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
23786
# ----------------------------------------------
23787
# RN 00 0 0 00/0 RN
23788
# RN 00 0 1 00/0 RN
23789
# RN 00 1 0 00/0 RN
23790
# RN 00 1 1 00/0 RN
23791
# RZ 01 0 0 11/3 RP
23792
# RZ 01 0 1 11/3 RP
23793
# RZ 01 1 0 10/2 RM
23794
# RZ 01 1 1 10/2 RM
23795
# RM 10 0 0 11/3 RP
23796
# RM 10 0 1 10/2 RM
23797
# RM 10 1 0 10/2 RM
23798
# RM 10 1 1 11/3 RP
23799
# RP 11 0 0 10/2 RM
23800
# RP 11 0 1 11/3 RP
23801
# RP 11 1 0 11/3 RP
23802
# RP 11 1 1 10/2 RM
23803
#
23804
# Register usage:
23805
# Input/Output
23806
# d0: exponent/scratch - final is 0
23807
# d2: x/0 or 24 for A9
23808
# d3: x/scratch - offset ptr into PTENRM array
23809
# d4: LEN/Unchanged
23810
# d5: 0/ICTR:LAMBDA
23811
# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812
# d7: k-factor/Unchanged
23813
# a0: ptr for original operand/final result
23814
# a1: x/ptr to PTENRM array
23815
# a2: x/x
23816
# fp0: float(ILOG)/Unchanged
23817
# fp1: x/10^ISCALE
23818
# fp2: x/x
23819
# F_SCR1:x/x
23820
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23821
# L_SCR1:x/x
23822
# L_SCR2:first word of X packed/Unchanged
23823
23824
A7_str:
23825
tst.l %d7 # test sign of k
23826
bgt.b k_pos # if pos and > 0, skip this
23827
cmp.l %d7,%d6 # test k - ILOG
23828
blt.b k_pos # if ILOG >= k, skip this
23829
mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
23830
k_pos:
23831
mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
23832
addq.l &1,%d0 # add the 1
23833
sub.l %d4,%d0 # sub off LEN
23834
swap %d5 # use upper word of d5 for LAMBDA
23835
clr.w %d5 # set it zero initially
23836
clr.w %d2 # set up d2 for very small case
23837
tst.l %d0 # test sign of ISCALE
23838
bge.b iscale # if pos, skip next inst
23839
addq.w &1,%d5 # if neg, set LAMBDA true
23840
cmp.l %d0,&0xffffecd4 # test iscale <= -4908
23841
bgt.b no_inf # if false, skip rest
23842
add.l &24,%d0 # add in 24 to iscale
23843
mov.l &24,%d2 # put 24 in d2 for A9
23844
no_inf:
23845
neg.l %d0 # and take abs of ISCALE
23846
iscale:
23847
fmov.s FONE(%pc),%fp1 # init fp1 to 1
23848
bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
23849
lsl.w &1,%d1 # put them in bits 2:1
23850
add.w %d5,%d1 # add in LAMBDA
23851
lsl.w &1,%d1 # put them in bits 3:1
23852
tst.l L_SCR2(%a6) # test sign of original x
23853
bge.b x_pos # if pos, don't set bit 0
23854
addq.l &1,%d1 # if neg, set bit 0
23855
x_pos:
23856
lea.l RBDTBL(%pc),%a2 # load rbdtbl base
23857
mov.b (%a2,%d1),%d3 # load d3 with new rmode
23858
lsl.l &4,%d3 # put bits in proper position
23859
fmov.l %d3,%fpcr # load bits into fpu
23860
lsr.l &4,%d3 # put bits in proper position
23861
tst.b %d3 # decode new rmode for pten table
23862
bne.b not_rn # if zero, it is RN
23863
lea.l PTENRN(%pc),%a1 # load a1 with RN table base
23864
bra.b rmode # exit decode
23865
not_rn:
23866
lsr.b &1,%d3 # get lsb in carry
23867
bcc.b not_rp2 # if carry clear, it is RM
23868
lea.l PTENRP(%pc),%a1 # load a1 with RP table base
23869
bra.b rmode # exit decode
23870
not_rp2:
23871
lea.l PTENRM(%pc),%a1 # load a1 with RM table base
23872
rmode:
23873
clr.l %d3 # clr table index
23874
e_loop2:
23875
lsr.l &1,%d0 # shift next bit into carry
23876
bcc.b e_next2 # if zero, skip the mul
23877
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23878
e_next2:
23879
add.l &12,%d3 # inc d3 to next pwrten table entry
23880
tst.l %d0 # test if ISCALE is zero
23881
bne.b e_loop2 # if not, loop
23882
23883
# A8. Clr INEX; Force RZ.
23884
# The operation in A3 above may have set INEX2.
23885
# RZ mode is forced for the scaling operation to insure
23886
# only one rounding error. The grs bits are collected in
23887
# the INEX flag for use in A10.
23888
#
23889
# Register usage:
23890
# Input/Output
23891
23892
fmov.l &0,%fpsr # clr INEX
23893
fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
23894
23895
# A9. Scale X -> Y.
23896
# The mantissa is scaled to the desired number of significant
23897
# digits. The excess digits are collected in INEX2. If mul,
23898
# Check d2 for excess 10 exponential value. If not zero,
23899
# the iscale value would have caused the pwrten calculation
23900
# to overflow. Only a negative iscale can cause this, so
23901
# multiply by 10^(d2), which is now only allowed to be 24,
23902
# with a multiply by 10^8 and 10^16, which is exact since
23903
# 10^24 is exact. If the input was denormalized, we must
23904
# create a busy stack frame with the mul command and the
23905
# two operands, and allow the fpu to complete the multiply.
23906
#
23907
# Register usage:
23908
# Input/Output
23909
# d0: FPCR with RZ mode/Unchanged
23910
# d2: 0 or 24/unchanged
23911
# d3: x/x
23912
# d4: LEN/Unchanged
23913
# d5: ICTR:LAMBDA
23914
# d6: ILOG/Unchanged
23915
# d7: k-factor/Unchanged
23916
# a0: ptr for original operand/final result
23917
# a1: ptr to PTENRM array/Unchanged
23918
# a2: x/x
23919
# fp0: float(ILOG)/X adjusted for SCALE (Y)
23920
# fp1: 10^ISCALE/Unchanged
23921
# fp2: x/x
23922
# F_SCR1:x/x
23923
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23924
# L_SCR1:x/x
23925
# L_SCR2:first word of X packed/Unchanged
23926
23927
A9_str:
23928
fmov.x (%a0),%fp0 # load X from memory
23929
fabs.x %fp0 # use abs(X)
23930
tst.w %d5 # LAMBDA is in lower word of d5
23931
bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
23932
fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
23933
bra.w A10_st # branch to A10
23934
23935
sc_mul:
23936
tst.b BINDEC_FLG(%a6) # check for denorm
23937
beq.w A9_norm # if norm, continue with mul
23938
23939
# for DENORM, we must calculate:
23940
# fp0 = input_op * 10^ISCALE * 10^24
23941
# since the input operand is a DENORM, we can't multiply it directly.
23942
# so, we do the multiplication of the exponents and mantissas separately.
23943
# in this way, we avoid underflow on intermediate stages of the
23944
# multiplication and guarantee a result without exception.
23945
fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
23946
23947
mov.w (%sp),%d3 # grab exponent
23948
andi.w &0x7fff,%d3 # clear sign
23949
ori.w &0x8000,(%a0) # make DENORM exp negative
23950
add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
23951
subi.w &0x3fff,%d3 # subtract BIAS
23952
add.w 36(%a1),%d3
23953
subi.w &0x3fff,%d3 # subtract BIAS
23954
add.w 48(%a1),%d3
23955
subi.w &0x3fff,%d3 # subtract BIAS
23956
23957
bmi.w sc_mul_err # is result is DENORM, punt!!!
23958
23959
andi.w &0x8000,(%sp) # keep sign
23960
or.w %d3,(%sp) # insert new exponent
23961
andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
23962
mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
23963
mov.l 0x4(%a0),-(%sp)
23964
mov.l &0x3fff0000,-(%sp) # force exp to zero
23965
fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
23966
fmul.x (%sp)+,%fp0
23967
23968
# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23969
# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23970
mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
23971
mov.l 36+4(%a1),-(%sp)
23972
mov.l &0x3fff0000,-(%sp) # force exp to zero
23973
mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
23974
mov.l 48+4(%a1),-(%sp)
23975
mov.l &0x3fff0000,-(%sp)# force exp to zero
23976
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
23977
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
23978
bra.b A10_st
23979
23980
sc_mul_err:
23981
bra.b sc_mul_err
23982
23983
A9_norm:
23984
tst.w %d2 # test for small exp case
23985
beq.b A9_con # if zero, continue as normal
23986
fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23987
fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23988
A9_con:
23989
fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
23990
23991
# A10. Or in INEX.
23992
# If INEX is set, round error occurred. This is compensated
23993
# for by 'or-ing' in the INEX2 flag to the lsb of Y.
23994
#
23995
# Register usage:
23996
# Input/Output
23997
# d0: FPCR with RZ mode/FPSR with INEX2 isolated
23998
# d2: x/x
23999
# d3: x/x
24000
# d4: LEN/Unchanged
24001
# d5: ICTR:LAMBDA
24002
# d6: ILOG/Unchanged
24003
# d7: k-factor/Unchanged
24004
# a0: ptr for original operand/final result
24005
# a1: ptr to PTENxx array/Unchanged
24006
# a2: x/ptr to FP_SCR1(a6)
24007
# fp0: Y/Y with lsb adjusted
24008
# fp1: 10^ISCALE/Unchanged
24009
# fp2: x/x
24010
24011
A10_st:
24012
fmov.l %fpsr,%d0 # get FPSR
24013
fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
24014
lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
24015
btst &9,%d0 # check if INEX2 set
24016
beq.b A11_st # if clear, skip rest
24017
or.l &1,8(%a2) # or in 1 to lsb of mantissa
24018
fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
24019
24020
24021
# A11. Restore original FPCR; set size ext.
24022
# Perform FINT operation in the user's rounding mode. Keep
24023
# the size to extended. The sintdo entry point in the sint
24024
# routine expects the FPCR value to be in USER_FPCR for
24025
# mode and precision. The original FPCR is saved in L_SCR1.
24026
24027
A11_st:
24028
mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
24029
and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
24030
# ;block exceptions
24031
24032
24033
# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24034
# The FPSP routine sintd0 is used. The output is in fp0.
24035
#
24036
# Register usage:
24037
# Input/Output
24038
# d0: FPSR with AINEX cleared/FPCR with size set to ext
24039
# d2: x/x/scratch
24040
# d3: x/x
24041
# d4: LEN/Unchanged
24042
# d5: ICTR:LAMBDA/Unchanged
24043
# d6: ILOG/Unchanged
24044
# d7: k-factor/Unchanged
24045
# a0: ptr for original operand/src ptr for sintdo
24046
# a1: ptr to PTENxx array/Unchanged
24047
# a2: ptr to FP_SCR1(a6)/Unchanged
24048
# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24049
# fp0: Y/YINT
24050
# fp1: 10^ISCALE/Unchanged
24051
# fp2: x/x
24052
# F_SCR1:x/x
24053
# F_SCR2:Y adjusted for inex/Y with original exponent
24054
# L_SCR1:x/original USER_FPCR
24055
# L_SCR2:first word of X packed/Unchanged
24056
24057
A12_st:
24058
movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
24059
mov.l L_SCR1(%a6),-(%sp)
24060
mov.l L_SCR2(%a6),-(%sp)
24061
24062
lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
24063
fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
24064
tst.l L_SCR2(%a6) # test sign of original operand
24065
bge.b do_fint12 # if pos, use Y
24066
or.l &0x80000000,(%a0) # if neg, use -Y
24067
do_fint12:
24068
mov.l USER_FPSR(%a6),-(%sp)
24069
# bsr sintdo # sint routine returns int in fp0
24070
24071
fmov.l USER_FPCR(%a6),%fpcr
24072
fmov.l &0x0,%fpsr # clear the AEXC bits!!!
24073
## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
24074
## andi.l &0x00000030,%d0
24075
## fmov.l %d0,%fpcr
24076
fint.x FP_SCR1(%a6),%fp0 # do fint()
24077
fmov.l %fpsr,%d0
24078
or.w %d0,FPSR_EXCEPT(%a6)
24079
## fmov.l &0x0,%fpcr
24080
## fmov.l %fpsr,%d0 # don't keep ccodes
24081
## or.w %d0,FPSR_EXCEPT(%a6)
24082
24083
mov.b (%sp),USER_FPSR(%a6)
24084
add.l &4,%sp
24085
24086
mov.l (%sp)+,L_SCR2(%a6)
24087
mov.l (%sp)+,L_SCR1(%a6)
24088
movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
24089
24090
mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
24091
mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
24092
24093
# A13. Check for LEN digits.
24094
# If the int operation results in more than LEN digits,
24095
# or less than LEN -1 digits, adjust ILOG and repeat from
24096
# A6. This test occurs only on the first pass. If the
24097
# result is exactly 10^LEN, decrement ILOG and divide
24098
# the mantissa by 10. The calculation of 10^LEN cannot
24099
# be inexact, since all powers of ten up to 10^27 are exact
24100
# in extended precision, so the use of a previous power-of-ten
24101
# table will introduce no error.
24102
#
24103
#
24104
# Register usage:
24105
# Input/Output
24106
# d0: FPCR with size set to ext/scratch final = 0
24107
# d2: x/x
24108
# d3: x/scratch final = x
24109
# d4: LEN/LEN adjusted
24110
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24111
# d6: ILOG/ILOG adjusted
24112
# d7: k-factor/Unchanged
24113
# a0: pointer into memory for packed bcd string formation
24114
# a1: ptr to PTENxx array/Unchanged
24115
# a2: ptr to FP_SCR1(a6)/Unchanged
24116
# fp0: int portion of Y/abs(YINT) adjusted
24117
# fp1: 10^ISCALE/Unchanged
24118
# fp2: x/10^LEN
24119
# F_SCR1:x/x
24120
# F_SCR2:Y with original exponent/Unchanged
24121
# L_SCR1:original USER_FPCR/Unchanged
24122
# L_SCR2:first word of X packed/Unchanged
24123
24124
A13_st:
24125
swap %d5 # put ICTR in lower word of d5
24126
tst.w %d5 # check if ICTR = 0
24127
bne not_zr # if non-zero, go to second test
24128
#
24129
# Compute 10^(LEN-1)
24130
#
24131
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24132
mov.l %d4,%d0 # put LEN in d0
24133
subq.l &1,%d0 # d0 = LEN -1
24134
clr.l %d3 # clr table index
24135
l_loop:
24136
lsr.l &1,%d0 # shift next bit into carry
24137
bcc.b l_next # if zero, skip the mul
24138
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24139
l_next:
24140
add.l &12,%d3 # inc d3 to next pwrten table entry
24141
tst.l %d0 # test if LEN is zero
24142
bne.b l_loop # if not, loop
24143
#
24144
# 10^LEN-1 is computed for this test and A14. If the input was
24145
# denormalized, check only the case in which YINT > 10^LEN.
24146
#
24147
tst.b BINDEC_FLG(%a6) # check if input was norm
24148
beq.b A13_con # if norm, continue with checking
24149
fabs.x %fp0 # take abs of YINT
24150
bra test_2
24151
#
24152
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24153
#
24154
A13_con:
24155
fabs.x %fp0 # take abs of YINT
24156
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
24157
fbge.w test_2 # if greater, do next test
24158
subq.l &1,%d6 # subtract 1 from ILOG
24159
mov.w &1,%d5 # set ICTR
24160
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24161
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24162
bra.w A6_str # return to A6 and recompute YINT
24163
test_2:
24164
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24165
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
24166
fblt.w A14_st # if less, all is ok, go to A14
24167
fbgt.w fix_ex # if greater, fix and redo
24168
fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
24169
addq.l &1,%d6 # and inc ILOG
24170
bra.b A14_st # and continue elsewhere
24171
fix_ex:
24172
addq.l &1,%d6 # increment ILOG by 1
24173
mov.w &1,%d5 # set ICTR
24174
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24175
bra.w A6_str # return to A6 and recompute YINT
24176
#
24177
# Since ICTR <> 0, we have already been through one adjustment,
24178
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24179
# 10^LEN is again computed using whatever table is in a1 since the
24180
# value calculated cannot be inexact.
24181
#
24182
not_zr:
24183
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24184
mov.l %d4,%d0 # put LEN in d0
24185
clr.l %d3 # clr table index
24186
z_loop:
24187
lsr.l &1,%d0 # shift next bit into carry
24188
bcc.b z_next # if zero, skip the mul
24189
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24190
z_next:
24191
add.l &12,%d3 # inc d3 to next pwrten table entry
24192
tst.l %d0 # test if LEN is zero
24193
bne.b z_loop # if not, loop
24194
fabs.x %fp0 # get abs(YINT)
24195
fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
24196
fbneq.w A14_st # if not, skip this
24197
fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
24198
addq.l &1,%d6 # and inc ILOG by 1
24199
addq.l &1,%d4 # and inc LEN
24200
fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN
24201
24202
# A14. Convert the mantissa to bcd.
24203
# The binstr routine is used to convert the LEN digit
24204
# mantissa to bcd in memory. The input to binstr is
24205
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206
# such that the decimal point is to the left of bit 63.
24207
# The bcd digits are stored in the correct position in
24208
# the final string area in memory.
24209
#
24210
#
24211
# Register usage:
24212
# Input/Output
24213
# d0: x/LEN call to binstr - final is 0
24214
# d1: x/0
24215
# d2: x/ms 32-bits of mant of abs(YINT)
24216
# d3: x/ls 32-bits of mant of abs(YINT)
24217
# d4: LEN/Unchanged
24218
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24219
# d6: ILOG
24220
# d7: k-factor/Unchanged
24221
# a0: pointer into memory for packed bcd string formation
24222
# /ptr to first mantissa byte in result string
24223
# a1: ptr to PTENxx array/Unchanged
24224
# a2: ptr to FP_SCR1(a6)/Unchanged
24225
# fp0: int portion of Y/abs(YINT) adjusted
24226
# fp1: 10^ISCALE/Unchanged
24227
# fp2: 10^LEN/Unchanged
24228
# F_SCR1:x/Work area for final result
24229
# F_SCR2:Y with original exponent/Unchanged
24230
# L_SCR1:original USER_FPCR/Unchanged
24231
# L_SCR2:first word of X packed/Unchanged
24232
24233
A14_st:
24234
fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
24235
fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
24236
lea.l FP_SCR0(%a6),%a0
24237
fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
24238
mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
24239
mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
24240
clr.l 4(%a0) # zero word 2 of FP_RES
24241
clr.l 8(%a0) # zero word 3 of FP_RES
24242
mov.l (%a0),%d0 # move exponent to d0
24243
swap %d0 # put exponent in lower word
24244
beq.b no_sft # if zero, don't shift
24245
sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
24246
tst.l %d0 # check if > 1
24247
bgt.b no_sft # if so, don't shift
24248
neg.l %d0 # make exp positive
24249
m_loop:
24250
lsr.l &1,%d2 # shift d2:d3 right, add 0s
24251
roxr.l &1,%d3 # the number of places
24252
dbf.w %d0,m_loop # given in d0
24253
no_sft:
24254
tst.l %d2 # check for mantissa of zero
24255
bne.b no_zr # if not, go on
24256
tst.l %d3 # continue zero check
24257
beq.b zer_m # if zero, go directly to binstr
24258
no_zr:
24259
clr.l %d1 # put zero in d1 for addx
24260
add.l &0x00000080,%d3 # inc at bit 7
24261
addx.l %d1,%d2 # continue inc
24262
and.l &0xffffff80,%d3 # strip off lsb not used by 882
24263
zer_m:
24264
mov.l %d4,%d0 # put LEN in d0 for binstr call
24265
addq.l &3,%a0 # a0 points to M16 byte in result
24266
bsr binstr # call binstr to convert mant
24267
24268
24269
# A15. Convert the exponent to bcd.
24270
# As in A14 above, the exp is converted to bcd and the
24271
# digits are stored in the final string.
24272
#
24273
# Digits are stored in L_SCR1(a6) on return from BINDEC as:
24274
#
24275
# 32 16 15 0
24276
# -----------------------------------------
24277
# | 0 | e3 | e2 | e1 | e4 | X | X | X |
24278
# -----------------------------------------
24279
#
24280
# And are moved into their proper places in FP_SCR0. If digit e4
24281
# is non-zero, OPERR is signaled. In all cases, all 4 digits are
24282
# written as specified in the 881/882 manual for packed decimal.
24283
#
24284
# Register usage:
24285
# Input/Output
24286
# d0: x/LEN call to binstr - final is 0
24287
# d1: x/scratch (0);shift count for final exponent packing
24288
# d2: x/ms 32-bits of exp fraction/scratch
24289
# d3: x/ls 32-bits of exp fraction
24290
# d4: LEN/Unchanged
24291
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24292
# d6: ILOG
24293
# d7: k-factor/Unchanged
24294
# a0: ptr to result string/ptr to L_SCR1(a6)
24295
# a1: ptr to PTENxx array/Unchanged
24296
# a2: ptr to FP_SCR1(a6)/Unchanged
24297
# fp0: abs(YINT) adjusted/float(ILOG)
24298
# fp1: 10^ISCALE/Unchanged
24299
# fp2: 10^LEN/Unchanged
24300
# F_SCR1:Work area for final result/BCD result
24301
# F_SCR2:Y with original exponent/ILOG/10^4
24302
# L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303
# L_SCR2:first word of X packed/Unchanged
24304
24305
A15_st:
24306
tst.b BINDEC_FLG(%a6) # check for denorm
24307
beq.b not_denorm
24308
ftest.x %fp0 # test for zero
24309
fbeq.w den_zero # if zero, use k-factor or 4933
24310
fmov.l %d6,%fp0 # float ILOG
24311
fabs.x %fp0 # get abs of ILOG
24312
bra.b convrt
24313
den_zero:
24314
tst.l %d7 # check sign of the k-factor
24315
blt.b use_ilog # if negative, use ILOG
24316
fmov.s F4933(%pc),%fp0 # force exponent to 4933
24317
bra.b convrt # do it
24318
use_ilog:
24319
fmov.l %d6,%fp0 # float ILOG
24320
fabs.x %fp0 # get abs of ILOG
24321
bra.b convrt
24322
not_denorm:
24323
ftest.x %fp0 # test for zero
24324
fbneq.w not_zero # if zero, force exponent
24325
fmov.s FONE(%pc),%fp0 # force exponent to 1
24326
bra.b convrt # do it
24327
not_zero:
24328
fmov.l %d6,%fp0 # float ILOG
24329
fabs.x %fp0 # get abs of ILOG
24330
convrt:
24331
fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
24332
fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
24333
mov.l 4(%a2),%d2 # move word 2 to d2
24334
mov.l 8(%a2),%d3 # move word 3 to d3
24335
mov.w (%a2),%d0 # move exp to d0
24336
beq.b x_loop_fin # if zero, skip the shift
24337
sub.w &0x3ffd,%d0 # subtract off bias
24338
neg.w %d0 # make exp positive
24339
x_loop:
24340
lsr.l &1,%d2 # shift d2:d3 right
24341
roxr.l &1,%d3 # the number of places
24342
dbf.w %d0,x_loop # given in d0
24343
x_loop_fin:
24344
clr.l %d1 # put zero in d1 for addx
24345
add.l &0x00000080,%d3 # inc at bit 6
24346
addx.l %d1,%d2 # continue inc
24347
and.l &0xffffff80,%d3 # strip off lsb not used by 882
24348
mov.l &4,%d0 # put 4 in d0 for binstr call
24349
lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24350
bsr binstr # call binstr to convert exp
24351
mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24352
mov.l &12,%d1 # use d1 for shift count
24353
lsr.l %d1,%d0 # shift d0 right by 12
24354
bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
24355
lsr.l %d1,%d0 # shift d0 right by 12
24356
bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
24357
tst.b %d0 # check if e4 is zero
24358
beq.b A16_st # if zero, skip rest
24359
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
24360
24361
24362
# A16. Write sign bits to final string.
24363
# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24364
#
24365
# Register usage:
24366
# Input/Output
24367
# d0: x/scratch - final is x
24368
# d2: x/x
24369
# d3: x/x
24370
# d4: LEN/Unchanged
24371
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24372
# d6: ILOG/ILOG adjusted
24373
# d7: k-factor/Unchanged
24374
# a0: ptr to L_SCR1(a6)/Unchanged
24375
# a1: ptr to PTENxx array/Unchanged
24376
# a2: ptr to FP_SCR1(a6)/Unchanged
24377
# fp0: float(ILOG)/Unchanged
24378
# fp1: 10^ISCALE/Unchanged
24379
# fp2: 10^LEN/Unchanged
24380
# F_SCR1:BCD result with correct signs
24381
# F_SCR2:ILOG/10^4
24382
# L_SCR1:Exponent digits on return from binstr
24383
# L_SCR2:first word of X packed/Unchanged
24384
24385
A16_st:
24386
clr.l %d0 # clr d0 for collection of signs
24387
and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
24388
tst.l L_SCR2(%a6) # check sign of original mantissa
24389
bge.b mant_p # if pos, don't set SM
24390
mov.l &2,%d0 # move 2 in to d0 for SM
24391
mant_p:
24392
tst.l %d6 # check sign of ILOG
24393
bge.b wr_sgn # if pos, don't set SE
24394
addq.l &1,%d0 # set bit 0 in d0 for SE
24395
wr_sgn:
24396
bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
24397
24398
# Clean up and restore all registers used.
24399
24400
fmov.l &0,%fpsr # clear possible inex2/ainex bits
24401
fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
24402
movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
24403
rts
24404
24405
global PTENRN
24406
PTENRN:
24407
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24408
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24409
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24410
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24411
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24412
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24413
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24414
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24415
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24416
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24417
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24418
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24419
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24420
24421
global PTENRP
24422
PTENRP:
24423
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24424
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24425
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24426
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24427
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24428
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24429
long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
24430
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24431
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24432
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24433
long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
24434
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24435
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24436
24437
global PTENRM
24438
PTENRM:
24439
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24440
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24441
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24442
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24443
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24444
long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
24445
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24446
long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
24447
long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
24448
long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
24449
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24450
long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
24451
long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
24452
24453
#########################################################################
24454
# binstr(): Converts a 64-bit binary integer to bcd. #
24455
# #
24456
# INPUT *************************************************************** #
24457
# d2:d3 = 64-bit binary integer #
24458
# d0 = desired length (LEN) #
24459
# a0 = pointer to start in memory for bcd characters #
24460
# (This pointer must point to byte 4 of the first #
24461
# lword of the packed decimal memory string.) #
24462
# #
24463
# OUTPUT ************************************************************** #
24464
# a0 = pointer to LEN bcd digits representing the 64-bit integer. #
24465
# #
24466
# ALGORITHM *********************************************************** #
24467
# The 64-bit binary is assumed to have a decimal point before #
24468
# bit 63. The fraction is multiplied by 10 using a mul by 2 #
24469
# shift and a mul by 8 shift. The bits shifted out of the #
24470
# msb form a decimal digit. This process is iterated until #
24471
# LEN digits are formed. #
24472
# #
24473
# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
24474
# digit formed will be assumed the least significant. This is #
24475
# to force the first byte formed to have a 0 in the upper 4 bits. #
24476
# #
24477
# A2. Beginning of the loop: #
24478
# Copy the fraction in d2:d3 to d4:d5. #
24479
# #
24480
# A3. Multiply the fraction in d2:d3 by 8 using bit-field #
24481
# extracts and shifts. The three msbs from d2 will go into d1. #
24482
# #
24483
# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
24484
# will be collected by the carry. #
24485
# #
24486
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
24487
# into d2:d3. D1 will contain the bcd digit formed. #
24488
# #
24489
# A6. Test d7. If zero, the digit formed is the ms digit. If non- #
24490
# zero, it is the ls digit. Put the digit in its place in the #
24491
# upper word of d0. If it is the ls digit, write the word #
24492
# from d0 to memory. #
24493
# #
24494
# A7. Decrement d6 (LEN counter) and repeat the loop until zero. #
24495
# #
24496
#########################################################################
24497
24498
# Implementation Notes:
24499
#
24500
# The registers are used as follows:
24501
#
24502
# d0: LEN counter
24503
# d1: temp used to form the digit
24504
# d2: upper 32-bits of fraction for mul by 8
24505
# d3: lower 32-bits of fraction for mul by 8
24506
# d4: upper 32-bits of fraction for mul by 2
24507
# d5: lower 32-bits of fraction for mul by 2
24508
# d6: temp for bit-field extracts
24509
# d7: byte digit formation word;digit count {0,1}
24510
# a0: pointer into memory for packed bcd string formation
24511
#
24512
24513
global binstr
24514
binstr:
24515
movm.l &0xff00,-(%sp) # {%d0-%d7}
24516
24517
#
24518
# A1: Init d7
24519
#
24520
mov.l &1,%d7 # init d7 for second digit
24521
subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
24522
#
24523
# A2. Copy d2:d3 to d4:d5. Start loop.
24524
#
24525
loop:
24526
mov.l %d2,%d4 # copy the fraction before muls
24527
mov.l %d3,%d5 # to d4:d5
24528
#
24529
# A3. Multiply d2:d3 by 8; extract msbs into d1.
24530
#
24531
bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
24532
asl.l &3,%d2 # shift d2 left by 3 places
24533
bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
24534
asl.l &3,%d3 # shift d3 left by 3 places
24535
or.l %d6,%d2 # or in msbs from d3 into d2
24536
#
24537
# A4. Multiply d4:d5 by 2; add carry out to d1.
24538
#
24539
asl.l &1,%d5 # mul d5 by 2
24540
roxl.l &1,%d4 # mul d4 by 2
24541
swap %d6 # put 0 in d6 lower word
24542
addx.w %d6,%d1 # add in extend from mul by 2
24543
#
24544
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
24545
#
24546
add.l %d5,%d3 # add lower 32 bits
24547
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548
addx.l %d4,%d2 # add with extend upper 32 bits
24549
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550
addx.w %d6,%d1 # add in extend from add to d1
24551
swap %d6 # with d6 = 0; put 0 in upper word
24552
#
24553
# A6. Test d7 and branch.
24554
#
24555
tst.w %d7 # if zero, store digit & to loop
24556
beq.b first_d # if non-zero, form byte & write
24557
sec_d:
24558
swap %d7 # bring first digit to word d7b
24559
asl.w &4,%d7 # first digit in upper 4 bits d7b
24560
add.w %d1,%d7 # add in ls digit to d7b
24561
mov.b %d7,(%a0)+ # store d7b byte in memory
24562
swap %d7 # put LEN counter in word d7a
24563
clr.w %d7 # set d7a to signal no digits done
24564
dbf.w %d0,loop # do loop some more!
24565
bra.b end_bstr # finished, so exit
24566
first_d:
24567
swap %d7 # put digit word in d7b
24568
mov.w %d1,%d7 # put new digit in d7b
24569
swap %d7 # put LEN counter in word d7a
24570
addq.w &1,%d7 # set d7a to signal first digit done
24571
dbf.w %d0,loop # do loop some more!
24572
swap %d7 # put last digit in string
24573
lsl.w &4,%d7 # move it to upper 4 bits
24574
mov.b %d7,(%a0)+ # store it in memory string
24575
#
24576
# Clean up and return with result in fp0.
24577
#
24578
end_bstr:
24579
movm.l (%sp)+,&0xff # {%d0-%d7}
24580
rts
24581
24582
#########################################################################
24583
# XDEF **************************************************************** #
24584
# facc_in_b(): dmem_read_byte failed #
24585
# facc_in_w(): dmem_read_word failed #
24586
# facc_in_l(): dmem_read_long failed #
24587
# facc_in_d(): dmem_read of dbl prec failed #
24588
# facc_in_x(): dmem_read of ext prec failed #
24589
# #
24590
# facc_out_b(): dmem_write_byte failed #
24591
# facc_out_w(): dmem_write_word failed #
24592
# facc_out_l(): dmem_write_long failed #
24593
# facc_out_d(): dmem_write of dbl prec failed #
24594
# facc_out_x(): dmem_write of ext prec failed #
24595
# #
24596
# XREF **************************************************************** #
24597
# _real_access() - exit through access error handler #
24598
# #
24599
# INPUT *************************************************************** #
24600
# None #
24601
# #
24602
# OUTPUT ************************************************************** #
24603
# None #
24604
# #
24605
# ALGORITHM *********************************************************** #
24606
# Flow jumps here when an FP data fetch call gets an error #
24607
# result. This means the operating system wants an access error frame #
24608
# made out of the current exception stack frame. #
24609
# So, we first call restore() which makes sure that any updated #
24610
# -(an)+ register gets returned to its pre-exception value and then #
24611
# we change the stack to an access error stack frame. #
24612
# #
24613
#########################################################################
24614
24615
facc_in_b:
24616
movq.l &0x1,%d0 # one byte
24617
bsr.w restore # fix An
24618
24619
mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
24620
bra.w facc_finish
24621
24622
facc_in_w:
24623
movq.l &0x2,%d0 # two bytes
24624
bsr.w restore # fix An
24625
24626
mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
24627
bra.b facc_finish
24628
24629
facc_in_l:
24630
movq.l &0x4,%d0 # four bytes
24631
bsr.w restore # fix An
24632
24633
mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
24634
bra.b facc_finish
24635
24636
facc_in_d:
24637
movq.l &0x8,%d0 # eight bytes
24638
bsr.w restore # fix An
24639
24640
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24641
bra.b facc_finish
24642
24643
facc_in_x:
24644
movq.l &0xc,%d0 # twelve bytes
24645
bsr.w restore # fix An
24646
24647
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24648
bra.b facc_finish
24649
24650
################################################################
24651
24652
facc_out_b:
24653
movq.l &0x1,%d0 # one byte
24654
bsr.w restore # restore An
24655
24656
mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
24657
bra.b facc_finish
24658
24659
facc_out_w:
24660
movq.l &0x2,%d0 # two bytes
24661
bsr.w restore # restore An
24662
24663
mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
24664
bra.b facc_finish
24665
24666
facc_out_l:
24667
movq.l &0x4,%d0 # four bytes
24668
bsr.w restore # restore An
24669
24670
mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
24671
bra.b facc_finish
24672
24673
facc_out_d:
24674
movq.l &0x8,%d0 # eight bytes
24675
bsr.w restore # restore An
24676
24677
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24678
bra.b facc_finish
24679
24680
facc_out_x:
24681
mov.l &0xc,%d0 # twelve bytes
24682
bsr.w restore # restore An
24683
24684
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24685
24686
# here's where we actually create the access error frame from the
24687
# current exception stack frame.
24688
facc_finish:
24689
mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24690
24691
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
24692
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
24694
24695
unlk %a6
24696
24697
mov.l (%sp),-(%sp) # store SR, hi(PC)
24698
mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
24699
mov.l 0xc(%sp),0x8(%sp) # store EA
24700
mov.l &0x00000001,0xc(%sp) # store FSLW
24701
mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
24702
mov.w &0x4008,0x6(%sp) # store voff
24703
24704
btst &0x5,(%sp) # supervisor or user mode?
24705
beq.b facc_out2 # user
24706
bset &0x2,0xd(%sp) # set supervisor TM bit
24707
24708
facc_out2:
24709
bra.l _real_access
24710
24711
##################################################################
24712
24713
# if the effective addressing mode was predecrement or postincrement,
24714
# the emulation has already changed its value to the correct post-
24715
# instruction value. but since we're exiting to the access error
24716
# handler, then AN must be returned to its pre-instruction value.
24717
# we do that here.
24718
restore:
24719
mov.b EXC_OPWORD+0x1(%a6),%d1
24720
andi.b &0x38,%d1 # extract opmode
24721
cmpi.b %d1,&0x18 # postinc?
24722
beq.w rest_inc
24723
cmpi.b %d1,&0x20 # predec?
24724
beq.w rest_dec
24725
rts
24726
24727
rest_inc:
24728
mov.b EXC_OPWORD+0x1(%a6),%d1
24729
andi.w &0x0007,%d1 # fetch An
24730
24731
mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
24732
jmp (tbl_rest_inc.b,%pc,%d1.w*1)
24733
24734
tbl_rest_inc:
24735
short ri_a0 - tbl_rest_inc
24736
short ri_a1 - tbl_rest_inc
24737
short ri_a2 - tbl_rest_inc
24738
short ri_a3 - tbl_rest_inc
24739
short ri_a4 - tbl_rest_inc
24740
short ri_a5 - tbl_rest_inc
24741
short ri_a6 - tbl_rest_inc
24742
short ri_a7 - tbl_rest_inc
24743
24744
ri_a0:
24745
sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
24746
rts
24747
ri_a1:
24748
sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
24749
rts
24750
ri_a2:
24751
sub.l %d0,%a2 # fix a2
24752
rts
24753
ri_a3:
24754
sub.l %d0,%a3 # fix a3
24755
rts
24756
ri_a4:
24757
sub.l %d0,%a4 # fix a4
24758
rts
24759
ri_a5:
24760
sub.l %d0,%a5 # fix a5
24761
rts
24762
ri_a6:
24763
sub.l %d0,(%a6) # fix stacked a6
24764
rts
24765
# if it's a fmove out instruction, we don't have to fix a7
24766
# because we hadn't changed it yet. if it's an opclass two
24767
# instruction (data moved in) and the exception was in supervisor
24768
# mode, then also also wasn't updated. if it was user mode, then
24769
# restore the correct a7 which is in the USP currently.
24770
ri_a7:
24771
cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
24772
bne.b ri_a7_done # out
24773
24774
btst &0x5,EXC_SR(%a6) # user or supervisor?
24775
bne.b ri_a7_done # supervisor
24776
movc %usp,%a0 # restore USP
24777
sub.l %d0,%a0
24778
movc %a0,%usp
24779
ri_a7_done:
24780
rts
24781
24782
# need to invert adjustment value if the <ea> was predec
24783
rest_dec:
24784
neg.l %d0
24785
bra.b rest_inc
24786
24787