1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6
7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
#
30
# freal.s:
31
# This file is appended to the top of the 060FPSP package
32
# and contains the entry points into the package. The user, in
33
# effect, branches to one of the branch table entries located
34
# after _060FPSP_TABLE.
35
# Also, subroutine stubs exist in this file (_fpsp_done for
36
# example) that are referenced by the FPSP package itself in order
37
# to call a given routine. The stub routine actually performs the
38
# callout. The FPSP code does a "bsr" to the stub routine. This
39
# extra layer of hierarchy adds a slight performance penalty but
40
# it makes the FPSP code easier to read and more maintainable.
41
#
42
43
set _off_bsun, 0x00
44
set _off_snan, 0x04
45
set _off_operr, 0x08
46
set _off_ovfl, 0x0c
47
set _off_unfl, 0x10
48
set _off_dz, 0x14
49
set _off_inex, 0x18
50
set _off_fline, 0x1c
51
set _off_fpu_dis, 0x20
52
set _off_trap, 0x24
53
set _off_trace, 0x28
54
set _off_access, 0x2c
55
set _off_done, 0x30
56
57
set _off_imr, 0x40
58
set _off_dmr, 0x44
59
set _off_dmw, 0x48
60
set _off_irw, 0x4c
61
set _off_irl, 0x50
62
set _off_drb, 0x54
63
set _off_drw, 0x58
64
set _off_drl, 0x5c
65
set _off_dwb, 0x60
66
set _off_dww, 0x64
67
set _off_dwl, 0x68
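#
# The call-out stubs below expect a table of long-word entries to sit in
# the 0x80 bytes immediately before _060FPSP_TABLE; each entry holds the
# displacement of the corresponding kernel routine measured from the start
# of that table. A minimal sketch of such a table (the _os_* labels are
# hypothetical, not part of this package):
#
#	_060_callouts:				# == _060FPSP_TABLE-0x80
#	long	_os_bsun - _060_callouts	# _off_bsun (0x00)
#	long	_os_snan - _060_callouts	# _off_snan (0x04)
#	...					# one long per _off_* above
#	long	_os_dwl - _060_callouts		# _off_dwl (0x68)
#	space	20				# pad the table out to 0x80 bytes
#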
68
69
_060FPSP_TABLE:
70
71
###############################################################
72
73
# Here's the table of ENTRY POINTS for those linking the package.
74
bra.l _fpsp_snan
75
short 0x0000
76
bra.l _fpsp_operr
77
short 0x0000
78
bra.l _fpsp_ovfl
79
short 0x0000
80
bra.l _fpsp_unfl
81
short 0x0000
82
bra.l _fpsp_dz
83
short 0x0000
84
bra.l _fpsp_inex
85
short 0x0000
86
bra.l _fpsp_fline
87
short 0x0000
88
bra.l _fpsp_unsupp
89
short 0x0000
90
bra.l _fpsp_effadd
91
short 0x0000
92
93
space 56
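#
# Each entry above occupies 8 bytes (a 6-byte bra.l plus the 2-byte pad),
# so the entry points sit at fixed offsets from _060FPSP_TABLE: +0x00 snan,
# +0x08 operr, +0x10 ovfl, +0x18 unfl, +0x20 dz, +0x28 inex, +0x30 fline,
# +0x38 unsupp, +0x40 effadd; the "space 56" pads the table to 0x80 bytes.
# A rough sketch of how an installing kernel might point its FP exception
# vectors at them (the register choices, and the assumption that %a1
# already holds the vector table base, are illustrative only):
#
#	lea	(_060FPSP_TABLE+0x00,%pc),%a0	# _fpsp_snan
#	mov.l	%a0,0xd8(%a1)			# SNAN_VEC
#	lea	(_060FPSP_TABLE+0x10,%pc),%a0	# _fpsp_ovfl
#	mov.l	%a0,0xd4(%a1)			# OVFL_VEC
#	lea	(_060FPSP_TABLE+0x18,%pc),%a0	# _fpsp_unfl
#	mov.l	%a0,0xcc(%a1)			# UNFL_VEC
#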
94
95
###############################################################
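# Every call-out stub below uses the same five-instruction sequence,
# sketched here with <table> standing for _060FPSP_TABLE-0x80 (the start
# of the call-out table):
#
#	mov.l	%d0,-(%sp)			# preserve d0
#	mov.l	(<table>+_off_x,%pc),%d0	# d0 = routine's displacement
#	pea.l	(<table>,%pc,%d0)		# push routine's absolute address
#	mov.l	0x4(%sp),%d0			# restore d0 from the saved copy
#	rtd	&0x4				# jump there, discarding saved d0
#
# The net effect is an indirect jmp through the call-out table that leaves
# all registers intact; stubs reached with "bsr" (e.g. _imem_read_long)
# still present the package's return address on top of the stack.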
96
global _fpsp_done
97
_fpsp_done:
98
mov.l %d0,-(%sp)
99
mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
100
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
101
mov.l 0x4(%sp),%d0
102
rtd &0x4
103
104
global _real_ovfl
105
_real_ovfl:
106
mov.l %d0,-(%sp)
107
mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
108
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
109
mov.l 0x4(%sp),%d0
110
rtd &0x4
111
112
global _real_unfl
113
_real_unfl:
114
mov.l %d0,-(%sp)
115
mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
116
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
117
mov.l 0x4(%sp),%d0
118
rtd &0x4
119
120
global _real_inex
121
_real_inex:
122
mov.l %d0,-(%sp)
123
mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
124
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
125
mov.l 0x4(%sp),%d0
126
rtd &0x4
127
128
global _real_bsun
129
_real_bsun:
130
mov.l %d0,-(%sp)
131
mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
132
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
133
mov.l 0x4(%sp),%d0
134
rtd &0x4
135
136
global _real_operr
137
_real_operr:
138
mov.l %d0,-(%sp)
139
mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
140
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
141
mov.l 0x4(%sp),%d0
142
rtd &0x4
143
144
global _real_snan
145
_real_snan:
146
mov.l %d0,-(%sp)
147
mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
148
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
149
mov.l 0x4(%sp),%d0
150
rtd &0x4
151
152
global _real_dz
153
_real_dz:
154
mov.l %d0,-(%sp)
155
mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
156
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
157
mov.l 0x4(%sp),%d0
158
rtd &0x4
159
160
global _real_fline
161
_real_fline:
162
mov.l %d0,-(%sp)
163
mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
164
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
165
mov.l 0x4(%sp),%d0
166
rtd &0x4
167
168
global _real_fpu_disabled
169
_real_fpu_disabled:
170
mov.l %d0,-(%sp)
171
mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
172
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
173
mov.l 0x4(%sp),%d0
174
rtd &0x4
175
176
global _real_trap
177
_real_trap:
178
mov.l %d0,-(%sp)
179
mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
180
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
181
mov.l 0x4(%sp),%d0
182
rtd &0x4
183
184
global _real_trace
185
_real_trace:
186
mov.l %d0,-(%sp)
187
mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
188
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
189
mov.l 0x4(%sp),%d0
190
rtd &0x4
191
192
global _real_access
193
_real_access:
194
mov.l %d0,-(%sp)
195
mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
196
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
197
mov.l 0x4(%sp),%d0
198
rtd &0x4
199
200
#######################################
201
202
global _imem_read
203
_imem_read:
204
mov.l %d0,-(%sp)
205
mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
206
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
207
mov.l 0x4(%sp),%d0
208
rtd &0x4
209
210
global _dmem_read
211
_dmem_read:
212
mov.l %d0,-(%sp)
213
mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
214
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
215
mov.l 0x4(%sp),%d0
216
rtd &0x4
217
218
global _dmem_write
219
_dmem_write:
220
mov.l %d0,-(%sp)
221
mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
222
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
223
mov.l 0x4(%sp),%d0
224
rtd &0x4
225
226
global _imem_read_word
227
_imem_read_word:
228
mov.l %d0,-(%sp)
229
mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
230
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
231
mov.l 0x4(%sp),%d0
232
rtd &0x4
233
234
global _imem_read_long
235
_imem_read_long:
236
mov.l %d0,-(%sp)
237
mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
238
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
239
mov.l 0x4(%sp),%d0
240
rtd &0x4
241
242
global _dmem_read_byte
243
_dmem_read_byte:
244
mov.l %d0,-(%sp)
245
mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
246
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
247
mov.l 0x4(%sp),%d0
248
rtd &0x4
249
250
global _dmem_read_word
251
_dmem_read_word:
252
mov.l %d0,-(%sp)
253
mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
254
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
255
mov.l 0x4(%sp),%d0
256
rtd &0x4
257
258
global _dmem_read_long
259
_dmem_read_long:
260
mov.l %d0,-(%sp)
261
mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
262
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
263
mov.l 0x4(%sp),%d0
264
rtd &0x4
265
266
global _dmem_write_byte
267
_dmem_write_byte:
268
mov.l %d0,-(%sp)
269
mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
270
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
271
mov.l 0x4(%sp),%d0
272
rtd &0x4
273
274
global _dmem_write_word
275
_dmem_write_word:
276
mov.l %d0,-(%sp)
277
mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
278
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
279
mov.l 0x4(%sp),%d0
280
rtd &0x4
281
282
global _dmem_write_long
283
_dmem_write_long:
284
mov.l %d0,-(%sp)
285
mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
286
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
287
mov.l 0x4(%sp),%d0
288
rtd &0x4
289
290
#
291
# This file contains a set of define statements for constants
292
# in order to promote readability within the core code itself.
293
#
294
295
set LOCAL_SIZE, 192 # stack frame size(bytes)
296
set LV, -LOCAL_SIZE # stack offset
297
298
set EXC_SR, 0x4 # stack status register
299
set EXC_PC, 0x6 # stack pc
300
set EXC_VOFF, 0xa # stacked vector offset
301
set EXC_EA, 0xc # stacked <ea>
302
303
set EXC_FP, 0x0 # frame pointer
304
305
set EXC_AREGS, -68 # offset of all address regs
306
set EXC_DREGS, -100 # offset of all data regs
307
set EXC_FPREGS, -36 # offset of all fp regs
308
309
set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
310
set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
311
set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
312
set EXC_A5, EXC_AREGS+(5*4)
313
set EXC_A4, EXC_AREGS+(4*4)
314
set EXC_A3, EXC_AREGS+(3*4)
315
set EXC_A2, EXC_AREGS+(2*4)
316
set EXC_A1, EXC_AREGS+(1*4)
317
set EXC_A0, EXC_AREGS+(0*4)
318
set EXC_D7, EXC_DREGS+(7*4)
319
set EXC_D6, EXC_DREGS+(6*4)
320
set EXC_D5, EXC_DREGS+(5*4)
321
set EXC_D4, EXC_DREGS+(4*4)
322
set EXC_D3, EXC_DREGS+(3*4)
323
set EXC_D2, EXC_DREGS+(2*4)
324
set EXC_D1, EXC_DREGS+(1*4)
325
set EXC_D0, EXC_DREGS+(0*4)
326
327
set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
328
set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
329
set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
330
331
set FP_SCR1, LV+80 # fp scratch 1
332
set FP_SCR1_EX, FP_SCR1+0
333
set FP_SCR1_SGN, FP_SCR1+2
334
set FP_SCR1_HI, FP_SCR1+4
335
set FP_SCR1_LO, FP_SCR1+8
336
337
set FP_SCR0, LV+68 # fp scratch 0
338
set FP_SCR0_EX, FP_SCR0+0
339
set FP_SCR0_SGN, FP_SCR0+2
340
set FP_SCR0_HI, FP_SCR0+4
341
set FP_SCR0_LO, FP_SCR0+8
342
343
set FP_DST, LV+56 # fp destination operand
344
set FP_DST_EX, FP_DST+0
345
set FP_DST_SGN, FP_DST+2
346
set FP_DST_HI, FP_DST+4
347
set FP_DST_LO, FP_DST+8
348
349
set FP_SRC, LV+44 # fp source operand
350
set FP_SRC_EX, FP_SRC+0
351
set FP_SRC_SGN, FP_SRC+2
352
set FP_SRC_HI, FP_SRC+4
353
set FP_SRC_LO, FP_SRC+8
354
355
set USER_FPIAR, LV+40 # FP instr address register
356
357
set USER_FPSR, LV+36 # FP status register
358
set FPSR_CC, USER_FPSR+0 # FPSR condition codes
359
set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
360
set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
361
set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
362
363
set USER_FPCR, LV+32 # FP control register
364
set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
365
set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
366
367
set L_SCR3, LV+28 # integer scratch 3
368
set L_SCR2, LV+24 # integer scratch 2
369
set L_SCR1, LV+20 # integer scratch 1
370
371
set STORE_FLG, LV+19 # flag: operand store (i.e. not fcmp/ftst)
372
373
set EXC_TEMP2, LV+24 # temporary space
374
set EXC_TEMP, LV+16 # temporary space
375
376
set DTAG, LV+15 # destination operand type
377
set STAG, LV+14 # source operand type
378
379
set SPCOND_FLG, LV+10 # flag: special case (see below)
380
381
set EXC_CC, LV+8 # saved condition codes
382
set EXC_EXTWPTR, LV+4 # saved current PC (active)
383
set EXC_EXTWORD, LV+2 # saved extension word
384
set EXC_CMDREG, LV+2 # saved extension word
385
set EXC_OPWORD, LV+0 # saved operation word
386
387
################################
388
389
# Helpful macros
390
391
set FTEMP, 0 # offsets within an
392
set FTEMP_EX, 0 # extended precision
393
set FTEMP_SGN, 2 # value saved in memory.
394
set FTEMP_HI, 4
395
set FTEMP_LO, 8
396
set FTEMP_GRS, 12
397
398
set LOCAL, 0 # offsets within an
399
set LOCAL_EX, 0 # extended precision
400
set LOCAL_SGN, 2 # value saved in memory.
401
set LOCAL_HI, 4
402
set LOCAL_LO, 8
403
set LOCAL_GRS, 12
404
405
set DST, 0 # offsets within an
406
set DST_EX, 0 # extended precision
407
set DST_HI, 4 # value saved in memory.
408
set DST_LO, 8
409
410
set SRC, 0 # offsets within an
411
set SRC_EX, 0 # extended precision
412
set SRC_HI, 4 # value saved in memory.
413
set SRC_LO, 8
414
415
set SGL_LO, 0x3f81 # min sgl prec exponent
416
set SGL_HI, 0x407e # max sgl prec exponent
417
set DBL_LO, 0x3c01 # min dbl prec exponent
418
set DBL_HI, 0x43fe # max dbl prec exponent
419
set EXT_LO, 0x0 # min ext prec exponent
420
set EXT_HI, 0x7ffe # max ext prec exponent
421
422
set EXT_BIAS, 0x3fff # extended precision bias
423
set SGL_BIAS, 0x007f # single precision bias
424
set DBL_BIAS, 0x03ff # double precision bias
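# The single/double range limits above are just those formats' exponent
# limits re-expressed with the extended-precision bias:
#	SGL_HI = EXT_BIAS + 127  = 0x3fff + 0x7f  = 0x407e
#	SGL_LO = EXT_BIAS - 126  = 0x3fff - 0x7e  = 0x3f81
#	DBL_HI = EXT_BIAS + 1023 = 0x3fff + 0x3ff = 0x43fe
#	DBL_LO = EXT_BIAS - 1022 = 0x3fff - 0x3fe = 0x3c01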
425
426
set NORM, 0x00 # operand type for STAG/DTAG
427
set ZERO, 0x01 # operand type for STAG/DTAG
428
set INF, 0x02 # operand type for STAG/DTAG
429
set QNAN, 0x03 # operand type for STAG/DTAG
430
set DENORM, 0x04 # operand type for STAG/DTAG
431
set SNAN, 0x05 # operand type for STAG/DTAG
432
set UNNORM, 0x06 # operand type for STAG/DTAG
433
434
##################
435
# FPSR/FPCR bits #
436
##################
437
set neg_bit, 0x3 # negative result
438
set z_bit, 0x2 # zero result
439
set inf_bit, 0x1 # infinite result
440
set nan_bit, 0x0 # NAN result
441
442
set q_sn_bit, 0x7 # sign bit of quotient byte
443
444
set bsun_bit, 7 # branch on unordered
445
set snan_bit, 6 # signalling NAN
446
set operr_bit, 5 # operand error
447
set ovfl_bit, 4 # overflow
448
set unfl_bit, 3 # underflow
449
set dz_bit, 2 # divide by zero
450
set inex2_bit, 1 # inexact result 2
451
set inex1_bit, 0 # inexact result 1
452
453
set aiop_bit, 7 # accrued illegal operation bit
454
set aovfl_bit, 6 # accrued overflow bit
455
set aunfl_bit, 5 # accrued underflow bit
456
set adz_bit, 4 # accrued dz bit
457
set ainex_bit, 3 # accrued inexact bit
458
459
#############################
460
# FPSR individual bit masks #
461
#############################
462
set neg_mask, 0x08000000 # negative bit mask (lw)
463
set inf_mask, 0x02000000 # infinity bit mask (lw)
464
set z_mask, 0x04000000 # zero bit mask (lw)
465
set nan_mask, 0x01000000 # nan bit mask (lw)
466
467
set neg_bmask, 0x08 # negative bit mask (byte)
468
set inf_bmask, 0x02 # infinity bit mask (byte)
469
set z_bmask, 0x04 # zero bit mask (byte)
470
set nan_bmask, 0x01 # nan bit mask (byte)
471
472
set bsun_mask, 0x00008000 # bsun exception mask
473
set snan_mask, 0x00004000 # snan exception mask
474
set operr_mask, 0x00002000 # operr exception mask
475
set ovfl_mask, 0x00001000 # overflow exception mask
476
set unfl_mask, 0x00000800 # underflow exception mask
477
set dz_mask, 0x00000400 # dz exception mask
478
set inex2_mask, 0x00000200 # inex2 exception mask
479
set inex1_mask, 0x00000100 # inex1 exception mask
480
481
set aiop_mask, 0x00000080 # accrued illegal operation
482
set aovfl_mask, 0x00000040 # accrued overflow
483
set aunfl_mask, 0x00000020 # accrued underflow
484
set adz_mask, 0x00000010 # accrued divide by zero
485
set ainex_mask, 0x00000008 # accrued inexact
486
487
######################################
488
# FPSR combinations used in the FPSP #
489
######################################
490
set dzinf_mask, inf_mask+dz_mask+adz_mask
491
set opnan_mask, nan_mask+operr_mask+aiop_mask
492
set nzi_mask, 0x01ffffff # clears N, Z, and I
493
set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
494
set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
495
set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
496
set inx1a_mask, inex1_mask+ainex_mask
497
set inx2a_mask, inex2_mask+ainex_mask
498
set snaniop_mask, nan_mask+snan_mask+aiop_mask
499
set snaniop2_mask, snan_mask+aiop_mask
500
set naniop_mask, nan_mask+aiop_mask
501
set neginf_mask, neg_mask+inf_mask
502
set infaiop_mask, inf_mask+aiop_mask
503
set negz_mask, neg_mask+z_mask
504
set opaop_mask, operr_mask+aiop_mask
505
set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
506
set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
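# A handler can flag one of these composite results with a single
# immediate OR into the saved FPSR image, for example (illustrative only):
#
#	ori.l	&ovfinx_mask,USER_FPSR(%a6)	# OVFL+INEX2 plus accrued bits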
507
508
#########
509
# misc. #
510
#########
511
set rnd_stky_bit, 29 # sticky bit pos in longword
512
513
set sign_bit, 0x7 # sign bit
514
set signan_bit, 0x6 # signalling nan bit
515
516
set sgl_thresh, 0x3f81 # minimum sgl exponent
517
set dbl_thresh, 0x3c01 # minimum dbl exponent
518
519
set x_mode, 0x0 # extended precision
520
set s_mode, 0x4 # single precision
521
set d_mode, 0x8 # double precision
522
523
set rn_mode, 0x0 # round-to-nearest
524
set rz_mode, 0x1 # round-to-zero
525
set rm_mode, 0x2 # round-to-minus-infinity
526
set rp_mode, 0x3 # round-to-plus-infinity
527
528
set mantissalen, 64 # length of mantissa in bits
529
530
set BYTE, 1 # len(byte) == 1 byte
531
set WORD, 2 # len(word) == 2 bytes
532
set LONG, 4 # len(longword) == 4 bytes
533
534
set BSUN_VEC, 0xc0 # bsun vector offset
535
set INEX_VEC, 0xc4 # inexact vector offset
536
set DZ_VEC, 0xc8 # dz vector offset
537
set UNFL_VEC, 0xcc # unfl vector offset
538
set OPERR_VEC, 0xd0 # operr vector offset
539
set OVFL_VEC, 0xd4 # ovfl vector offset
540
set SNAN_VEC, 0xd8 # snan vector offset
541
542
###########################
543
# SPecial CONDition FLaGs #
544
###########################
545
set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
546
set fbsun_flg, 0x02 # flag bit: bsun exception
547
set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
548
set mda7_flg, 0x08 # flag bit: -(a7) <ea>
549
set fmovm_flg, 0x40 # flag bit: fmovm instruction
550
set immed_flg, 0x80 # flag bit: &<data> <ea>
551
552
set ftrapcc_bit, 0x0
553
set fbsun_bit, 0x1
554
set mia7_bit, 0x2
555
set mda7_bit, 0x3
556
set immed_bit, 0x7
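# each *_flg above is a one-bit mask whose bit number is the matching
# *_bit (e.g. mda7_flg = 0x08 = 1<<mda7_bit), so SPCOND_FLG can either be
# compared against a whole flag value or tested one bit at a time.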
557
558
##################################
559
# TRANSCENDENTAL "LAST-OP" FLAGS #
560
##################################
561
set FMUL_OP, 0x0 # fmul instr performed last
562
set FDIV_OP, 0x1 # fdiv performed last
563
set FADD_OP, 0x2 # fadd performed last
564
set FMOV_OP, 0x3 # fmov performed last
565
566
#############
567
# CONSTANTS #
568
#############
569
T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
570
T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
571
572
PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
573
PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
574
575
TWOBYPI:
576
long 0x3FE45F30,0x6DC9C883
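# These constants are raw bit images. PI and PIBY2 are extended-precision
# values padded to four longs (exponent word, 64-bit mantissa with explicit
# integer bit, then a pad long); e.g. PI's 0x4000 exponent and
# 0xC90FDAA22168C235 mantissa give 1.5707963... * 2^1. T1/T2 and TWOBYPI
# appear to be IEEE double-precision bit patterns.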
577
578
#########################################################################
579
# XDEF **************************************************************** #
580
# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
581
# #
582
# This handler should be the first code executed upon taking the #
583
# FP Overflow exception in an operating system. #
584
# #
585
# XREF **************************************************************** #
586
# _imem_read_long() - read instruction longword #
587
# fix_skewed_ops() - adjust src operand in fsave frame #
588
# set_tag_x() - determine optype of src/dst operands #
589
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
590
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
591
# load_fpn2() - load dst operand from FP regfile #
592
# fout() - emulate an opclass 3 instruction #
593
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
594
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
595
# _real_ovfl() - "callout" for Overflow exception enabled code #
596
# _real_inex() - "callout" for Inexact exception enabled code #
597
# _real_trace() - "callout" for Trace exception code #
598
# #
599
# INPUT *************************************************************** #
600
# - The system stack contains the FP Ovfl exception stack frame #
601
# - The fsave frame contains the source operand #
602
# #
603
# OUTPUT ************************************************************** #
604
# Overflow Exception enabled: #
605
# - The system stack is unchanged #
606
# - The fsave frame contains the adjusted src op for opclass 0,2 #
607
# Overflow Exception disabled: #
608
# - The system stack is unchanged #
609
# - The "exception present" flag in the fsave frame is cleared #
610
# #
611
# ALGORITHM *********************************************************** #
612
# On the 060, if an FP overflow is present as the result of any #
613
# instruction, the 060 will take an overflow exception whether the #
614
# exception is enabled or disabled in the FPCR. For the disabled case, #
615
# this handler emulates the instruction to determine what the correct #
616
# default result should be for the operation. This default result is #
617
# then stored in either the FP regfile, data regfile, or memory. #
618
# Finally, the handler exits through the "callout" _fpsp_done() #
619
# denoting that no exceptional conditions exist within the machine. #
620
# If the exception is enabled, then this handler must create the #
621
# exceptional operand and place it in the fsave state frame, and store #
622
# the default result (only if the instruction is opclass 3). For #
623
# exceptions enabled, this handler must exit through the "callout" #
624
# _real_ovfl() so that the operating system's enabled overflow handler #
625
# can handle this case. #
626
# Two other conditions exist. First, if overflow was disabled #
627
# but the inexact exception was enabled, this handler must exit #
628
# through the "callout" _real_inex() regardless of whether the result #
629
# was inexact. #
630
# Also, in the case of an opclass three instruction where #
631
# overflow was disabled and the trace exception was enabled, this #
632
# handler must exit through the "callout" _real_trace(). #
633
# #
634
#########################################################################
635
636
global _fpsp_ovfl
637
_fpsp_ovfl:
638
639
#$# sub.l &24,%sp # make room for src/dst
640
641
link.w %a6,&-LOCAL_SIZE # init stack frame
642
643
fsave FP_SRC(%a6) # grab the "busy" frame
644
645
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
646
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
647
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
648
649
# the FPIAR holds the "current PC" of the faulting instruction
650
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
651
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
652
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
653
bsr.l _imem_read_long # fetch the instruction words
654
mov.l %d0,EXC_OPWORD(%a6)
655
656
##############################################################################
657
658
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
659
bne.w fovfl_out
660
661
662
lea FP_SRC(%a6),%a0 # pass: ptr to src op
663
bsr.l fix_skewed_ops # fix src op
664
665
# since, I believe, only NORMs and DENORMs can come through here,
666
# maybe we can avoid the subroutine call.
667
lea FP_SRC(%a6),%a0 # pass: ptr to src op
668
bsr.l set_tag_x # tag the operand type
669
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
670
671
# bit five of the fp extension word separates the monadic and dyadic operations
672
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
673
# will never take this exception.
674
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
675
beq.b fovfl_extract # monadic
676
677
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
678
bsr.l load_fpn2 # load dst into FP_DST
679
680
lea FP_DST(%a6),%a0 # pass: ptr to dst op
681
bsr.l set_tag_x # tag the operand type
682
cmpi.b %d0,&UNNORM # is operand an UNNORM?
683
bne.b fovfl_op2_done # no
684
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
685
fovfl_op2_done:
686
mov.b %d0,DTAG(%a6) # save dst optype tag
687
688
fovfl_extract:
689
690
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
691
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
692
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
693
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
694
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
695
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
696
697
clr.l %d0
698
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
699
700
mov.b 1+EXC_CMDREG(%a6),%d1
701
andi.w &0x007f,%d1 # extract extension
702
703
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
704
705
fmov.l &0x0,%fpcr # zero current control regs
706
fmov.l &0x0,%fpsr
707
708
lea FP_SRC(%a6),%a0
709
lea FP_DST(%a6),%a1
710
711
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
712
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
713
jsr (tbl_unsupp.l,%pc,%d1.l*1)
714
715
# the operation has been emulated. the result is in fp0.
716
# the EXOP, if an exception occurred, is in fp1.
717
# we must save the default result regardless of whether
718
# traps are enabled or disabled.
719
bfextu EXC_CMDREG(%a6){&6:&3},%d0
720
bsr.l store_fpreg
721
722
# the exceptional possibilities we have left ourselves with are ONLY overflow
723
# and inexact. and, the inexact is such that overflow occurred and was disabled
724
# but inexact was enabled.
725
btst &ovfl_bit,FPCR_ENABLE(%a6)
726
bne.b fovfl_ovfl_on
727
728
btst &inex2_bit,FPCR_ENABLE(%a6)
729
bne.b fovfl_inex_on
730
731
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
732
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
733
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
734
735
unlk %a6
736
#$# add.l &24,%sp
737
bra.l _fpsp_done
738
739
# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
740
# in fp1. now, simply jump to _real_ovfl()!
741
fovfl_ovfl_on:
742
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
743
744
mov.w &0xe005,2+FP_SRC(%a6) # save exc status
745
746
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
747
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
748
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
749
750
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
751
752
unlk %a6
753
754
bra.l _real_ovfl
755
756
# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
757
# we must jump to real_inex().
758
fovfl_inex_on:
759
760
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
761
762
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
763
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
764
765
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
766
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
767
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
768
769
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
770
771
unlk %a6
772
773
bra.l _real_inex
774
775
########################################################################
776
fovfl_out:
777
778
779
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
780
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
781
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
782
783
# the src operand is definitely a NORM(!), so tag it as such
784
mov.b &NORM,STAG(%a6) # set src optype tag
785
786
clr.l %d0
787
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
788
789
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
790
791
fmov.l &0x0,%fpcr # zero current control regs
792
fmov.l &0x0,%fpsr
793
794
lea FP_SRC(%a6),%a0 # pass ptr to src operand
795
796
bsr.l fout
797
798
btst &ovfl_bit,FPCR_ENABLE(%a6)
799
bne.w fovfl_ovfl_on
800
801
btst &inex2_bit,FPCR_ENABLE(%a6)
802
bne.w fovfl_inex_on
803
804
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
805
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
806
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
807
808
unlk %a6
809
#$# add.l &24,%sp
810
811
btst &0x7,(%sp) # is trace on?
812
beq.l _fpsp_done # no
813
814
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
815
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
816
bra.l _real_trace
817
818
#########################################################################
819
# XDEF **************************************************************** #
820
# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
821
# #
822
# This handler should be the first code executed upon taking the #
823
# FP Underflow exception in an operating system. #
824
# #
825
# XREF **************************************************************** #
826
# _imem_read_long() - read instruction longword #
827
# fix_skewed_ops() - adjust src operand in fsave frame #
828
# set_tag_x() - determine optype of src/dst operands #
829
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
830
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
831
# load_fpn2() - load dst operand from FP regfile #
832
# fout() - emulate an opclass 3 instruction #
833
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
834
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
835
# _real_unfl() - "callout" for Underflow exception enabled code #
836
# _real_inex() - "callout" for Inexact exception enabled code #
837
# _real_trace() - "callout" for Trace exception code #
838
# #
839
# INPUT *************************************************************** #
840
# - The system stack contains the FP Unfl exception stack frame #
841
# - The fsave frame contains the source operand #
842
# #
843
# OUTPUT ************************************************************** #
844
# Underflow Exception enabled: #
845
# - The system stack is unchanged #
846
# - The fsave frame contains the adjusted src op for opclass 0,2 #
847
# Underflow Exception disabled: #
848
# - The system stack is unchanged #
849
# - The "exception present" flag in the fsave frame is cleared #
850
# #
851
# ALGORITHM *********************************************************** #
852
# On the 060, if an FP underflow is present as the result of any #
853
# instruction, the 060 will take an underflow exception whether the #
854
# exception is enabled or disabled in the FPCR. For the disabled case, #
855
# this handler emulates the instruction to determine what the correct #
856
# default result should be for the operation. This default result is #
857
# then stored in either the FP regfile, data regfile, or memory. #
858
# Finally, the handler exits through the "callout" _fpsp_done() #
859
# denoting that no exceptional conditions exist within the machine. #
860
# If the exception is enabled, then this handler must create the #
861
# exceptional operand and place it in the fsave state frame, and store #
862
# the default result (only if the instruction is opclass 3). For #
863
# exceptions enabled, this handler must exit through the "callout" #
864
# _real_unfl() so that the operating system's enabled underflow handler #
865
# can handle this case. #
866
# Two other conditions exist. First, if underflow was disabled #
867
# but the inexact exception was enabled and the result was inexact, #
868
# this handler must exit through the "callout" _real_inex(). #
869
870
# Also, in the case of an opclass three instruction where #
871
# underflow was disabled and the trace exception was enabled, this #
872
# handler must exit through the "callout" _real_trace(). #
873
# #
874
#########################################################################
875
876
global _fpsp_unfl
877
_fpsp_unfl:
878
879
#$# sub.l &24,%sp # make room for src/dst
880
881
link.w %a6,&-LOCAL_SIZE # init stack frame
882
883
fsave FP_SRC(%a6) # grab the "busy" frame
884
885
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
886
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
887
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
888
889
# the FPIAR holds the "current PC" of the faulting instruction
890
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
891
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
892
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
893
bsr.l _imem_read_long # fetch the instruction words
894
mov.l %d0,EXC_OPWORD(%a6)
895
896
##############################################################################
897
898
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
899
bne.w funfl_out
900
901
902
lea FP_SRC(%a6),%a0 # pass: ptr to src op
903
bsr.l fix_skewed_ops # fix src op
904
905
lea FP_SRC(%a6),%a0 # pass: ptr to src op
906
bsr.l set_tag_x # tag the operand type
907
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
908
909
# bit five of the fp ext word separates the monadic and dyadic operations
910
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
911
# will never take this exception.
912
btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
913
beq.b funfl_extract # monadic
914
915
# now, what's left that's not dyadic is fsincos. we can distinguish it
916
# from all dyadics by the 0110xxx pattern in the extension word
917
btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
918
bne.b funfl_extract # yes
919
920
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
921
bsr.l load_fpn2 # load dst into FP_DST
922
923
lea FP_DST(%a6),%a0 # pass: ptr to dst op
924
bsr.l set_tag_x # tag the operand type
925
cmpi.b %d0,&UNNORM # is operand an UNNORM?
926
bne.b funfl_op2_done # no
927
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
928
funfl_op2_done:
929
mov.b %d0,DTAG(%a6) # save dst optype tag
930
931
funfl_extract:
932
933
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
934
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
935
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
936
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
937
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
938
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
939
940
clr.l %d0
941
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
942
943
mov.b 1+EXC_CMDREG(%a6),%d1
944
andi.w &0x007f,%d1 # extract extension
945
946
andi.l &0x00ff01ff,USER_FPSR(%a6)
947
948
fmov.l &0x0,%fpcr # zero current control regs
949
fmov.l &0x0,%fpsr
950
951
lea FP_SRC(%a6),%a0
952
lea FP_DST(%a6),%a1
953
954
# maybe we can make these entry points ONLY the UNFL entry points of each routine.
955
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
956
jsr (tbl_unsupp.l,%pc,%d1.l*1)
957
958
bfextu EXC_CMDREG(%a6){&6:&3},%d0
959
bsr.l store_fpreg
960
961
# The `060 FPU multiplier hardware is such that if the result of a
962
# multiply operation is the smallest possible normalized number
963
# (0x00000000_80000000_00000000), then the machine will take an
964
# underflow exception. Since this is incorrect, we need to check
965
# if our emulation, after re-doing the operation, decided that
966
# no underflow was called for. We do these checks only in
967
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
968
# special case will simply exit gracefully with the correct result.
969
970
# the exceptional possibilities we have left ourselves with are ONLY underflow
971
# and inexact. and, the inexact is such that underflow occurred and was disabled
972
# but inexact was enabled.
973
btst &unfl_bit,FPCR_ENABLE(%a6)
974
bne.b funfl_unfl_on
975
976
funfl_chkinex:
977
btst &inex2_bit,FPCR_ENABLE(%a6)
978
bne.b funfl_inex_on
979
980
funfl_exit:
981
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
982
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
983
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
984
985
unlk %a6
986
#$# add.l &24,%sp
987
bra.l _fpsp_done
988
989
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
990
# in fp1 (don't forget to save fp0). what to do now?
991
# well, we simply have to go to _real_unfl()!
992
funfl_unfl_on:
993
994
# The `060 FPU multiplier hardware is such that if the result of a
995
# multiply operation is the smallest possible normalized number
996
# (0x00000000_80000000_00000000), then the machine will take an
997
# underflow exception. Since this is incorrect, we check here to see
998
# if our emulation, after re-doing the operation, decided that
999
# no underflow was called for.
1000
btst &unfl_bit,FPSR_EXCEPT(%a6)
1001
beq.w funfl_chkinex
1002
1003
funfl_unfl_on2:
1004
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1005
1006
mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1007
1008
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1009
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1011
1012
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1013
1014
unlk %a6
1015
1016
bra.l _real_unfl
1017
1018
# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019
# we must jump to real_inex().
1020
funfl_inex_on:
1021
1022
# The `060 FPU multiplier hardware is such that if the result of a
1023
# multiply operation is the smallest possible normalized number
1024
# (0x00000000_80000000_00000000), then the machine will take an
1025
# underflow exception.
1026
# But, whether bogus or not, if inexact is enabled AND it occurred,
1027
# then we have to branch to real_inex.
1028
1029
btst &inex2_bit,FPSR_EXCEPT(%a6)
1030
beq.w funfl_exit
1031
1032
funfl_inex_on2:
1033
1034
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1035
1036
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
1037
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1038
1039
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1040
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1042
1043
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1044
1045
unlk %a6
1046
1047
bra.l _real_inex
1048
1049
#######################################################################
1050
funfl_out:
1051
1052
1053
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056
1057
# the src operand is definitely a NORM(!), so tag it as such
1058
mov.b &NORM,STAG(%a6) # set src optype tag
1059
1060
clr.l %d0
1061
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1062
1063
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
1064
1065
fmov.l &0x0,%fpcr # zero current control regs
1066
fmov.l &0x0,%fpsr
1067
1068
lea FP_SRC(%a6),%a0 # pass ptr to src operand
1069
1070
bsr.l fout
1071
1072
btst &unfl_bit,FPCR_ENABLE(%a6)
1073
bne.w funfl_unfl_on2
1074
1075
btst &inex2_bit,FPCR_ENABLE(%a6)
1076
bne.w funfl_inex_on2
1077
1078
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1079
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1081
1082
unlk %a6
1083
#$# add.l &24,%sp
1084
1085
btst &0x7,(%sp) # is trace on?
1086
beq.l _fpsp_done # no
1087
1088
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1089
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1090
bra.l _real_trace
1091
1092
#########################################################################
1093
# XDEF **************************************************************** #
1094
# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1095
# Data Type" exception. #
1096
# #
1097
# This handler should be the first code executed upon taking the #
1098
# FP Unimplemented Data Type exception in an operating system. #
1099
# #
1100
# XREF **************************************************************** #
1101
# _imem_read_{word,long}() - read instruction word/longword #
1102
# fix_skewed_ops() - adjust src operand in fsave frame #
1103
# set_tag_x() - determine optype of src/dst operands #
1104
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
1105
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
1106
# load_fpn2() - load dst operand from FP regfile #
1107
# load_fpn1() - load src operand from FP regfile #
1108
# fout() - emulate an opclass 3 instruction #
1109
# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
1110
# _real_inex() - "callout" to operating system inexact handler #
1111
# _fpsp_done() - "callout" for exit; work all done #
1112
# _real_trace() - "callout" for Trace enabled exception #
1113
# funimp_skew() - adjust fsave src ops to "incorrect" value #
1114
# _real_snan() - "callout" for SNAN exception #
1115
# _real_operr() - "callout" for OPERR exception #
1116
# _real_ovfl() - "callout" for OVFL exception #
1117
# _real_unfl() - "callout" for UNFL exception #
1118
# get_packed() - fetch packed operand from memory #
1119
# #
1120
# INPUT *************************************************************** #
1121
# - The system stack contains the "Unimp Data Type" stk frame #
1122
# - The fsave frame contains the src op (for UNNORM/DENORM) #
1123
# #
1124
# OUTPUT ************************************************************** #
1125
# If Inexact exception (opclass 3): #
1126
# - The system stack is changed to an Inexact exception stk frame #
1127
# If SNAN exception (opclass 3): #
1128
# - The system stack is changed to an SNAN exception stk frame #
1129
# If OPERR exception (opclass 3): #
1130
# - The system stack is changed to an OPERR exception stk frame #
1131
# If OVFL exception (opclass 3): #
1132
# - The system stack is changed to an OVFL exception stk frame #
1133
# If UNFL exception (opclass 3): #
1134
# - The system stack is changed to an UNFL exception stack frame #
1135
# If Trace exception enabled: #
1136
# - The system stack is changed to a Trace exception stack frame #
1137
# Else: (normal case) #
1138
# - Correct result has been stored as appropriate #
1139
# #
1140
# ALGORITHM *********************************************************** #
1141
# Two main instruction types can enter here: (1) DENORM or UNNORM #
1142
# unimplemented data types. These can be either opclass 0,2 or 3 #
1143
# instructions, and (2) PACKED unimplemented data format instructions #
1144
# also of opclasses 0,2, or 3. #
1145
# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1146
# operand from the fsave state frame and the dst operand (if dyadic) #
1147
# from the FP register file. The instruction is then emulated by #
1148
# choosing an emulation routine from a table of routines indexed by #
1149
# instruction type. Once the instruction has been emulated and result #
1150
# saved, then we check to see if any enabled exceptions resulted from #
1151
# instruction emulation. If none, then we exit through the "callout" #
1152
# _fpsp_done(). If there is an enabled FP exception, then we insert #
1153
# this exception into the FPU in the fsave state frame and then exit #
1154
# through _fpsp_done(). #
1155
# PACKED opclass 0 and 2 is similar in how the instruction is #
1156
# emulated and exceptions handled. The differences occur in how the #
1157
# handler loads the packed op (by calling get_packed() routine) and #
1158
# by the fact that a Trace exception could be pending for PACKED ops. #
1159
# If a Trace exception is pending, then the current exception stack #
1160
# frame is changed to a Trace exception stack frame and an exit is #
1161
# made through _real_trace(). #
1162
# For UNNORM/DENORM opclass 3, the actual move out to memory is #
1163
# performed by calling the routine fout(). If no exception should occur #
1164
# as the result of emulation, then an exit either occurs through #
1165
# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1166
# (a Trace stack frame must be created here, too). If an FP exception #
1167
# should occur, then we must create an exception stack frame of that #
1168
# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1169
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1170
# emulation is performed in a similar manner. #
1171
# #
1172
#########################################################################
1173
1174
#
1175
# (1) DENORM and UNNORM (unimplemented) data types:
1176
#
1177
# post-instruction
1178
# *****************
1179
# * EA *
1180
# pre-instruction * *
1181
# ***************** *****************
1182
# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1183
# ***************** *****************
1184
# * Next * * Next *
1185
# * PC * * PC *
1186
# ***************** *****************
1187
# * SR * * SR *
1188
# ***************** *****************
1189
#
1190
# (2) PACKED format (unsupported) opclasses two and three:
1191
# *****************
1192
# * EA *
1193
# * *
1194
# *****************
1195
# * 0x2 * 0x0dc *
1196
# *****************
1197
# * Next *
1198
# * PC *
1199
# *****************
1200
# * SR *
1201
# *****************
1202
#
1203
global _fpsp_unsupp
1204
_fpsp_unsupp:
1205
1206
link.w %a6,&-LOCAL_SIZE # init stack frame
1207
1208
fsave FP_SRC(%a6) # save fp state
1209
1210
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1211
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1213
1214
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1215
bne.b fu_s
1216
fu_u:
1217
mov.l %usp,%a0 # fetch user stack pointer
1218
mov.l %a0,EXC_A7(%a6) # save on stack
1219
bra.b fu_cont
1220
# if the exception is an opclass zero or two unimplemented data type
1221
# exception, then the a7' calculated here is wrong since it doesn't
1222
# stack an ea. however, we don't need an a7' for this case anyways.
1223
fu_s:
1224
lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1225
mov.l %a0,EXC_A7(%a6) # save on stack
1226
1227
fu_cont:
1228
1229
# the FPIAR holds the "current PC" of the faulting instruction
1230
# the FPIAR should be set correctly for ALL exceptions passing through
1231
# this point.
1232
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1233
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1234
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1235
bsr.l _imem_read_long # fetch the instruction words
1236
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1237
1238
############################
1239
1240
clr.b SPCOND_FLG(%a6) # clear special condition flag
1241
1242
# Separate opclass three (fpn-to-mem) ops since they have a different
1243
# stack frame and protocol.
1244
btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1245
bne.w fu_out # yes
1246
1247
# Separate packed opclass two instructions.
1248
bfextu EXC_CMDREG(%a6){&0:&6},%d0
1249
cmpi.b %d0,&0x13
1250
beq.w fu_in_pack
1251
1252
1253
# I'm not sure at this point what FPSR bits are valid for this instruction.
1254
# so, since the emulation routines re-create them anyways, zero exception field
1255
andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1256
1257
fmov.l &0x0,%fpcr # zero current control regs
1258
fmov.l &0x0,%fpsr
1259
1260
# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261
# precision format if the src format was single or double and the
1262
# source data type was an INF, NAN, DENORM, or UNNORM
1263
lea FP_SRC(%a6),%a0 # pass ptr to input
1264
bsr.l fix_skewed_ops
1265
1266
# we don't know whether the src operand or the dst operand (or both) is the
1267
# UNNORM or DENORM. call the function that tags the operand type. if the
1268
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269
lea FP_SRC(%a6),%a0 # pass: ptr to src op
1270
bsr.l set_tag_x # tag the operand type
1271
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1272
bne.b fu_op2 # no
1273
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1274
1275
fu_op2:
1276
mov.b %d0,STAG(%a6) # save src optype tag
1277
1278
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1279
1280
# bit five of the fp extension word separates the monadic and dyadic operations
1281
# at this point
1282
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1283
beq.b fu_extract # monadic
1284
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1285
beq.b fu_extract # yes, so it's monadic, too
1286
1287
bsr.l load_fpn2 # load dst into FP_DST
1288
1289
lea FP_DST(%a6),%a0 # pass: ptr to dst op
1290
bsr.l set_tag_x # tag the operand type
1291
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1292
bne.b fu_op2_done # no
1293
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1294
fu_op2_done:
1295
mov.b %d0,DTAG(%a6) # save dst optype tag
1296
1297
fu_extract:
1298
clr.l %d0
1299
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1300
1301
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1302
1303
lea FP_SRC(%a6),%a0
1304
lea FP_DST(%a6),%a1
1305
1306
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1307
jsr (tbl_unsupp.l,%pc,%d1.l*1)
1308
1309
#
1310
# Exceptions in order of precedence:
1311
# BSUN : none
1312
# SNAN : all dyadic ops
1313
# OPERR : fsqrt(-NORM)
1314
# OVFL : all except ftst,fcmp
1315
# UNFL : all except ftst,fcmp
1316
# DZ : fdiv
1317
# INEX2 : all except ftst,fcmp
1318
# INEX1 : none (packed doesn't go through here)
1319
#
1320
1321
# we determine the highest priority exception(if any) set by the
1322
# emulation routine that has also been enabled by the user.
1323
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
1324
bne.b fu_in_ena # some are enabled
1325
1326
fu_in_cont:
1327
# fcmp and ftst do not store any result.
1328
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1329
andi.b &0x38,%d0 # extract bits 3-5
1330
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1331
beq.b fu_in_exit # yes
1332
1333
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1334
bsr.l store_fpreg # store the result
1335
1336
fu_in_exit:
1337
1338
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1339
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1341
1342
unlk %a6
1343
1344
bra.l _fpsp_done
1345
1346
fu_in_ena:
1347
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1348
bfffo %d0{&24:&8},%d0 # find highest priority exception
1349
bne.b fu_in_exc # there is at least one set
1350
1351
#
1352
# No exceptions occurred that were also enabled. Now:
1353
#
1354
# if (OVFL && ovfl_disabled && inexact_enabled) {
1355
# branch to _real_inex() (even if the result was exact!);
1356
# } else {
1357
# save the result in the proper fp reg (unless the op is fcmp or ftst);
1358
# return;
1359
# }
1360
#
1361
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362
beq.b fu_in_cont # no
1363
1364
fu_in_ovflchk:
1365
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366
beq.b fu_in_cont # no
1367
bra.w fu_in_exc_ovfl # go insert overflow frame
1368
1369
#
1370
# An exception occurred and that exception was enabled:
1371
#
1372
# shift enabled exception field into lo byte of d0;
1373
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375
# /*
1376
# * this is the case where we must call _real_inex() now or else
1377
# * there will be no other way to pass it the exceptional operand
1378
# */
1379
# call _real_inex();
1380
# } else {
1381
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1382
# }
1383
#
1384
fu_in_exc:
1385
subi.l &24,%d0 # fix offset to be 0-8
1386
cmpi.b %d0,&0x6 # is exception INEX? (6)
1387
bne.b fu_in_exc_exit # no
1388
1389
# the enabled exception was inexact
1390
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391
bne.w fu_in_exc_unfl # yes
1392
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393
bne.w fu_in_exc_ovfl # yes
1394
1395
# here, we insert the correct fsave status value into the fsave frame for the
1396
# corresponding exception. the operand in the fsave frame should be the original
1397
# src operand.
1398
fu_in_exc_exit:
1399
mov.l %d0,-(%sp) # save d0
1400
bsr.l funimp_skew # skew sgl or dbl inputs
1401
mov.l (%sp)+,%d0 # restore d0
1402
1403
mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404
1405
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1406
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1408
1409
frestore FP_SRC(%a6) # restore src op
1410
1411
unlk %a6
1412
1413
bra.l _fpsp_done
1414
1415
tbl_except:
1416
short 0xe000,0xe006,0xe004,0xe005
1417
short 0xe003,0xe002,0xe001,0xe001
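# tbl_except is indexed by the priority number computed above (0=bsun,
# 1=snan, 2=operr, 3=ovfl, 4=unfl, 5=dz, 6=inex2, 7=inex1); each word is
# the fsave exception status written at 2+FP_SRC, matching the values this
# file stores explicitly elsewhere (0xe005 overflow, 0xe003 underflow,
# 0xe001 inexact).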
1418
1419
fu_in_exc_unfl:
1420
mov.w &0x4,%d0
1421
bra.b fu_in_exc_exit
1422
fu_in_exc_ovfl:
1423
mov.w &0x03,%d0
1424
bra.b fu_in_exc_exit
1425
1426
# If the input operand to this operation was opclass two and a single
1427
# or double precision denorm, inf, or nan, the operand needs to be
1428
# "corrected" in order to have the proper equivalent extended precision
1429
# number.
1430
global fix_skewed_ops
1431
fix_skewed_ops:
1432
bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433
cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1434
beq.b fso_sgl # yes
1435
cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
1436
beq.b fso_dbl # yes
1437
rts # no
1438
1439
fso_sgl:
1440
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1441
andi.w &0x7fff,%d0 # strip sign
1442
cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
1443
beq.b fso_sgl_dnrm_zero # yes
1444
cmpi.w %d0,&0x407f # no; is |exp| == $407f?
1445
beq.b fso_infnan # yes
1446
rts # no
1447
1448
fso_sgl_dnrm_zero:
1449
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450
beq.b fso_zero # it's a skewed zero
1451
fso_sgl_dnrm:
1452
# here, we count on norm not to alter a0...
1453
bsr.l norm # normalize mantissa
1454
neg.w %d0 # -shft amt
1455
addi.w &0x3f81,%d0 # adjust new exponent
1456
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1457
or.w %d0,LOCAL_EX(%a0) # insert new exponent
1458
rts
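# worked example: a single-precision denorm whose fraction MSB is set
# represents 0.1xxx... x 2^-126 = 1.xxx... x 2^-127; norm shifts the
# mantissa up by one bit, so the new extended exponent becomes
# 0x3f81 - 1 = 0x3f80 (i.e. 16383 - 127, as expected).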
1459
1460
fso_zero:
1461
andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
1462
rts
1463
1464
fso_infnan:
1465
andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1466
ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
1467
rts
1468
1469
fso_dbl:
1470
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1471
andi.w &0x7fff,%d0 # strip sign
1472
cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
1473
beq.b fso_dbl_dnrm_zero # yes
1474
cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
1475
beq.b fso_infnan # yes
1476
rts # no
1477
1478
fso_dbl_dnrm_zero:
1479
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480
bne.b fso_dbl_dnrm # it's a skewed denorm
1481
tst.l LOCAL_LO(%a0) # is it a zero?
1482
beq.b fso_zero # yes
1483
fso_dbl_dnrm:
1484
# here, we count on norm not to alter a0...
1485
bsr.l norm # normalize mantissa
1486
neg.w %d0 # -shft amt
1487
addi.w &0x3c01,%d0 # adjust new exponent
1488
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1489
or.w %d0,LOCAL_EX(%a0) # insert new exponent
1490
rts
1491
1492
#################################################################
1493
1494
# fmove out took an unimplemented data type exception.
1495
# the src operand is in FP_SRC. Call _fout() to write out the result and
1496
# to determine which exceptions, if any, to take.
1497
fu_out:
1498
1499
# Separate packed move outs from the UNNORM and DENORM move outs.
1500
bfextu EXC_CMDREG(%a6){&3:&3},%d0
1501
cmpi.b %d0,&0x3
1502
beq.w fu_out_pack
1503
cmpi.b %d0,&0x7
1504
beq.w fu_out_pack
1505
1506
1507
# I'm not sure at this point what FPSR bits are valid for this instruction.
1508
# so, since the emulation routines re-create them anyways, zero exception field.
1509
# fmove out doesn't affect ccodes.
1510
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1511
1512
fmov.l &0x0,%fpcr # zero current control regs
1513
fmov.l &0x0,%fpsr
1514
1515
# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516
# call here. just figure out what it is...
1517
mov.w FP_SRC_EX(%a6),%d0 # get exponent
1518
andi.w &0x7fff,%d0 # strip sign
1519
beq.b fu_out_denorm # it's a DENORM
1520
1521
lea FP_SRC(%a6),%a0
1522
bsr.l unnorm_fix # it's an UNNORM; fix it
1523
1524
mov.b %d0,STAG(%a6)
1525
1526
bra.b fu_out_cont
1527
fu_out_denorm:
1528
mov.b &DENORM,STAG(%a6)
1529
fu_out_cont:
1530
1531
clr.l %d0
1532
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1533
1534
lea FP_SRC(%a6),%a0 # pass ptr to src operand
1535
1536
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1537
bsr.l fout # call fmove out routine
1538
1539
# Exceptions in order of precedence:
1540
# BSUN : none
1541
# SNAN : none
1542
# OPERR : fmove.{b,w,l} out of large UNNORM
1543
# OVFL : fmove.{s,d}
1544
# UNFL : fmove.{s,d,x}
1545
# DZ : none
1546
# INEX2 : all
1547
# INEX1 : none (packed doesn't travel through here)
1548
1549
# determine the highest priority exception(if any) set by the
1550
# emulation routine that has also been enabled by the user.
1551
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1552
bne.w fu_out_ena # some are enabled
1553
1554
fu_out_done:
1555
1556
mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1557
1558
# on extended precision opclass three instructions using pre-decrement or
1559
# post-increment addressing mode, the address register is not updated. if the
1560
# address register was the stack pointer used from user mode, then let's update
1561
# it here. if it was used from supervisor mode, then we have to handle this
1562
# as a special case.
1563
btst &0x5,EXC_SR(%a6)
1564
bne.b fu_out_done_s
1565
1566
mov.l EXC_A7(%a6),%a0 # restore a7
1567
mov.l %a0,%usp
1568
1569
fu_out_done_cont:
1570
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1571
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1573
1574
unlk %a6
1575
1576
btst &0x7,(%sp) # is trace on?
1577
bne.b fu_out_trace # yes
1578
1579
bra.l _fpsp_done
1580
1581
# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582
# ("fmov.x fpm,-(a7)") if so,
1583
fu_out_done_s:
1584
cmpi.b SPCOND_FLG(%a6),&mda7_flg
1585
bne.b fu_out_done_cont
1586
1587
# the extended precision result is still in fp0. but, we need to save it
1588
# somewhere on the stack until we can copy it to its final resting place.
1589
# here, we're counting on the top of the stack to be the old place-holders
1590
# for fp0/fp1 which have already been restored. that way, we can write
1591
# over those destinations with the shifted stack frame.
1592
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1593
1594
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1595
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1597
1598
mov.l (%a6),%a6 # restore frame pointer
1599
1600
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1601
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1602
1603
# now, copy the result to the proper place on the stack
1604
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607
1608
add.l &LOCAL_SIZE-0x8,%sp
1609
1610
btst &0x7,(%sp)
1611
bne.b fu_out_trace
1612
1613
bra.l _fpsp_done
1614
1615
fu_out_ena:
1616
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1617
bfffo %d0{&24:&8},%d0 # find highest priority exception
1618
bne.b fu_out_exc # there is at least one set
1619
1620
# no enabled exceptions were set.
# if a disabled overflow occurred and inexact was enabled, then a branch
# to _real_inex() is made (even if the result was exact).
1623
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624
beq.w fu_out_done # no
1625
1626
fu_out_ovflchk:
1627
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628
beq.w fu_out_done # no
1629
bra.w fu_inex # yes
1630
1631
#
1632
# The fp move out that took the "Unimplemented Data Type" exception was
1633
# being traced. Since the stack frames are similar, get the "current" PC
1634
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635
#
1636
# UNSUPP FRAME TRACE FRAME
1637
# ***************** *****************
1638
# * EA * * Current *
1639
# * * * PC *
1640
# ***************** *****************
1641
# * 0x3 * 0x0dc * * 0x2 * 0x024 *
1642
# ***************** *****************
1643
# * Next * * Next *
1644
# * PC * * PC *
1645
# ***************** *****************
1646
# * SR * * SR *
1647
# ***************** *****************
1648
#
1649
fu_out_trace:
1650
mov.w &0x2024,0x6(%sp)
1651
fmov.l %fpiar,0x8(%sp)
1652
bra.l _real_trace
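#
# (note: the word written to 0x6(%sp) above is the frame format/vector
# offset word: upper nybble = stack frame format, low 12 bits = vector
# offset. 0x2024 = format $2, vector offset 0x024 = vector 9, the Trace
# exception.)
#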
# an exception occurred and that exception was enabled.
1655
fu_out_exc:
1656
subi.l &24,%d0 # fix offset to be 0-8
1657
1658
# we don't mess with the existing fsave frame. just re-insert it and
1659
# jump to the "_real_{}()" handler...
1660
mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1661
jmp (tbl_fu_out.b,%pc,%d0.w*1)
1662
1663
swbeg &0x8
1664
tbl_fu_out:
1665
short tbl_fu_out - tbl_fu_out # BSUN can't happen
1666
short tbl_fu_out - tbl_fu_out # SNAN can't happen
1667
short fu_operr - tbl_fu_out # OPERR
1668
short fu_ovfl - tbl_fu_out # OVFL
1669
short fu_unfl - tbl_fu_out # UNFL
1670
short tbl_fu_out - tbl_fu_out # DZ can't happen
1671
short fu_inex - tbl_fu_out # INEX2
1672
short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
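#
# (note on the dispatch above: FPCR_ENABLE and'ed with FPSR_EXCEPT leaves
# only the enabled & set exception bits; bfffo returns the bit number
# (24-31) of the most significant one in the low byte, so subtracting 24
# yields 0 (BSUN) through 7 (INEX1) in priority order. e.g., if OVFL and
# INEX2 are both set and enabled, d0 ends up as 3 and the fu_ovfl entry is
# taken. the table holds 16-bit offsets relative to tbl_fu_out, hence the
# "*2" fetch followed by the "*1" jmp.)
#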
# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1675
# frestore it.
1676
fu_snan:
1677
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1678
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1680
1681
mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1682
mov.w &0xe006,2+FP_SRC(%a6)
1683
1684
frestore FP_SRC(%a6)
1685
1686
unlk %a6
1687
1688
1689
bra.l _real_snan
1690
1691
fu_operr:
1692
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1693
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1695
1696
mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1697
mov.w &0xe004,2+FP_SRC(%a6)
1698
1699
frestore FP_SRC(%a6)
1700
1701
unlk %a6
1702
1703
1704
bra.l _real_operr
1705
1706
fu_ovfl:
1707
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1708
1709
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1710
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1712
1713
mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1714
mov.w &0xe005,2+FP_SRC(%a6)
1715
1716
frestore FP_SRC(%a6) # restore EXOP
1717
1718
unlk %a6
1719
1720
bra.l _real_ovfl
1721
1722
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); if so, the exception stack frame must be shifted "down".
1726
fu_unfl:
1727
mov.l EXC_A6(%a6),(%a6) # restore a6
1728
1729
btst &0x5,EXC_SR(%a6)
1730
bne.w fu_unfl_s
1731
1732
mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1733
mov.l %a0,%usp # to or not...
1734
1735
fu_unfl_cont:
1736
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1737
1738
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1739
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1741
1742
mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1743
mov.w &0xe003,2+FP_SRC(%a6)
1744
1745
frestore FP_SRC(%a6) # restore EXOP
1746
1747
unlk %a6
1748
1749
bra.l _real_unfl
1750
1751
fu_unfl_s:
1752
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1753
bne.b fu_unfl_cont
1754
1755
# the extended precision result is still in fp0. but, we need to save it
1756
# somewhere on the stack until we can copy it to its final resting place
1757
# (where the exc frame is currently). make sure it's not at the top of the
1758
# frame or it will get overwritten when the exc stack frame is shifted "down".
1759
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1760
fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1761
1762
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1763
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1765
1766
mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1767
mov.w &0xe003,2+FP_DST(%a6)
1768
1769
frestore FP_DST(%a6) # restore EXOP
1770
1771
mov.l (%a6),%a6 # restore frame pointer
1772
1773
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1774
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1775
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776
1777
# now, copy the result to the proper place on the stack
1778
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781
1782
add.l &LOCAL_SIZE-0x8,%sp
1783
1784
bra.l _real_unfl
1785
1786
# fmove in and out enter here.
1787
fu_inex:
1788
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1789
1790
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1791
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1792
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1793
1794
mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1795
mov.w &0xe001,2+FP_SRC(%a6)
1796
1797
frestore FP_SRC(%a6) # restore EXOP
1798
1799
unlk %a6
1800
1801
1802
bra.l _real_inex
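#
# (note: the 0x30xx values written to EXC_VOFF in these routines are
# format $3 frame format/vector offset words; the offsets are the 68k
# exception vector numbers times four: 0xc4 = FP inexact, 0xcc = FP
# underflow, 0xd0 = FP operand error, 0xd4 = FP overflow, 0xd8 = FP SNAN.)
#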
#########################################################################
1805
#########################################################################
1806
fu_in_pack:
1807
1808
1809
# I'm not sure at this point what FPSR bits are valid for this instruction.
1810
# so, since the emulation routines re-create them anyways, zero exception field
1811
andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1812
1813
fmov.l &0x0,%fpcr # zero current control regs
1814
fmov.l &0x0,%fpsr
1815
1816
bsr.l get_packed # fetch packed src operand
1817
1818
lea FP_SRC(%a6),%a0 # pass ptr to src
1819
bsr.l set_tag_x # set src optype tag
1820
1821
mov.b %d0,STAG(%a6) # save src optype tag
1822
1823
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824
1825
# bit five of the fp extension word separates the monadic and dyadic operations
1826
# at this point
1827
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1828
beq.b fu_extract_p # monadic
1829
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1830
beq.b fu_extract_p # yes, so it's monadic, too
1831
1832
bsr.l load_fpn2 # load dst into FP_DST
1833
1834
lea FP_DST(%a6),%a0 # pass: ptr to dst op
1835
bsr.l set_tag_x # tag the operand type
1836
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1837
bne.b fu_op2_done_p # no
1838
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1839
fu_op2_done_p:
1840
mov.b %d0,DTAG(%a6) # save dst optype tag
1841
1842
fu_extract_p:
1843
clr.l %d0
1844
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1845
1846
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1847
1848
lea FP_SRC(%a6),%a0
1849
lea FP_DST(%a6),%a1
1850
1851
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1852
jsr (tbl_unsupp.l,%pc,%d1.l*1)
1853
1854
#
1855
# Exceptions in order of precedence:
1856
# BSUN : none
1857
# SNAN : all dyadic ops
1858
# OPERR : fsqrt(-NORM)
1859
# OVFL : all except ftst,fcmp
1860
# UNFL : all except ftst,fcmp
1861
# DZ : fdiv
1862
# INEX2 : all except ftst,fcmp
1863
# INEX1 : all
1864
#
1865
1866
# we determine the highest priority exception(if any) set by the
1867
# emulation routine that has also been enabled by the user.
1868
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1869
bne.w fu_in_ena_p # some are enabled
1870
1871
fu_in_cont_p:
1872
# fcmp and ftst do not store any result.
1873
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1874
andi.b &0x38,%d0 # extract bits 3-5
1875
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1876
beq.b fu_in_exit_p # yes
1877
1878
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1879
bsr.l store_fpreg # store the result
1880
1881
fu_in_exit_p:
1882
1883
btst &0x5,EXC_SR(%a6) # user or supervisor?
1884
bne.w fu_in_exit_s_p # supervisor
1885
1886
mov.l EXC_A7(%a6),%a0 # update user a7
1887
mov.l %a0,%usp
1888
1889
fu_in_exit_cont_p:
1890
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1891
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1893
1894
unlk %a6 # unravel stack frame
1895
1896
btst &0x7,(%sp) # is trace on?
1897
bne.w fu_trace_p # yes
1898
1899
bra.l _fpsp_done # exit to os
1900
1901
# the exception occurred in supervisor mode. check to see if the
1902
# addressing mode was (a7)+. if so, we'll need to shift the
1903
# stack frame "up".
1904
fu_in_exit_s_p:
1905
btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906
beq.b fu_in_exit_cont_p # no
1907
1908
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1909
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1911
1912
unlk %a6 # unravel stack frame
1913
1914
# shift the stack frame "up". we don't really care about the <ea> field.
1915
mov.l 0x4(%sp),0x10(%sp)
1916
mov.l 0x0(%sp),0xc(%sp)
1917
add.l &0xc,%sp
1918
1919
btst &0x7,(%sp) # is trace on?
1920
bne.w fu_trace_p # yes
1921
1922
bra.l _fpsp_done # exit to os
1923
1924
fu_in_ena_p:
1925
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1926
bfffo %d0{&24:&8},%d0 # find highest priority exception
1927
bne.b fu_in_exc_p # at least one was set
1928
1929
#
1930
# No exceptions occurred that were also enabled. Now:
1931
#
1932
# if (OVFL && ovfl_disabled && inexact_enabled) {
1933
# branch to _real_inex() (even if the result was exact!);
1934
# } else {
1935
# save the result in the proper fp reg (unless the op is fcmp or ftst);
1936
# return;
1937
# }
1938
#
1939
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940
beq.w fu_in_cont_p # no
1941
1942
fu_in_ovflchk_p:
1943
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944
beq.w fu_in_cont_p # no
1945
bra.w fu_in_exc_ovfl_p # do _real_inex() now
1946
1947
#
1948
# An exception occurred and that exception was enabled:
1949
#
1950
# shift enabled exception field into lo byte of d0;
1951
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953
# /*
1954
# * this is the case where we must call _real_inex() now or else
1955
# * there will be no other way to pass it the exceptional operand
1956
# */
1957
# call _real_inex();
1958
# } else {
1959
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1960
# }
1961
#
1962
fu_in_exc_p:
1963
subi.l &24,%d0 # fix offset to be 0-8
1964
cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1965
blt.b fu_in_exc_exit_p # no
1966
1967
# the enabled exception was inexact
1968
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969
bne.w fu_in_exc_unfl_p # yes
1970
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971
bne.w fu_in_exc_ovfl_p # yes
1972
1973
# here, we insert the correct fsave status value into the fsave frame for the
1974
# corresponding exception. the operand in the fsave frame should be the original
1975
# src operand.
1976
# as a reminder for future predicted pain and agony, we are passing in fsave the
1977
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1979
fu_in_exc_exit_p:
1980
btst &0x5,EXC_SR(%a6) # user or supervisor?
1981
bne.w fu_in_exc_exit_s_p # supervisor
1982
1983
mov.l EXC_A7(%a6),%a0 # update user a7
1984
mov.l %a0,%usp
1985
1986
fu_in_exc_exit_cont_p:
1987
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1988
1989
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1990
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1992
1993
frestore FP_SRC(%a6) # restore src op
1994
1995
unlk %a6
1996
1997
btst &0x7,(%sp) # is trace enabled?
1998
bne.w fu_trace_p # yes
1999
2000
bra.l _fpsp_done
2001
2002
tbl_except_p:
2003
short 0xe000,0xe006,0xe004,0xe005
2004
short 0xe003,0xe002,0xe001,0xe001
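#
# (note: these are the fsave status words stuffed into the frame, indexed
# in the same priority order as tbl_fu_out: BSUN=0xe000, SNAN=0xe006,
# OPERR=0xe004, OVFL=0xe005, UNFL=0xe003, DZ=0xe002, INEX2/INEX1=0xe001;
# the same values the fu_snan/fu_operr/fu_ovfl/fu_unfl/fu_inex routines
# above write explicitly.)
#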
fu_in_exc_ovfl_p:
2007
mov.w &0x3,%d0
2008
bra.w fu_in_exc_exit_p
2009
2010
fu_in_exc_unfl_p:
2011
mov.w &0x4,%d0
2012
bra.w fu_in_exc_exit_p
2013
2014
fu_in_exc_exit_s_p:
2015
btst &mia7_bit,SPCOND_FLG(%a6)
2016
beq.b fu_in_exc_exit_cont_p
2017
2018
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2019
2020
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2021
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2023
2024
frestore FP_SRC(%a6) # restore src op
2025
2026
unlk %a6 # unravel stack frame
2027
2028
# shift stack frame "up". who cares about <ea> field.
2029
mov.l 0x4(%sp),0x10(%sp)
2030
mov.l 0x0(%sp),0xc(%sp)
2031
add.l &0xc,%sp
2032
2033
btst &0x7,(%sp) # is trace on?
2034
bne.b fu_trace_p # yes
2035
2036
bra.l _fpsp_done # exit to os
2037
2038
#
2039
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2040
# exception was being traced. Make the "current" PC the FPIAR and put it in the
2041
# trace stack frame then jump to _real_trace().
2042
#
2043
# UNSUPP FRAME TRACE FRAME
2044
# ***************** *****************
2045
# * EA * * Current *
2046
# * * * PC *
2047
# ***************** *****************
2048
# * 0x2 * 0x0dc * * 0x2 * 0x024 *
2049
# ***************** *****************
2050
# * Next * * Next *
2051
# * PC * * PC *
2052
# ***************** *****************
2053
# * SR * * SR *
2054
# ***************** *****************
2055
fu_trace_p:
2056
mov.w &0x2024,0x6(%sp)
2057
fmov.l %fpiar,0x8(%sp)
2058
2059
bra.l _real_trace
2060
2061
#########################################################
2062
#########################################################
2063
fu_out_pack:
2064
2065
2066
# I'm not sure at this point what FPSR bits are valid for this instruction.
2067
# so, since the emulation routines re-create them anyways, zero exception field.
2068
# fmove out doesn't affect ccodes.
2069
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2070
2071
fmov.l &0x0,%fpcr # zero current control regs
2072
fmov.l &0x0,%fpsr
2073
2074
bfextu EXC_CMDREG(%a6){&6:&3},%d0
2075
bsr.l load_fpn1
2076
2077
# unlike the other opclass 3 unimplemented data type exceptions, packed must be
# able to detect all operand types.
2079
lea FP_SRC(%a6),%a0
2080
bsr.l set_tag_x # tag the operand type
2081
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2082
bne.b fu_op2_p # no
2083
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2084
2085
fu_op2_p:
2086
mov.b %d0,STAG(%a6) # save src optype tag
2087
2088
clr.l %d0
2089
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2090
2091
lea FP_SRC(%a6),%a0 # pass ptr to src operand
2092
2093
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2094
bsr.l fout # call fmove out routine
2095
2096
# Exceptions in order of precedence:
2097
# BSUN : no
2098
# SNAN : yes
2099
# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2100
# OVFL : no
2101
# UNFL : no
2102
# DZ : no
2103
# INEX2 : yes
2104
# INEX1 : no
2105
2106
# determine the highest priority exception(if any) set by the
2107
# emulation routine that has also been enabled by the user.
2108
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2109
bne.w fu_out_ena_p # some are enabled
2110
2111
fu_out_exit_p:
2112
mov.l EXC_A6(%a6),(%a6) # restore a6
2113
2114
btst &0x5,EXC_SR(%a6) # user or supervisor?
2115
bne.b fu_out_exit_s_p # supervisor
2116
2117
mov.l EXC_A7(%a6),%a0 # update user a7
2118
mov.l %a0,%usp
2119
2120
fu_out_exit_cont_p:
2121
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2122
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2124
2125
unlk %a6 # unravel stack frame
2126
2127
btst &0x7,(%sp) # is trace on?
2128
bne.w fu_trace_p # yes
2129
2130
bra.l _fpsp_done # exit to os
2131
2132
# the exception occurred in supervisor mode. check to see if the
2133
# addressing mode was -(a7). if so, we'll need to shift the
2134
# stack frame "down".
2135
fu_out_exit_s_p:
2136
btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137
beq.b fu_out_exit_cont_p # no
2138
2139
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2140
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2142
2143
mov.l (%a6),%a6 # restore frame pointer
2144
2145
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2146
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147
2148
# now, copy the result to the proper place on the stack
2149
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152
2153
add.l &LOCAL_SIZE-0x8,%sp
2154
2155
btst &0x7,(%sp)
2156
bne.w fu_trace_p
2157
2158
bra.l _fpsp_done
2159
2160
fu_out_ena_p:
2161
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2162
bfffo %d0{&24:&8},%d0 # find highest priority exception
2163
beq.w fu_out_exit_p
2164
2165
mov.l EXC_A6(%a6),(%a6) # restore a6
2166
2167
# an exception occurred and that exception was enabled.
# the only exceptions possible on a packed move out are INEX, OPERR, and SNAN.
2169
fu_out_exc_p:
2170
cmpi.b %d0,&0x1a
2171
bgt.w fu_inex_p2
2172
beq.w fu_operr_p
2173
2174
fu_snan_p:
2175
btst &0x5,EXC_SR(%a6)
2176
bne.b fu_snan_s_p
2177
2178
mov.l EXC_A7(%a6),%a0
2179
mov.l %a0,%usp
2180
bra.w fu_snan
2181
2182
fu_snan_s_p:
2183
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2184
bne.w fu_snan
2185
2186
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187
# the strategy is to move the exception frame "down" 12 bytes. then, we
2188
# can store the default result where the exception frame was.
2189
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2190
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2192
2193
mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2194
mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2195
2196
frestore FP_SRC(%a6) # restore src operand
2197
2198
mov.l (%a6),%a6 # restore frame pointer
2199
2200
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2203
2204
# now, we copy the default result to its proper location
2205
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2208
2209
add.l &LOCAL_SIZE-0x8,%sp
2210
2211
2212
bra.l _real_snan
2213
2214
fu_operr_p:
2215
btst &0x5,EXC_SR(%a6)
2216
bne.w fu_operr_p_s
2217
2218
mov.l EXC_A7(%a6),%a0
2219
mov.l %a0,%usp
2220
bra.w fu_operr
2221
2222
fu_operr_p_s:
2223
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2224
bne.w fu_operr
2225
2226
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227
# the strategy is to move the exception frame "down" 12 bytes. then, we
2228
# can store the default result where the exception frame was.
2229
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2230
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2232
2233
mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2234
mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2235
2236
frestore FP_SRC(%a6) # restore src operand
2237
2238
mov.l (%a6),%a6 # restore frame pointer
2239
2240
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243
2244
# now, we copy the default result to its proper location
2245
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248
2249
add.l &LOCAL_SIZE-0x8,%sp
2250
2251
2252
bra.l _real_operr
2253
2254
fu_inex_p2:
2255
btst &0x5,EXC_SR(%a6)
2256
bne.w fu_inex_s_p2
2257
2258
mov.l EXC_A7(%a6),%a0
2259
mov.l %a0,%usp
2260
bra.w fu_inex
2261
2262
fu_inex_s_p2:
2263
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2264
bne.w fu_inex
2265
2266
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267
# the strategy is to move the exception frame "down" 12 bytes. then, we
2268
# can store the default result where the exception frame was.
2269
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2270
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2272
2273
mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2274
mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2275
2276
frestore FP_SRC(%a6) # restore src operand
2277
2278
mov.l (%a6),%a6 # restore frame pointer
2279
2280
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283
2284
# now, we copy the default result to its proper location
2285
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288
2289
add.l &LOCAL_SIZE-0x8,%sp
2290
2291
2292
bra.l _real_inex
2293
2294
#########################################################################
2295
2296
#
2297
# if we're stuffing a source operand back into an fsave frame then we
2298
# have to make sure that for single or double source operands the
# format stuffed is as weird as the hardware usually makes it.
2300
#
2301
global funimp_skew
2302
funimp_skew:
2303
bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304
cmpi.b %d0,&0x1 # was src sgl?
2305
beq.b funimp_skew_sgl # yes
2306
cmpi.b %d0,&0x5 # was src dbl?
2307
beq.b funimp_skew_dbl # yes
2308
rts
2309
2310
funimp_skew_sgl:
2311
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2312
andi.w &0x7fff,%d0 # strip sign
2313
beq.b funimp_skew_sgl_not
2314
cmpi.w %d0,&0x3f80
2315
bgt.b funimp_skew_sgl_not
2316
neg.w %d0 # make exponent negative
2317
addi.w &0x3f81,%d0 # find amt to shift
2318
mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2319
lsr.l %d0,%d1 # shift it
2320
bset &31,%d1 # set j-bit
2321
mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2322
andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2323
ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2324
funimp_skew_sgl_not:
2325
rts
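#
# (note: 0x3f81 is 2^-126, the smallest single-precision normal magnitude,
# as a biased extended exponent (0x3fff - 126), so the shift amount above
# is how far the operand lies below the single normal range. e.g., an
# operand with extended exponent 0x3f7e gets shifted right by
# 0x3f81 - 0x3f7e = 3 and restamped with the "skewed" exponent 0x3f80.)
#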
funimp_skew_dbl:
2328
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2329
andi.w &0x7fff,%d0 # strip sign
2330
beq.b funimp_skew_dbl_not
2331
cmpi.w %d0,&0x3c00
2332
bgt.b funimp_skew_dbl_not
2333
2334
tst.b FP_SRC_EX(%a6) # make "internal format"
2335
smi.b 0x2+FP_SRC(%a6)
2336
mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2337
clr.l %d0 # clear g,r,s
2338
lea FP_SRC(%a6),%a0 # pass ptr to src op
2339
mov.w &0x3c01,%d1 # pass denorm threshold
2340
bsr.l dnrm_lp # denorm it
2341
mov.w &0x3c00,%d0 # new exponent
2342
tst.b 0x2+FP_SRC(%a6) # is sign set?
2343
beq.b fss_dbl_denorm_done # no
2344
bset &15,%d0 # set sign
2345
fss_dbl_denorm_done:
2346
bset &0x7,FP_SRC_HI(%a6) # set j-bit
2347
mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2348
funimp_skew_dbl_not:
2349
rts
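#
# (note: the dbl case works the same way but denormalizes with dnrm_lp
# against the threshold 0x3c01 (2^-1022 as a biased extended exponent,
# 0x3fff - 1022) and then restamps the operand with the "skewed" exponent
# 0x3c00 and the j-bit set; this is essentially the inverse of the
# fso_dbl_dnrm fixup earlier in this file.)
#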
#########################################################################
2352
global _mem_write2
2353
_mem_write2:
2354
btst &0x5,EXC_SR(%a6)
2355
beq.l _dmem_write
2356
mov.l 0x0(%a0),FP_DST_EX(%a6)
2357
mov.l 0x4(%a0),FP_DST_HI(%a6)
2358
mov.l 0x8(%a0),FP_DST_LO(%a6)
2359
clr.l %d1
2360
rts
2361
2362
#########################################################################
2363
# XDEF **************************************************************** #
2364
# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2365
# effective address" exception. #
2366
# #
2367
# This handler should be the first code executed upon taking the #
2368
# FP Unimplemented Effective Address exception in an operating #
2369
# system. #
2370
# #
2371
# XREF **************************************************************** #
2372
# _imem_read_long() - read instruction longword #
2373
# fix_skewed_ops() - adjust src operand in fsave frame #
2374
# set_tag_x() - determine optype of src/dst operands #
2375
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
2376
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
2377
# load_fpn2() - load dst operand from FP regfile #
2378
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2379
# decbin() - convert packed data to FP binary data #
2380
# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2381
# _real_access() - "callout" for access error exception #
2382
# _mem_read() - read extended immediate operand from memory #
2383
# _fpsp_done() - "callout" for exit; work all done #
2384
# _real_trace() - "callout" for Trace enabled exception #
2385
# fmovm_dynamic() - emulate dynamic fmovm instruction #
2386
# fmovm_ctrl() - emulate fmovm control instruction #
2387
# #
2388
# INPUT *************************************************************** #
2389
# - The system stack contains the "Unimplemented <ea>" stk frame #
2390
# #
2391
# OUTPUT ************************************************************** #
2392
# If access error: #
2393
# - The system stack is changed to an access error stack frame #
2394
# If FPU disabled: #
2395
# - The system stack is changed to an FPU disabled stack frame #
2396
# If Trace exception enabled: #
2397
# - The system stack is changed to a Trace exception stack frame #
2398
# Else: (normal case) #
2399
# - None (correct result has been stored as appropriate) #
2400
# #
2401
# ALGORITHM *********************************************************** #
2402
# This exception handles 3 types of operations: #
2403
# (1) FP Instructions using extended precision or packed immediate #
2404
# addressing mode. #
2405
# (2) The "fmovm.x" instruction w/ dynamic register specification. #
2406
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2407
# #
2408
# For immediate data operations, the data is read in w/ a #
2409
# _mem_read() "callout", converted to FP binary (if packed), and used #
2410
# as the source operand to the instruction specified by the instruction #
2411
# word. If no FP exception should be reported as a result of the #
2412
# emulation, then the result is stored to the destination register and #
2413
# the handler exits through _fpsp_done(). If an enabled exc has been #
2414
# signalled as a result of emulation, then an fsave state frame #
2415
# corresponding to the FP exception type must be entered into the 060 #
2416
# FPU before exiting. In either the enabled or disabled cases, we #
2417
# must also check if a Trace exception is pending, in which case, we #
2418
# must create a Trace exception stack frame from the current exception #
2419
# stack frame. If no Trace is pending, we simply exit through #
2420
# _fpsp_done(). #
2421
# For "fmovm.x", call the routine fmovm_dynamic() which will #
2422
# decode and emulate the instruction. No FP exceptions can be pending #
2423
# as a result of this operation emulation. A Trace exception can be #
2424
# pending, though, which means the current stack frame must be changed #
2425
# to a Trace stack frame and an exit made through _real_trace(). #
2426
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2427
# was executed from supervisor mode, this handler must store the FP #
2428
# register file values to the system stack by itself since #
2429
# fmovm_dynamic() can't handle this. A normal exit is made through #
2430
# _fpsp_done(). #
2431
# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2432
# Again, a Trace exception may be pending and an exit made through #
2433
# _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2434
# #
2435
# Before any of the above is attempted, it must be checked to #
2436
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2437
# before the "FPU disabled" exception, but the "FPU disabled" exception #
2438
# has higher priority, we check the disabled bit in the PCR. If set, #
2439
# then we must create an 8 word "FPU disabled" exception stack frame #
2440
# from the current 4 word exception stack frame. This includes #
2441
# reproducing the effective address of the instruction to put on the #
2442
# new stack frame. #
2443
# #
2444
# In the process of all emulation work, if a _mem_read() #
2445
# "callout" returns a failing result indicating an access error, then #
2446
# we must create an access error stack frame from the current stack #
2447
# frame. This information includes a faulting address and a fault- #
2448
# status-longword. These are created within this handler. #
2449
# #
2450
#########################################################################
2451
2452
global _fpsp_effadd
2453
_fpsp_effadd:
2454
2455
# This exception type takes priority over the "Line F Emulator"
2456
# exception. Therefore, the FPU could be disabled when entering here.
2457
# So, we must check to see if it's disabled and handle that case separately.
2458
mov.l %d0,-(%sp) # save d0
2459
movc %pcr,%d0 # load proc cr
2460
btst &0x1,%d0 # is FPU disabled?
2461
bne.w iea_disabled # yes
2462
mov.l (%sp)+,%d0 # restore d0
2463
2464
link %a6,&-LOCAL_SIZE # init stack frame
2465
2466
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2467
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2469
2470
# PC of instruction that took the exception is the PC in the frame
2471
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472
2473
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2474
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2475
bsr.l _imem_read_long # fetch the instruction words
2476
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2477
2478
#########################################################################
2479
2480
tst.w %d0 # is operation fmovem?
2481
bmi.w iea_fmovm # yes
2482
2483
#
2484
# here, we will have:
2485
# fabs fdabs fsabs facos fmod
2486
# fadd fdadd fsadd fasin frem
2487
# fcmp fatan fscale
2488
# fdiv fddiv fsdiv fatanh fsin
2489
# fint fcos fsincos
2490
# fintrz fcosh fsinh
2491
# fmove fdmove fsmove fetox ftan
2492
# fmul fdmul fsmul fetoxm1 ftanh
2493
# fneg fdneg fsneg fgetexp ftentox
2494
# fsgldiv fgetman ftwotox
2495
# fsglmul flog10
2496
# fsqrt flog2
2497
# fsub fdsub fssub flogn
2498
# ftst flognp1
2499
# which can all use f<op>.{x,p}
2500
# so, now it's immediate data extended precision AND PACKED FORMAT!
2501
#
2502
iea_op:
2503
andi.l &0x00ff00ff,USER_FPSR(%a6)
2504
2505
btst &0xa,%d0 # is src fmt x or p?
2506
bne.b iea_op_pack # packed
2507
2508
2509
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2510
lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2511
mov.l &0xc,%d0 # pass: 12 bytes
2512
bsr.l _imem_read # read extended immediate
2513
2514
tst.l %d1 # did ifetch fail?
2515
bne.w iea_iacc # yes
2516
2517
bra.b iea_op_setsrc
2518
2519
iea_op_pack:
2520
2521
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2522
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2523
mov.l &0xc,%d0 # pass: 12 bytes
2524
bsr.l _imem_read # read packed operand
2525
2526
tst.l %d1 # did ifetch fail?
2527
bne.w iea_iacc # yes
2528
2529
# The packed operand is an INF or a NAN if the exponent field is all ones.
2530
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2531
cmpi.w %d0,&0x7fff # INF or NAN?
2532
beq.b iea_op_setsrc # operand is an INF or NAN
2533
2534
# The packed operand is a zero if the mantissa is all zero, else it's
2535
# a normal packed op.
2536
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2537
andi.b &0x0f,%d0 # clear all but last nybble
2538
bne.b iea_op_gp_not_spec # not a zero
2539
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2540
bne.b iea_op_gp_not_spec # not a zero
2541
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2542
beq.b iea_op_setsrc # operand is a ZERO
2543
iea_op_gp_not_spec:
2544
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2545
bsr.l decbin # convert to extended
2546
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2547
2548
iea_op_setsrc:
2549
addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2550
2551
# FP_SRC now holds the src operand.
2552
lea FP_SRC(%a6),%a0 # pass: ptr to src op
2553
bsr.l set_tag_x # tag the operand type
2554
mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2555
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2556
bne.b iea_op_getdst # no
2557
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2558
mov.b %d0,STAG(%a6) # set new optype tag
2559
iea_op_getdst:
2560
clr.b STORE_FLG(%a6) # clear "store result" boolean
2561
2562
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2563
beq.b iea_op_extract # monadic
2564
btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2565
bne.b iea_op_spec # yes
2566
2567
iea_op_loaddst:
2568
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569
bsr.l load_fpn2 # load dst operand
2570
2571
lea FP_DST(%a6),%a0 # pass: ptr to dst op
2572
bsr.l set_tag_x # tag the operand type
2573
mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2574
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2575
bne.b iea_op_extract # no
2576
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2577
mov.b %d0,DTAG(%a6) # set new optype tag
2578
bra.b iea_op_extract
2579
2580
# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581
iea_op_spec:
2582
btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2583
beq.b iea_op_extract # yes
2584
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585
# store a result. then, only fcmp will branch back and pick up a dst operand.
2586
st STORE_FLG(%a6) # don't store a final result
2587
btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2588
beq.b iea_op_loaddst # yes
2589
2590
iea_op_extract:
2591
clr.l %d0
2592
mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2593
2594
mov.b 1+EXC_CMDREG(%a6),%d1
2595
andi.w &0x007f,%d1 # extract extension
2596
2597
fmov.l &0x0,%fpcr
2598
fmov.l &0x0,%fpsr
2599
2600
lea FP_SRC(%a6),%a0
2601
lea FP_DST(%a6),%a1
2602
2603
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604
jsr (tbl_unsupp.l,%pc,%d1.l*1)
2605
2606
#
2607
# Exceptions in order of precedence:
2608
# BSUN : none
2609
# SNAN : all operations
2610
# OPERR : all reg-reg or mem-reg operations that can normally operr
2611
# OVFL : same as OPERR
2612
# UNFL : same as OPERR
2613
# DZ : same as OPERR
2614
# INEX2 : same as OPERR
2615
# INEX1 : all packed immediate operations
2616
#
2617
2618
# we determine the highest priority exception(if any) set by the
2619
# emulation routine that has also been enabled by the user.
2620
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2621
bne.b iea_op_ena # some are enabled
2622
2623
# now, we save the result, unless, of course, the operation was ftst or fcmp.
2624
# these don't save results.
2625
iea_op_save:
2626
tst.b STORE_FLG(%a6) # does this op store a result?
2627
bne.b iea_op_exit1 # exit with no frestore
2628
2629
iea_op_store:
2630
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631
bsr.l store_fpreg # store the result
2632
2633
iea_op_exit1:
2634
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636
2637
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2638
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2640
2641
unlk %a6 # unravel the frame
2642
2643
btst &0x7,(%sp) # is trace on?
2644
bne.w iea_op_trace # yes
2645
2646
bra.l _fpsp_done # exit to os
2647
2648
iea_op_ena:
2649
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2650
bfffo %d0{&24:&8},%d0 # find highest priority exception
2651
bne.b iea_op_exc # at least one was set
2652
2653
# no exception occurred. now, did a disabled, exact overflow occur with inexact
2654
# enabled? if so, then we have to stuff an overflow frame into the FPU.
2655
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2656
beq.b iea_op_save
2657
2658
iea_op_ovfl:
2659
btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660
beq.b iea_op_store # no
2661
bra.b iea_op_exc_ovfl # yes
2662
2663
# an enabled exception occurred. we have to insert the exception type back into
2664
# the machine.
2665
iea_op_exc:
2666
subi.l &24,%d0 # fix offset to be 0-8
2667
cmpi.b %d0,&0x6 # is exception INEX?
2668
bne.b iea_op_exc_force # no
2669
2670
# the enabled exception was inexact. so, if it occurs with an overflow
2671
# or underflow that was disabled, then we have to force an overflow or
2672
# underflow frame.
2673
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674
bne.b iea_op_exc_ovfl # yes
2675
btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676
bne.b iea_op_exc_unfl # yes
2677
2678
iea_op_exc_force:
2679
mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2680
bra.b iea_op_exit2 # exit with frestore
2681
2682
tbl_iea_except:
2683
short 0xe002, 0xe006, 0xe004, 0xe005
2684
short 0xe003, 0xe002, 0xe001, 0xe001
2685
2686
iea_op_exc_ovfl:
2687
mov.w &0xe005,2+FP_SRC(%a6)
2688
bra.b iea_op_exit2
2689
2690
iea_op_exc_unfl:
2691
mov.w &0xe003,2+FP_SRC(%a6)
2692
2693
iea_op_exit2:
2694
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696
2697
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2698
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2700
2701
frestore FP_SRC(%a6) # restore exceptional state
2702
2703
unlk %a6 # unravel the frame
2704
2705
btst &0x7,(%sp) # is trace on?
2706
bne.b iea_op_trace # yes
2707
2708
bra.l _fpsp_done # exit to os
2709
2710
#
2711
# The opclass two instruction that took an "Unimplemented Effective Address"
2712
# exception was being traced. Make the "current" PC the FPIAR and put it in
2713
# the trace stack frame then jump to _real_trace().
2714
#
2715
# UNIMP EA FRAME TRACE FRAME
2716
# ***************** *****************
2717
# * 0x0 * 0x0f0 * * Current *
2718
# ***************** * PC *
2719
# * Current * *****************
2720
# * PC * * 0x2 * 0x024 *
2721
# ***************** *****************
2722
# * SR * * Next *
2723
# ***************** * PC *
2724
# *****************
2725
# * SR *
2726
# *****************
2727
iea_op_trace:
2728
mov.l (%sp),-(%sp) # shift stack frame "down"
2729
mov.w 0x8(%sp),0x4(%sp)
2730
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2731
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2732
2733
bra.l _real_trace
2734
2735
#########################################################################
2736
iea_fmovm:
2737
btst &14,%d0 # ctrl or data reg
2738
beq.w iea_fmovm_ctrl
2739
2740
iea_fmovm_data:
2741
2742
btst &0x5,EXC_SR(%a6) # user or supervisor mode
2743
bne.b iea_fmovm_data_s
2744
2745
iea_fmovm_data_u:
2746
mov.l %usp,%a0
2747
mov.l %a0,EXC_A7(%a6) # store current a7
2748
bsr.l fmovm_dynamic # do dynamic fmovm
2749
mov.l EXC_A7(%a6),%a0 # load possibly new a7
2750
mov.l %a0,%usp # update usp
2751
bra.w iea_fmovm_exit
2752
2753
iea_fmovm_data_s:
2754
clr.b SPCOND_FLG(%a6)
2755
lea 0x2+EXC_VOFF(%a6),%a0
2756
mov.l %a0,EXC_A7(%a6)
2757
bsr.l fmovm_dynamic # do dynamic fmovm
2758
2759
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2760
beq.w iea_fmovm_data_predec
2761
cmpi.b SPCOND_FLG(%a6),&mia7_flg
2762
bne.w iea_fmovm_exit
2763
2764
# right now, d0 = the size.
2765
# the data has been fetched from the supervisor stack, but we have not
2766
# incremented the stack pointer by the appropriate number of bytes.
2767
# do it here.
2768
iea_fmovm_data_postinc:
2769
btst &0x7,EXC_SR(%a6)
2770
bne.b iea_fmovm_data_pi_trace
2771
2772
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2773
mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2774
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2775
2776
lea (EXC_SR,%a6,%d0),%a0
2777
mov.l %a0,EXC_SR(%a6)
2778
2779
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2780
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2782
2783
unlk %a6
2784
mov.l (%sp)+,%sp
2785
bra.l _fpsp_done
2786
2787
iea_fmovm_data_pi_trace:
2788
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2789
mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2790
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2791
mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2792
2793
lea (EXC_SR-0x4,%a6,%d0),%a0
2794
mov.l %a0,EXC_SR(%a6)
2795
2796
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2797
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2799
2800
unlk %a6
2801
mov.l (%sp)+,%sp
2802
bra.l _real_trace
2803
2804
# right now, d1 = size and d0 = the strg.
2805
iea_fmovm_data_predec:
2806
mov.b %d1,EXC_VOFF(%a6) # store strg
2807
mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2808
2809
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2810
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2812
2813
mov.l (%a6),-(%sp) # make a copy of a6
2814
mov.l %d0,-(%sp) # save d0
2815
mov.l %d1,-(%sp) # save d1
2816
mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2817
2818
clr.l %d0
2819
mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2820
neg.l %d0 # get negative of size
2821
2822
btst &0x7,EXC_SR(%a6) # is trace enabled?
2823
beq.b iea_fmovm_data_p2
2824
2825
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2826
mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2827
mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2828
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2829
2830
pea (%a6,%d0) # create final sp
2831
bra.b iea_fmovm_data_p3
2832
2833
iea_fmovm_data_p2:
2834
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2835
mov.l (%sp)+,(EXC_PC,%a6,%d0)
2836
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2837
2838
pea (0x4,%a6,%d0) # create final sp
2839
2840
iea_fmovm_data_p3:
2841
clr.l %d1
2842
mov.b EXC_VOFF(%a6),%d1 # fetch strg
2843
2844
tst.b %d1
2845
bpl.b fm_1
2846
fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2847
addi.l &0xc,%d0
2848
fm_1:
2849
lsl.b &0x1,%d1
2850
bpl.b fm_2
2851
fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2852
addi.l &0xc,%d0
2853
fm_2:
2854
lsl.b &0x1,%d1
2855
bpl.b fm_3
2856
fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2857
addi.l &0xc,%d0
2858
fm_3:
2859
lsl.b &0x1,%d1
2860
bpl.b fm_4
2861
fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2862
addi.l &0xc,%d0
2863
fm_4:
2864
lsl.b &0x1,%d1
2865
bpl.b fm_5
2866
fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2867
addi.l &0xc,%d0
2868
fm_5:
2869
lsl.b &0x1,%d1
2870
bpl.b fm_6
2871
fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2872
addi.l &0xc,%d0
2873
fm_6:
2874
lsl.b &0x1,%d1
2875
bpl.b fm_7
2876
fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2877
addi.l &0xc,%d0
2878
fm_7:
2879
lsl.b &0x1,%d1
2880
bpl.b fm_end
2881
fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2882
fm_end:
2883
mov.l 0x4(%sp),%d1
2884
mov.l 0x8(%sp),%d0
2885
mov.l 0xc(%sp),%a6
2886
mov.l (%sp)+,%sp
2887
2888
btst &0x7,(%sp) # is trace enabled?
2889
beq.l _fpsp_done
2890
bra.l _real_trace
2891
2892
#########################################################################
2893
iea_fmovm_ctrl:
2894
2895
bsr.l fmovm_ctrl # load ctrl regs
2896
2897
iea_fmovm_exit:
2898
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2899
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2901
2902
btst &0x7,EXC_SR(%a6) # is trace on?
2903
bne.b iea_fmovm_trace # yes
2904
2905
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2906
2907
unlk %a6 # unravel the frame
2908
2909
bra.l _fpsp_done # exit to os
2910
2911
#
2912
# The control reg instruction that took an "Unimplemented Effective Address"
2913
# exception was being traced. The "Current PC" for the trace frame is the
2914
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915
# After fixing the stack frame, jump to _real_trace().
2916
#
2917
# UNIMP EA FRAME TRACE FRAME
2918
# ***************** *****************
2919
# * 0x0 * 0x0f0 * * Current *
2920
# ***************** * PC *
2921
# * Current * *****************
2922
# * PC * * 0x2 * 0x024 *
2923
# ***************** *****************
2924
# * SR * * Next *
2925
# ***************** * PC *
2926
# *****************
2927
# * SR *
2928
# *****************
2929
# this ain't a pretty solution, but it works:
2930
# -restore a6 (not with unlk)
2931
# -shift stack frame down over where old a6 used to be
2932
# -add LOCAL_SIZE to stack pointer
2933
iea_fmovm_trace:
2934
mov.l (%a6),%a6 # restore frame pointer
2935
mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2936
mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2937
mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2938
mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939
add.l &LOCAL_SIZE,%sp # clear stack frame
2940
2941
bra.l _real_trace
2942
2943
#########################################################################
2944
# The FPU is disabled and so we should really have taken the "Line
2945
# F Emulator" exception. So, here we create an 8-word stack frame
2946
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC". This is trivial for
2948
# immediate operands but requires some extra work for fmovm dynamic
2949
# which can use most addressing modes.
2950
iea_disabled:
2951
mov.l (%sp)+,%d0 # restore d0
2952
2953
link %a6,&-LOCAL_SIZE # init stack frame
2954
2955
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2956
2957
# PC of instruction that took the exception is the PC in the frame
2958
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2960
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2961
bsr.l _imem_read_long # fetch the instruction words
2962
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2963
2964
tst.w %d0 # is instr fmovm?
2965
bmi.b iea_dis_fmovm # yes
2966
# instruction is using an extended precision immediate operand. Therefore,
2967
# the total instruction length is 16 bytes.
2968
iea_dis_immed:
2969
mov.l &0x10,%d0 # 16 bytes of instruction
2970
bra.b iea_dis_cont
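# (the 16 bytes = 2-byte opword + 2-byte extension word + 12-byte extended
# or packed immediate operand.)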
iea_dis_fmovm:
2972
btst &0xe,%d0 # is instr fmovm ctrl
2973
bne.b iea_dis_fmovm_data # no
2974
# the instruction is a fmovm.l with 2 or 3 registers.
2975
bfextu %d0{&19:&3},%d1
2976
mov.l &0xc,%d0
2977
cmpi.b %d1,&0x7 # move all regs?
2978
bne.b iea_dis_cont
2979
addq.l &0x4,%d0
2980
bra.b iea_dis_cont
2981
# the instruction is an fmovm.x dynamic which can use many addressing
2982
# modes and thus can have several different total instruction lengths.
2983
# call fmovm_calc_ea which will go through the ea calc process and,
2984
# as a by-product, will tell us how long the instruction is.
2985
iea_dis_fmovm_data:
2986
clr.l %d0
2987
bsr.l fmovm_calc_ea
2988
mov.l EXC_EXTWPTR(%a6),%d0
2989
sub.l EXC_PC(%a6),%d0
2990
iea_dis_cont:
2991
mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2992
2993
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2994
2995
unlk %a6
2996
2997
# here, we actually create the 8-word frame from the 4-word frame,
2998
# with the "next PC" as additional info.
2999
# the <ea> field is left undefined.
3000
subq.l &0x8,%sp # make room for new stack
3001
mov.l %d0,-(%sp) # save d0
3002
mov.w 0xc(%sp),0x4(%sp) # move SR
3003
mov.l 0xe(%sp),0x6(%sp) # move Current PC
3004
clr.l %d0
3005
mov.w 0x12(%sp),%d0
3006
mov.l 0x6(%sp),0x10(%sp) # move Current PC
3007
add.l %d0,0x6(%sp) # make Next PC
3008
mov.w &0x402c,0xa(%sp) # insert offset,frame format
3009
mov.l (%sp)+,%d0 # restore d0
3010
3011
bra.l _real_fpu_disabled
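#
# (note: 0x402c above is the format/vector offset word of the new frame:
# format $4, the 8-word frame, with vector offset 0x02c = vector 11, the
# Line F emulator vector used for "FPU disabled".)
#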
##########
3014
3015
iea_iacc:
3016
movc %pcr,%d0
3017
btst &0x1,%d0
3018
bne.b iea_iacc_cont
3019
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3021
iea_iacc_cont:
3022
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3023
3024
unlk %a6
3025
3026
subq.w &0x8,%sp # make stack frame bigger
3027
mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3028
mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3029
mov.w &0x4008,0x6(%sp) # store voff
3030
mov.l 0x2(%sp),0x8(%sp) # store ea
3031
mov.l &0x09428001,0xc(%sp) # store fslw
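#
# (note: this builds a format $4 access error frame in place: 0x4008 =
# format $4, vector offset 0x008 = vector 2, access error. the longword at
# 0x8(%sp) is the faulting <ea> and the one at 0xc(%sp) is the fault
# status longword handed to _real_access().)
#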
iea_acc_done:
3034
btst &0x5,(%sp) # user or supervisor mode?
3035
beq.b iea_acc_done2 # user
3036
bset &0x2,0xd(%sp) # set supervisor TM bit
3037
3038
iea_acc_done2:
3039
bra.l _real_access
3040
3041
iea_dacc:
3042
lea -LOCAL_SIZE(%a6),%sp
3043
3044
movc %pcr,%d1
3045
btst &0x1,%d1
3046
bne.b iea_dacc_cont
3047
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3048
fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3049
iea_dacc_cont:
3050
mov.l (%a6),%a6
3051
3052
mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3053
mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3054
mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3055
mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3056
mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3057
mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3058
3059
movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3060
add.w &LOCAL_SIZE-0x4,%sp
3061
3062
bra.b iea_acc_done
3063
3064
#########################################################################
3065
# XDEF **************************************************************** #
3066
# _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3067
# #
3068
# This handler should be the first code executed upon taking the #
3069
# FP Operand Error exception in an operating system. #
3070
# #
3071
# XREF **************************************************************** #
3072
# _imem_read_long() - read instruction longword #
3073
# fix_skewed_ops() - adjust src operand in fsave frame #
3074
# _real_operr() - "callout" to operating system operr handler #
3075
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3076
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3077
# facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3078
# #
3079
# INPUT *************************************************************** #
3080
# - The system stack contains the FP Operr exception frame #
3081
# - The fsave frame contains the source operand #
3082
# #
3083
# OUTPUT ************************************************************** #
3084
# No access error: #
3085
# - The system stack is unchanged #
3086
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3087
# #
3088
# ALGORITHM *********************************************************** #
3089
# In a system where the FP Operr exception is enabled, the goal #
3090
# is to get to the handler specified at _real_operr(). But, on the 060, #
3091
# for opclass zero and two instruction taking this exception, the #
3092
# input operand in the fsave frame may be incorrect for some cases #
3093
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3094
# do just this and then exits through _real_operr(). #
3095
# For opclass 3 instructions, the 060 doesn't store the default #
3096
# operr result out to memory or data register file as it should. #
3097
# This code must emulate the move out before finally exiting through #
3098
# _real_inex(). The move out, if to memory, is performed using #
3099
# _mem_write() "callout" routines that may return a failing result. #
3100
# In this special case, the handler must exit through facc_out() #
3101
# which creates an access error stack frame from the current operr #
3102
# stack frame. #
3103
# #
3104
#########################################################################
3105
3106
global _fpsp_operr
3107
_fpsp_operr:
3108
3109
link.w %a6,&-LOCAL_SIZE # init stack frame
3110
3111
fsave FP_SRC(%a6) # grab the "busy" frame
3112
3113
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3114
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3116
3117
# the FPIAR holds the "current PC" of the faulting instruction
3118
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119
3120
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3121
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3122
bsr.l _imem_read_long # fetch the instruction words
3123
mov.l %d0,EXC_OPWORD(%a6)
3124
3125
##############################################################################
3126
3127
btst &13,%d0 # is instr an fmove out?
3128
bne.b foperr_out # fmove out
3129
3130
3131
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132
# this would be the case for opclass two operations with a source infinity or
3133
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134
# cause an operr so we don't need to check for them here.
3135
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3136
bsr.l fix_skewed_ops # fix src op
3137
3138
foperr_exit:
3139
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3140
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3142
3143
frestore FP_SRC(%a6)
3144
3145
unlk %a6
3146
bra.l _real_operr
3147
3148
########################################################################
3149
3150
#
3151
# the hardware does not save the default result to memory on enabled
3152
# operand error exceptions. we do this here before passing control to
3153
# the user operand error handler.
3154
#
3155
# byte, word, and long destination format operations can pass
3156
# through here. we simply need to test the sign of the src
3157
# operand and save the appropriate minimum or maximum integer value
3158
# to the effective address as pointed to by the stacked effective address.
3159
#
3160
# although packed opclass three operations can take operand error
3161
# exceptions, they won't pass through here since they are caught
3162
# first by the unsupported data format exception handler. that handler
3163
# sends them directly to _real_operr() if necessary.
3164
#
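# as an example of the default result described above: a negative src
# operand produces 0x80000000; since the 68k is big-endian, the byte and
# word cases below then pick up 0x80 and 0x8000 respectively from the top
# of that longword in L_SCR1.
#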
foperr_out:
3166
3167
mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3168
andi.w &0x7fff,%d1
3169
cmpi.w %d1,&0x7fff
3170
bne.b foperr_out_not_qnan
3171
# the operand is either an infinity or a QNAN.
3172
tst.l FP_SRC_LO(%a6)
3173
bne.b foperr_out_qnan
3174
mov.l FP_SRC_HI(%a6),%d1
3175
andi.l &0x7fffffff,%d1
3176
beq.b foperr_out_not_qnan
3177
foperr_out_qnan:
3178
mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3179
bra.b foperr_out_jmp
3180
3181
foperr_out_not_qnan:
3182
mov.l &0x7fffffff,%d1
3183
tst.b FP_SRC_EX(%a6)
3184
bpl.b foperr_out_not_qnan2
3185
addq.l &0x1,%d1
3186
foperr_out_not_qnan2:
3187
mov.l %d1,L_SCR1(%a6)
3188
3189
foperr_out_jmp:
3190
bfextu %d0{&19:&3},%d0 # extract dst format field
3191
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3192
mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3193
jmp (tbl_operr.b,%pc,%a0)
3194
3195
tbl_operr:
3196
short foperr_out_l - tbl_operr # long word integer
3197
short tbl_operr - tbl_operr # sgl prec shouldn't happen
3198
short tbl_operr - tbl_operr # ext prec shouldn't happen
3199
short foperr_exit - tbl_operr # packed won't enter here
3200
short foperr_out_w - tbl_operr # word integer
3201
short tbl_operr - tbl_operr # dbl prec shouldn't happen
3202
short foperr_out_b - tbl_operr # byte integer
3203
short tbl_operr - tbl_operr # packed won't enter here
3204
3205
foperr_out_b:
3206
mov.b L_SCR1(%a6),%d0 # load default result hi byte
3207
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3208
ble.b foperr_out_b_save_dn # yes
3209
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3210
bsr.l _dmem_write_byte # write the default result
3211
3212
tst.l %d1 # did dstore fail?
3213
bne.l facc_out_b # yes
3214
3215
bra.w foperr_exit
3216
foperr_out_b_save_dn:
3217
andi.w &0x0007,%d1
3218
bsr.l store_dreg_b # store result to regfile
3219
bra.w foperr_exit
3220
3221
foperr_out_w:
3222
mov.w L_SCR1(%a6),%d0 # load default result hi word
3223
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3224
ble.b foperr_out_w_save_dn # yes
3225
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3226
bsr.l _dmem_write_word # write the default result
3227
3228
tst.l %d1 # did dstore fail?
3229
bne.l facc_out_w # yes
3230
3231
bra.w foperr_exit
3232
foperr_out_w_save_dn:
3233
andi.w &0x0007,%d1
3234
bsr.l store_dreg_w # store result to regfile
3235
bra.w foperr_exit
3236
3237
foperr_out_l:
3238
mov.l L_SCR1(%a6),%d0 # load default result
3239
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3240
ble.b foperr_out_l_save_dn # yes
3241
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3242
bsr.l _dmem_write_long # write the default result
3243
3244
tst.l %d1 # did dstore fail?
3245
bne.l facc_out_l # yes
3246
3247
bra.w foperr_exit
3248
foperr_out_l_save_dn:
3249
andi.w &0x0007,%d1
3250
bsr.l store_dreg_l # store result to regfile
3251
bra.w foperr_exit
3252
3253
#########################################################################
3254
# XDEF **************************************************************** #
3255
# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3256
# #
3257
# This handler should be the first code executed upon taking the #
3258
# FP Signalling NAN exception in an operating system. #
3259
# #
3260
# XREF **************************************************************** #
3261
# _imem_read_long() - read instruction longword #
3262
# fix_skewed_ops() - adjust src operand in fsave frame #
3263
# _real_snan() - "callout" to operating system SNAN handler #
3264
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3265
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3266
# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3267
# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3268
# #
3269
# INPUT *************************************************************** #
3270
# - The system stack contains the FP SNAN exception frame #
3271
# - The fsave frame contains the source operand #
3272
# #
3273
# OUTPUT ************************************************************** #
3274
# No access error: #
3275
# - The system stack is unchanged #
3276
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3277
# #
3278
# ALGORITHM *********************************************************** #
3279
# In a system where the FP SNAN exception is enabled, the goal #
3280
# is to get to the handler specified at _real_snan(). But, on the 060, #
3281
# for opclass zero and two instructions taking this exception, the #
3282
# input operand in the fsave frame may be incorrect for some cases #
3283
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3284
# do just this and then exits through _real_snan(). #
3285
# For opclass 3 instructions, the 060 doesn't store the default #
3286
# SNAN result out to memory or data register file as it should. #
3287
# This code must emulate the move out before finally exiting through #
3288
# _real_snan(). The move out, if to memory, is performed using #
3289
# _mem_write() "callout" routines that may return a failing result. #
3290
# In this special case, the handler must exit through facc_out() #
3291
# which creates an access error stack frame from the current SNAN #
3292
# stack frame. #
3293
# For the case of an extended precision opclass 3 instruction, #
3294
# if the effective addressing mode was -() or ()+, then the address #
3295
# register must get updated by calling _calc_ea_fout(). If the <ea> #
3296
# was -(a7) from supervisor mode, then the exception frame currently #
3297
# on the system stack must be carefully moved "down" to make room #
3298
# for the operand being moved. #
3299
# #
3300
#########################################################################
3301
3302
global _fpsp_snan
3303
_fpsp_snan:
3304
3305
link.w %a6,&-LOCAL_SIZE # init stack frame
3306
3307
fsave FP_SRC(%a6) # grab the "busy" frame
3308
3309
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3310
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3312
3313
# the FPIAR holds the "current PC" of the faulting instruction
3314
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3315
3316
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3317
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3318
bsr.l _imem_read_long # fetch the instruction words
3319
mov.l %d0,EXC_OPWORD(%a6)
3320
3321
##############################################################################
3322
3323
btst &13,%d0 # is instr an fmove out?
3324
bne.w fsnan_out # fmove out
3325
3326
3327
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328
# this would be the case for opclass two operations with a source infinity or
3329
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330
# fixed here.
3331
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3332
bsr.l fix_skewed_ops # fix src op
3333
3334
fsnan_exit:
3335
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3336
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3338
3339
frestore FP_SRC(%a6)
3340
3341
unlk %a6
3342
bra.l _real_snan
3343
3344
########################################################################
3345
3346
#
3347
# the hardware does not save the default result to memory on enabled
3348
# snan exceptions. we do this here before passing control to
3349
# the user snan handler.
3350
#
3351
# byte, word, long, sgl, dbl, ext, and packed destination format operations
3352
# can pass through here. since packed format operations were already handled
3353
# by fpsp_unsupp(), we need to do nothing else for them here.
3354
# for the other formats, we simply need to convert the SNAN operand to the
3355
# destination format, set the SNAN bit so the stored NAN is non-signalling,
3356
# and save it to the data register or to the stacked effective address.
3357
#
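# for example, with a source SNAN whose upper mantissa longword is 0x12345678,
# "fmove.b" stores 0x52 (0x12 w/ bit 6 set), "fmove.w" stores 0x5234, "fmove.l"
# stores 0x52345678, and "fmove.s" stores sign | 0x7fc00000 | (mantissa >> 8).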
3358
fsnan_out:
3359
3360
bfextu %d0{&19:&3},%d0 # extract dst format field
3361
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3362
mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3363
jmp (tbl_snan.b,%pc,%a0)
3364
3365
tbl_snan:
3366
short fsnan_out_l - tbl_snan # long word integer
3367
short fsnan_out_s - tbl_snan # sgl prec
3368
short fsnan_out_x - tbl_snan # ext prec
3369
short tbl_snan - tbl_snan # packed needs no help
3370
short fsnan_out_w - tbl_snan # word integer
3371
short fsnan_out_d - tbl_snan # dbl prec
3372
short fsnan_out_b - tbl_snan # byte integer
3373
short tbl_snan - tbl_snan # packed needs no help
3374
3375
fsnan_out_b:
3376
mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3377
bset &6,%d0 # set SNAN bit
3378
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3379
ble.b fsnan_out_b_dn # yes
3380
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3381
bsr.l _dmem_write_byte # write the default result
3382
3383
tst.l %d1 # did dstore fail?
3384
bne.l facc_out_b # yes
3385
3386
bra.w fsnan_exit
3387
fsnan_out_b_dn:
3388
andi.w &0x0007,%d1
3389
bsr.l store_dreg_b # store result to regfile
3390
bra.w fsnan_exit
3391
3392
fsnan_out_w:
3393
mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3394
bset &14,%d0 # set SNAN bit
3395
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3396
ble.b fsnan_out_w_dn # yes
3397
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3398
bsr.l _dmem_write_word # write the default result
3399
3400
tst.l %d1 # did dstore fail?
3401
bne.l facc_out_w # yes
3402
3403
bra.w fsnan_exit
3404
fsnan_out_w_dn:
3405
andi.w &0x0007,%d1
3406
bsr.l store_dreg_w # store result to regfile
3407
bra.w fsnan_exit
3408
3409
fsnan_out_l:
3410
mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3411
bset &30,%d0 # set SNAN bit
3412
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3413
ble.b fsnan_out_l_dn # yes
3414
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3415
bsr.l _dmem_write_long # write the default result
3416
3417
tst.l %d1 # did dstore fail?
3418
bne.l facc_out_l # yes
3419
3420
bra.w fsnan_exit
3421
fsnan_out_l_dn:
3422
andi.w &0x0007,%d1
3423
bsr.l store_dreg_l # store result to regfile
3424
bra.w fsnan_exit
3425
3426
fsnan_out_s:
3427
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3428
ble.b fsnan_out_d_dn # yes
3429
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3430
andi.l &0x80000000,%d0 # keep sign
3431
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3432
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3433
lsr.l &0x8,%d1 # shift mantissa for sgl
3434
or.l %d1,%d0 # create sgl SNAN
3435
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3436
bsr.l _dmem_write_long # write the default result
3437
3438
tst.l %d1 # did dstore fail?
3439
bne.l facc_out_l # yes
3440
3441
bra.w fsnan_exit
3442
fsnan_out_d_dn:
3443
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3444
andi.l &0x80000000,%d0 # keep sign
3445
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3446
mov.l %d1,-(%sp)
3447
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3448
lsr.l &0x8,%d1 # shift mantissa for sgl
3449
or.l %d1,%d0 # create sgl SNAN
3450
mov.l (%sp)+,%d1
3451
andi.w &0x0007,%d1
3452
bsr.l store_dreg_l # store result to regfile
3453
bra.w fsnan_exit
3454
3455
fsnan_out_d:
3456
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3457
andi.l &0x80000000,%d0 # keep sign
3458
ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3459
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3460
mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3461
mov.l &11,%d0 # load shift amt
3462
lsr.l %d0,%d1
3463
or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3464
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3465
andi.l &0x000007ff,%d1
3466
ror.l %d0,%d1
3467
mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3468
mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3469
lsr.l %d0,%d1
3470
or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3471
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3472
mov.l EXC_EA(%a6),%a1 # pass: dst addr
3473
movq.l &0x8,%d0 # pass: size of 8 bytes
3474
bsr.l _dmem_write # write the default result
3475
3476
tst.l %d1 # did dstore fail?
3477
bne.l facc_out_d # yes
3478
3479
bra.w fsnan_exit
3480
3481
# for extended precision, if the addressing mode is pre-decrement or
3482
# post-increment, then the address register did not get updated.
3483
# in addition, for pre-decrement, the stacked <ea> is incorrect.
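# _calc_ea_fout() recomputes the <ea> and updates An for these modes. if the
# mode turns out to be -(a7) from supervisor mode, SPCOND_FLG is set to
# mda7_flg and the frame-shuffling code below is used instead of a plain
# _dmem_write().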
3484
fsnan_out_x:
3485
clr.b SPCOND_FLG(%a6) # clear special case flag
3486
3487
mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3488
clr.w 2+FP_SCR0(%a6)
3489
mov.l FP_SRC_HI(%a6),%d0
3490
bset &30,%d0
3491
mov.l %d0,FP_SCR0_HI(%a6)
3492
mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3493
3494
btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3495
bne.b fsnan_out_x_s # yes
3496
3497
mov.l %usp,%a0 # fetch user stack pointer
3498
mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3499
mov.l (%a6),EXC_A6(%a6)
3500
3501
bsr.l _calc_ea_fout # find the correct ea,update An
3502
mov.l %a0,%a1
3503
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3504
3505
mov.l EXC_A7(%a6),%a0
3506
mov.l %a0,%usp # restore user stack pointer
3507
mov.l EXC_A6(%a6),(%a6)
3508
3509
fsnan_out_x_save:
3510
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3511
movq.l &0xc,%d0 # pass: size of extended
3512
bsr.l _dmem_write # write the default result
3513
3514
tst.l %d1 # did dstore fail?
3515
bne.l facc_out_x # yes
3516
3517
bra.w fsnan_exit
3518
3519
fsnan_out_x_s:
3520
mov.l (%a6),EXC_A6(%a6)
3521
3522
bsr.l _calc_ea_fout # find the correct ea,update An
3523
mov.l %a0,%a1
3524
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3525
3526
mov.l EXC_A6(%a6),(%a6)
3527
3528
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3529
bne.b fsnan_out_x_save # no
3530
3531
# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
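# the 12-byte operand overlaps the exception frame on the supervisor stack. so,
# the frame words (SR, PC, format/<ea>) are first copied 0xc bytes lower, the
# operand is written where the frame used to be, and the stack pointer is
# adjusted so that _real_snan() sees the relocated frame.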
3532
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3533
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3534
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3535
3536
frestore FP_SRC(%a6)
3537
3538
mov.l EXC_A6(%a6),%a6 # restore frame pointer
3539
3540
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3541
mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3542
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3543
3544
mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3545
mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3546
mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3547
3548
add.l &LOCAL_SIZE-0x8,%sp
3549
3550
bra.l _real_snan
3551
3552
#########################################################################
3553
# XDEF **************************************************************** #
3554
# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3555
# #
3556
# This handler should be the first code executed upon taking the #
3557
# FP Inexact exception in an operating system. #
3558
# #
3559
# XREF **************************************************************** #
3560
# _imem_read_long() - read instruction longword #
3561
# fix_skewed_ops() - adjust src operand in fsave frame #
3562
# set_tag_x() - determine optype of src/dst operands #
3563
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3564
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
3565
# load_fpn2() - load dst operand from FP regfile #
3566
# smovcr() - emulate an "fmovcr" instruction #
3567
# fout() - emulate an opclass 3 instruction #
3568
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3569
# _real_inex() - "callout" to operating system inexact handler #
3570
# #
3571
# INPUT *************************************************************** #
3572
# - The system stack contains the FP Inexact exception frame #
3573
# - The fsave frame contains the source operand #
3574
# #
3575
# OUTPUT ************************************************************** #
3576
# - The system stack is unchanged #
3577
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3578
# #
3579
# ALGORITHM *********************************************************** #
3580
# In a system where the FP Inexact exception is enabled, the goal #
3581
# is to get to the handler specified at _real_inex(). But, on the 060, #
3582
# for opclass zero and two instructions taking this exception, the #
3583
# hardware doesn't store the correct result to the destination FP #
3584
# register as did the '040 and '881/2. This handler must emulate the #
3585
# instruction in order to get this value and then store it to the #
3586
# correct register before calling _real_inex(). #
3587
# For opclass 3 instructions, the 060 doesn't store the default #
3588
# inexact result out to memory or data register file as it should. #
3589
# This code must emulate the move out by calling fout() before finally #
3590
# exiting through _real_inex(). #
3591
# #
3592
#########################################################################
3593
3594
global _fpsp_inex
3595
_fpsp_inex:
3596
3597
link.w %a6,&-LOCAL_SIZE # init stack frame
3598
3599
fsave FP_SRC(%a6) # grab the "busy" frame
3600
3601
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3602
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3603
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3604
3605
# the FPIAR holds the "current PC" of the faulting instruction
3606
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3607
3608
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3609
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3610
bsr.l _imem_read_long # fetch the instruction words
3611
mov.l %d0,EXC_OPWORD(%a6)
3612
3613
##############################################################################
3614
3615
btst &13,%d0 # is instr an fmove out?
3616
bne.w finex_out # fmove out
3617
3618
3619
# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3620
# longword integer directly into the upper longword of the mantissa along
3621
# w/ an exponent value of 0x401e. we convert this to extended precision here.
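# for example, a long source of 0x00000005 arrives here as exponent 0x401e w/
# a mantissa of 0x00000005_00000000, which still equals 5 (5*2^32 scaled by
# 2^(0x401e-0x3fff-63) = 2^-32); the fmov.l/fmov.x pair below rewrites it as a
# normalized extended precision value.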
3622
bfextu %d0{&19:&3},%d0 # fetch instr size
3623
bne.b finex_cont # instr size is not long
3624
cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3625
bne.b finex_cont # no
3626
fmov.l &0x0,%fpcr
3627
fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3628
fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3629
mov.w &0xe001,0x2+FP_SRC(%a6)
3630
3631
finex_cont:
3632
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3633
bsr.l fix_skewed_ops # fix src op
3634
3635
# Here, we zero the ccode and exception byte field since we're going to
3636
# emulate the whole instruction. Notice, though, that we don't kill the
3637
# INEX1 bit. This is because a packed op has long since been converted
3638
# to extended before arriving here. Therefore, we need to retain the
3639
# INEX1 bit from when the operand was first converted.
3640
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3641
3642
fmov.l &0x0,%fpcr # zero current control regs
3643
fmov.l &0x0,%fpsr
3644
3645
bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3646
cmpi.b %d1,&0x17 # is op an fmovecr?
3647
beq.w finex_fmovcr # yes
3648
3649
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3650
bsr.l set_tag_x # tag the operand type
3651
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3652
3653
# bits four and five of the fp extension word separate the monadic and dyadic
3654
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3655
# will never take this exception, but fsincos will.
3656
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3657
beq.b finex_extract # monadic
3658
3659
btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3660
bne.b finex_extract # yes
3661
3662
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3663
bsr.l load_fpn2 # load dst into FP_DST
3664
3665
lea FP_DST(%a6),%a0 # pass: ptr to dst op
3666
bsr.l set_tag_x # tag the operand type
3667
cmpi.b %d0,&UNNORM # is operand an UNNORM?
3668
bne.b finex_op2_done # no
3669
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3670
finex_op2_done:
3671
mov.b %d0,DTAG(%a6) # save dst optype tag
3672
3673
finex_extract:
3674
clr.l %d0
3675
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3676
3677
mov.b 1+EXC_CMDREG(%a6),%d1
3678
andi.w &0x007f,%d1 # extract extension
3679
3680
lea FP_SRC(%a6),%a0
3681
lea FP_DST(%a6),%a1
3682
3683
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3684
jsr (tbl_unsupp.l,%pc,%d1.l*1)
3685
3686
# the operation has been emulated. the result is in fp0.
3687
finex_save:
3688
bfextu EXC_CMDREG(%a6){&6:&3},%d0
3689
bsr.l store_fpreg
3690
3691
finex_exit:
3692
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3693
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3694
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3695
3696
frestore FP_SRC(%a6)
3697
3698
unlk %a6
3699
bra.l _real_inex
3700
3701
finex_fmovcr:
3702
clr.l %d0
3703
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3704
mov.b 1+EXC_CMDREG(%a6),%d1
3705
andi.l &0x0000007f,%d1 # pass rom offset
3706
bsr.l smovcr
3707
bra.b finex_save
3708
3709
########################################################################
3710
3711
#
3712
# the hardware does not save the default result to memory on enabled
3713
# inexact exceptions. we do this here before passing control to
3714
# the user inexact handler.
3715
#
3716
# byte, word, and long destination format operations can pass
3717
# through here. so can double and single precision.
3718
# although packed opclass three operations can take inexact
3719
# exceptions, they won't pass through here since they are caught
3720
# first by the unsupported data format exception handler. that handler
3721
# sends them directly to _real_inex() if necessary.
3722
#
3723
finex_out:
3724
3725
mov.b &NORM,STAG(%a6) # src is a NORM
3726
3727
clr.l %d0
3728
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3729
3730
andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3731
3732
lea FP_SRC(%a6),%a0 # pass ptr to src operand
3733
3734
bsr.l fout # store the default result
3735
3736
bra.b finex_exit
3737
3738
#########################################################################
3739
# XDEF **************************************************************** #
3740
# _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3741
# #
3742
# This handler should be the first code executed upon taking #
3743
# the FP DZ exception in an operating system. #
3744
# #
3745
# XREF **************************************************************** #
3746
# _imem_read_long() - read instruction longword from memory #
3747
# fix_skewed_ops() - adjust fsave operand #
3748
# _real_dz() - "callout" exit point from FP DZ handler #
3749
# #
3750
# INPUT *************************************************************** #
3751
# - The system stack contains the FP DZ exception stack. #
3752
# - The fsave frame contains the source operand. #
3753
# #
3754
# OUTPUT ************************************************************** #
3755
# - The system stack contains the FP DZ exception stack. #
3756
# - The fsave frame contains the adjusted source operand. #
3757
# #
3758
# ALGORITHM *********************************************************** #
3759
# In a system where the DZ exception is enabled, the goal is to #
3760
# get to the handler specified at _real_dz(). But, on the 060, when the #
3761
# exception is taken, the input operand in the fsave state frame may #
3762
# be incorrect for some cases and need to be adjusted. So, this package #
3763
# adjusts the operand using fix_skewed_ops() and then branches to #
3764
# _real_dz(). #
3765
# #
3766
#########################################################################
3767
3768
global _fpsp_dz
3769
_fpsp_dz:
3770
3771
link.w %a6,&-LOCAL_SIZE # init stack frame
3772
3773
fsave FP_SRC(%a6) # grab the "busy" frame
3774
3775
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3776
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3777
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3778
3779
# the FPIAR holds the "current PC" of the faulting instruction
3780
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3781
3782
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3783
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3784
bsr.l _imem_read_long # fetch the instruction words
3785
mov.l %d0,EXC_OPWORD(%a6)
3786
3787
##############################################################################
3788
3789
3790
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3791
# this would be the case for opclass two operations with a source zero
3792
# in the sgl or dbl format.
3793
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3794
bsr.l fix_skewed_ops # fix src op
3795
3796
fdz_exit:
3797
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3798
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3799
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3800
3801
frestore FP_SRC(%a6)
3802
3803
unlk %a6
3804
bra.l _real_dz
3805
3806
#########################################################################
3807
# XDEF **************************************************************** #
3808
# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
3809
# #
3810
# This handler should be the first code executed upon taking the #
3811
# "Line F Emulator" exception in an operating system. #
3812
# #
3813
# XREF **************************************************************** #
3814
# _fpsp_unimp() - handle "FP Unimplemented" exceptions #
3815
# _real_fpu_disabled() - handle "FPU disabled" exceptions #
3816
# _real_fline() - handle "FLINE" exceptions #
3817
# _imem_read_long() - read instruction longword #
3818
# #
3819
# INPUT *************************************************************** #
3820
# - The system stack contains a "Line F Emulator" exception #
3821
# stack frame. #
3822
# #
3823
# OUTPUT ************************************************************** #
3824
# - The system stack is unchanged #
3825
# #
3826
# ALGORITHM *********************************************************** #
3827
# When a "Line F Emulator" exception occurs, there are 3 possible #
3828
# exception types, denoted by the exception stack frame format number: #
3829
# (1) FPU unimplemented instruction (6 word stack frame) #
3830
# (2) FPU disabled (8 word stack frame) #
3831
# (3) Line F (4 word stack frame) #
3832
# #
3833
# This module determines which and forks the flow off to the #
3834
# appropriate "callout" (for "disabled" and "Line F") or to the #
3835
# correct emulation code (for "FPU unimplemented"). #
3836
# This code also must check for "fmovecr" instructions w/ a #
3837
# non-zero <ea> field. These may get flagged as "Line F" but should #
3838
# really be flagged as "FPU Unimplemented". (This is a "feature" on #
3839
# the '060.) #
3840
# #
3841
#########################################################################
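# the checks below key off the format/vector word at 0x6(%sp): 0x202c is a
# format 0x2 frame w/ vector offset 0x02c (FP unimplemented instruction),
# 0x402c is a format 0x4 frame w/ the same vector (FPU disabled), and anything
# else is treated as a plain 4-word Line F frame.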
3842
3843
global _fpsp_fline
3844
_fpsp_fline:
3845
3846
# check to see if this exception is a "FP Unimplemented Instruction"
3847
# exception. if so, branch directly to that handler's entry point.
3848
cmpi.w 0x6(%sp),&0x202c
3849
beq.l _fpsp_unimp
3850
3851
# check to see if the FPU is disabled. if so, jump to the OS entry
3852
# point for that condition.
3853
cmpi.w 0x6(%sp),&0x402c
3854
beq.l _real_fpu_disabled
3855
3856
# the exception was an "F-Line Illegal" exception. we check to see
3857
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3858
# so, convert the F-Line exception stack frame to an FP Unimplemented
3859
# Instruction exception stack frame; otherwise, branch to the OS entry
3860
# point for the F-Line exception handler.
3861
link.w %a6,&-LOCAL_SIZE # init stack frame
3862
3863
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3864
3865
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
3866
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3867
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3868
bsr.l _imem_read_long # fetch instruction words
3869
3870
bfextu %d0{&0:&10},%d1 # is it an fmovecr?
3871
cmpi.w %d1,&0x03c8
3872
bne.b fline_fline # no
3873
3874
bfextu %d0{&16:&6},%d1 # is it an fmovecr?
3875
cmpi.b %d1,&0x17
3876
bne.b fline_fline # no
3877
3878
# it's an fmovecr w/ a non-zero <ea> that has entered through
3879
# the F-Line Illegal exception.
3880
# so, we need to convert the F-Line exception stack frame into an
3881
# FP Unimplemented Instruction stack frame and jump to that entry
3882
# point.
3883
#
3884
# but, if the FPU is disabled, then we need to jump to the FPU disabled
3885
# entry point.
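# (on the '060, bit 1 of the PCR is the FPU disable bit; that's what the
# movc/btst pair below tests.)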
3886
movc %pcr,%d0
3887
btst &0x1,%d0
3888
beq.b fline_fmovcr
3889
3890
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3891
3892
unlk %a6
3893
3894
sub.l &0x8,%sp # make room for "Next PC", <ea>
3895
mov.w 0x8(%sp),(%sp)
3896
mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
3897
mov.w &0x402c,0x6(%sp)
3898
mov.l 0x2(%sp),0xc(%sp)
3899
addq.l &0x4,0x2(%sp) # set "Next PC"
3900
3901
bra.l _real_fpu_disabled
3902
3903
fline_fmovcr:
3904
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3905
3906
unlk %a6
3907
3908
fmov.l 0x2(%sp),%fpiar # set current PC
3909
addq.l &0x4,0x2(%sp) # set Next PC
3910
3911
mov.l (%sp),-(%sp)
3912
mov.l 0x8(%sp),0x4(%sp)
3913
mov.b &0x20,0x6(%sp)
3914
3915
bra.l _fpsp_unimp
3916
3917
fline_fline:
3918
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3919
3920
unlk %a6
3921
3922
bra.l _real_fline
3923
3924
#########################################################################
3925
# XDEF **************************************************************** #
3926
# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
3927
# Instruction" exception. #
3928
# #
3929
# This handler should be the first code executed upon taking the #
3930
# FP Unimplemented Instruction exception in an operating system. #
3931
# #
3932
# XREF **************************************************************** #
3933
# _imem_read_{word,long}() - read instruction word/longword #
3934
# load_fop() - load src/dst ops from memory and/or FP regfile #
3935
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3936
# tbl_trans - addr of table of emulation routines for trnscndls #
3937
# _real_access() - "callout" for access error exception #
3938
# _fpsp_done() - "callout" for exit; work all done #
3939
# _real_trace() - "callout" for Trace enabled exception #
3940
# smovcr() - emulate "fmovecr" instruction #
3941
# funimp_skew() - adjust fsave src ops to "incorrect" value #
3942
# _ftrapcc() - emulate an "ftrapcc" instruction #
3943
# _fdbcc() - emulate an "fdbcc" instruction #
3944
# _fscc() - emulate an "fscc" instruction #
3945
# _real_trap() - "callout" for Trap exception #
3946
# _real_bsun() - "callout" for enabled Bsun exception #
3947
# #
3948
# INPUT *************************************************************** #
3949
# - The system stack contains the "Unimplemented Instr" stk frame #
3950
# #
3951
# OUTPUT ************************************************************** #
3952
# If access error: #
3953
# - The system stack is changed to an access error stack frame #
3954
# If Trace exception enabled: #
3955
# - The system stack is changed to a Trace exception stack frame #
3956
# Else: (normal case) #
3957
# - Correct result has been stored as appropriate #
3958
# #
3959
# ALGORITHM *********************************************************** #
3960
# There are two main cases of instructions that may enter here to #
3961
# be emulated: (1) the FPgen instructions, most of which were also #
3962
# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
3963
# For the first set, this handler calls the routine load_fop() #
3964
# to load the source and destination (for dyadic) operands to be used #
3965
# for instruction emulation. The correct emulation routine is then #
3966
# chosen by decoding the instruction type and indexing into an #
3967
# emulation subroutine index table. After emulation returns, this #
3968
# handler checks to see if an exception should occur as a result of the #
3969
# FP instruction emulation. If so, then an FP exception of the correct #
3970
# type is inserted into the FPU state frame using the "frestore" #
3971
# instruction before exiting through _fpsp_done(). In either the #
3972
# exceptional or non-exceptional cases, we must check to see if the #
3973
# Trace exception is enabled. If so, then we must create a Trace #
3974
# exception frame from the current exception frame and exit through #
3975
# _real_trace(). #
3976
# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
3977
# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
3978
# may flag that a BSUN exception should be taken. If so, then the #
3979
# current exception stack frame is converted into a BSUN exception #
3980
# stack frame and an exit is made through _real_bsun(). If the #
3981
# instruction was "ftrapcc" and a Trap exception should result, a Trap #
3982
# exception stack frame is created from the current frame and an exit #
3983
# is made through _real_trap(). If a Trace exception is pending, then #
3984
# a Trace exception frame is created from the current frame and a jump #
3985
# is made to _real_trace(). Finally, if none of these conditions exist, #
3986
# then the handler exits through the callout _fpsp_done(). #
3987
# #
3988
# In any of the above scenarios, if a _mem_read() or _mem_write() #
3989
# "callout" returns a failing value, then an access error stack frame #
3990
# is created from the current stack frame and an exit is made through #
3991
# _real_access(). #
3992
# #
3993
#########################################################################
3994
3995
#
3996
# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3997
#
3998
# *****************
3999
# * * => <ea> of fp unimp instr.
4000
# - EA -
4001
# * *
4002
# *****************
4003
# * 0x2 * 0x02c * => frame format and vector offset (vector #11)
4004
# *****************
4005
# * *
4006
# - Next PC - => PC of instr to execute after exc handling
4007
# * *
4008
# *****************
4009
# * SR * => SR at the time the exception was taken
4010
# *****************
4011
#
4012
# Note: the !NULL bit does not get set in the fsave frame when the
4013
# machine encounters an fp unimp exception. Therefore, it must be set
4014
# before leaving this handler.
4015
#
4016
global _fpsp_unimp
4017
_fpsp_unimp:
4018
4019
link.w %a6,&-LOCAL_SIZE # init stack frame
4020
4021
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4022
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4023
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
4024
4025
btst &0x5,EXC_SR(%a6) # user mode exception?
4026
bne.b funimp_s # no; supervisor mode
4027
4028
# save the value of the user stack pointer onto the stack frame
4029
funimp_u:
4030
mov.l %usp,%a0 # fetch user stack pointer
4031
mov.l %a0,EXC_A7(%a6) # store in stack frame
4032
bra.b funimp_cont
4033
4034
# store the value of the supervisor stack pointer BEFORE the exc occurred.
4035
# old_sp is address just above stacked effective address.
4036
funimp_s:
4037
lea 4+EXC_EA(%a6),%a0 # load old a7'
4038
mov.l %a0,EXC_A7(%a6) # store a7'
4039
mov.l %a0,OLD_A7(%a6) # make a copy
4040
4041
funimp_cont:
4042
4043
# the FPIAR holds the "current PC" of the faulting instruction.
4044
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4045
4046
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4047
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
4048
bsr.l _imem_read_long # fetch the instruction words
4049
mov.l %d0,EXC_OPWORD(%a6)
4050
4051
############################################################################
4052
4053
fmov.l &0x0,%fpcr # clear FPCR
4054
fmov.l &0x0,%fpsr # clear FPSR
4055
4056
clr.b SPCOND_FLG(%a6) # clear "special case" flag
4057
4058
# Divide the fp instructions into 8 types based on the TYPE field in
4059
# bits 6-8 of the opword (types 6 and 7 are undefined).
4060
# (for the '060, only two types can take this exception)
4061
# bftst %d0{&7:&3} # test TYPE
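# note: the opword sits in the upper word of %d0 here, so opword bit 6 (the
# low bit of the TYPE field) is bit 22 of %d0. type 0 falls through to the
# general instruction path; type 1 branches to funimp_misc.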
4062
btst &22,%d0 # type 0 or 1 ?
4063
bne.w funimp_misc # type 1
4064
4065
#########################################
4066
# TYPE == 0: General instructions #
4067
#########################################
4068
funimp_gen:
4069
4070
clr.b STORE_FLG(%a6) # clear "store result" flag
4071
4072
# clear the ccode byte and exception status byte
4073
andi.l &0x00ff00ff,USER_FPSR(%a6)
4074
4075
bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
4076
cmpi.b %d1,&0x17 # is op an fmovecr?
4077
beq.w funimp_fmovcr # yes
4078
4079
funimp_gen_op:
4080
bsr.l _load_fop # load
4081
4082
clr.l %d0
4083
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
4084
4085
mov.b 1+EXC_CMDREG(%a6),%d1
4086
andi.w &0x003f,%d1 # extract extension bits
4087
lsl.w &0x3,%d1 # shift left 3 bits
4088
or.b STAG(%a6),%d1 # insert src optag bits
4089
4090
lea FP_DST(%a6),%a1 # pass dst ptr in a1
4091
lea FP_SRC(%a6),%a0 # pass src ptr in a0
4092
4093
mov.w (tbl_trans.w,%pc,%d1.w*2),%d1
4094
jsr (tbl_trans.w,%pc,%d1.w*1) # emulate
4095
4096
funimp_fsave:
4097
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
4098
bne.w funimp_ena # some are enabled
4099
4100
funimp_store:
4101
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4102
bsr.l store_fpreg # store result to fp regfile
4103
4104
funimp_gen_exit:
4105
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4106
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4107
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4108
4109
funimp_gen_exit_cmp:
4110
cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4111
beq.b funimp_gen_exit_a7 # yes
4112
4113
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4114
beq.b funimp_gen_exit_a7 # yes
4115
4116
funimp_gen_exit_cont:
4117
unlk %a6
4118
4119
funimp_gen_exit_cont2:
4120
btst &0x7,(%sp) # is trace on?
4121
beq.l _fpsp_done # no
4122
4123
# this catches a problem with the case where an exception will be re-inserted
4124
# into the machine. the frestore has already been executed...so, the fmov.l
4125
# of the control register by itself would trigger an unwanted exception.
4126
# until I feel like fixing this, we'll sidestep the exception.
4127
fsave -(%sp)
4128
fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
4129
frestore (%sp)+
4130
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
4131
bra.l _real_trace
4132
4133
funimp_gen_exit_a7:
4134
btst &0x5,EXC_SR(%a6) # supervisor or user mode?
4135
bne.b funimp_gen_exit_a7_s # supervisor
4136
4137
mov.l %a0,-(%sp)
4138
mov.l EXC_A7(%a6),%a0
4139
mov.l %a0,%usp
4140
mov.l (%sp)+,%a0
4141
bra.b funimp_gen_exit_cont
4142
4143
# if the instruction was executed from supervisor mode and the addressing
4144
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
4145
# "n" bytes where "n" is the size of the src operand type.
4146
# f<op>.{b,w,l,s,d,x,p}
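# for example, an "fsin.l (a7)+,fp0" from supervisor mode means the frame must
# move up 4 bytes: the PC/format words and the SR are copied 4 bytes higher,
# the byte count is parked in the old SR slot, and the "add.w (%sp),%sp" after
# the unlk consumes it so the rte uses the shifted frame.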
4147
funimp_gen_exit_a7_s:
4148
mov.l %d0,-(%sp) # save d0
4149
mov.l EXC_A7(%a6),%d0 # load new a7'
4150
sub.l OLD_A7(%a6),%d0 # subtract old a7'
4151
mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4152
mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4153
mov.w %d0,EXC_SR(%a6) # store incr number
4154
mov.l (%sp)+,%d0 # restore d0
4155
4156
unlk %a6
4157
4158
add.w (%sp),%sp # stack frame shifted
4159
bra.b funimp_gen_exit_cont2
4160
4161
######################
4162
# fmovecr.x #ccc,fpn #
4163
######################
4164
funimp_fmovcr:
4165
clr.l %d0
4166
mov.b FPCR_MODE(%a6),%d0
4167
mov.b 1+EXC_CMDREG(%a6),%d1
4168
andi.l &0x0000007f,%d1 # pass rom offset in d1
4169
bsr.l smovcr
4170
bra.w funimp_fsave
4171
4172
#########################################################################
4173
4174
#
4175
# the user has enabled some exceptions. we figure not to see this too
4176
# often so that's why it gets lower priority.
4177
#
4178
funimp_ena:
4179
4180
# was an exception set that was also enabled?
4181
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
4182
bfffo %d0{&24:&8},%d0 # find highest priority exception
4183
bne.b funimp_exc # at least one was set
4184
4185
# no exception that was enabled was set BUT if we got an exact overflow
4186
# and overflow wasn't enabled but inexact was (yech!) then this is
4187
# an inexact exception; otherwise, return to normal non-exception flow.
4188
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189
beq.w funimp_store # no; return to normal flow
4190
4191
# the overflow w/ exact result happened; but is inexact enabled in the FPCR?
4192
funimp_ovfl:
4193
btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194
beq.w funimp_store # no; return to normal flow
4195
bra.b funimp_exc_ovfl # yes
4196
4197
# some exception happened that was actually enabled.
4198
# we'll insert this new exception into the FPU and then return.
4199
funimp_exc:
4200
subi.l &24,%d0 # fix offset to be 0-7
4201
cmpi.b %d0,&0x6 # is exception INEX?
4202
bne.b funimp_exc_force # no
4203
4204
# the enabled exception was inexact. so, if it occurs with an overflow
4205
# or underflow that was disabled, then we have to force an overflow or
4206
# underflow frame. the eventual overflow or underflow handler will see that
4207
# it's actually an inexact and act appropriately. this is the only easy
4208
# way to have the EXOP available for the enabled inexact handler when
4209
# a disabled overflow or underflow has also happened.
4210
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211
bne.b funimp_exc_ovfl # yes
4212
btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213
bne.b funimp_exc_unfl # yes
4214
4215
# force the fsave exception status bits to signal an exception of the
4216
# appropriate type. don't forget to "skew" the source operand in case we
4217
# "unskewed" the one the hardware initially gave us.
4218
funimp_exc_force:
4219
mov.l %d0,-(%sp) # save d0
4220
bsr.l funimp_skew # check for special case
4221
mov.l (%sp)+,%d0 # restore d0
4222
mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4223
bra.b funimp_gen_exit2 # exit with frestore
4224
4225
tbl_funimp_except:
4226
short 0xe002, 0xe006, 0xe004, 0xe005
4227
short 0xe003, 0xe002, 0xe001, 0xe001
4228
4229
# insert an overflow frame
4230
funimp_exc_ovfl:
4231
bsr.l funimp_skew # check for special case
4232
mov.w &0xe005,2+FP_SRC(%a6)
4233
bra.b funimp_gen_exit2
4234
4235
# insert an underflow frame
4236
funimp_exc_unfl:
4237
bsr.l funimp_skew # check for special case
4238
mov.w &0xe003,2+FP_SRC(%a6)
4239
4240
# this is the general exit point for an enabled exception that will be
4241
# restored into the machine for the instruction just emulated.
4242
funimp_gen_exit2:
4243
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4244
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4246
4247
frestore FP_SRC(%a6) # insert exceptional status
4248
4249
bra.w funimp_gen_exit_cmp
4250
4251
############################################################################
4252
4253
#
4254
# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4255
#
4256
# These instructions were implemented on the '881/2 and '040 in hardware but
4257
# are emulated in software on the '060.
4258
#
4259
funimp_misc:
4260
bfextu %d0{&10:&3},%d1 # extract mode field
4261
cmpi.b %d1,&0x1 # is it an fdb<cc>?
4262
beq.w funimp_fdbcc # yes
4263
cmpi.b %d1,&0x7 # is it an fs<cc>?
4264
bne.w funimp_fscc # yes
4265
bfextu %d0{&13:&3},%d1
4266
cmpi.b %d1,&0x2 # is it an fs<cc>?
4267
blt.w funimp_fscc # yes
4268
4269
#########################
4270
# ftrap<cc> #
4271
# ftrap<cc>.w #<data> #
4272
# ftrap<cc>.l #<data> #
4273
#########################
4274
funimp_ftrapcc:
4275
4276
bsr.l _ftrapcc # FTRAP<cc>()
4277
4278
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279
beq.w funimp_bsun # yes
4280
4281
cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282
bne.w funimp_done # no
4283
4284
# FP UNIMP FRAME TRAP FRAME
4285
# ***************** *****************
4286
# ** <EA> ** ** Current PC **
4287
# ***************** *****************
4288
# * 0x2 * 0x02c * * 0x2 * 0x01c *
4289
# ***************** *****************
4290
# ** Next PC ** ** Next PC **
4291
# ***************** *****************
4292
# * SR * * SR *
4293
# ***************** *****************
4294
# (6 words) (6 words)
4295
#
4296
# the ftrapcc instruction should take a trap. so, here we must create a
4297
# trap stack frame from an unimplemented fp instruction stack frame and
4298
# jump to the user supplied entry point for the trap exception
4299
funimp_ftrapcc_tp:
4300
mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301
mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c
4302
4303
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4304
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4306
4307
unlk %a6
4308
bra.l _real_trap
4309
4310
#########################
4311
# fdb<cc> Dn,<label> #
4312
#########################
4313
funimp_fdbcc:
4314
4315
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4316
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4317
bsr.l _imem_read_word # read displacement
4318
4319
tst.l %d1 # did ifetch fail?
4320
bne.w funimp_iacc # yes
4321
4322
ext.l %d0 # sign extend displacement
4323
4324
bsr.l _fdbcc # FDB<cc>()
4325
4326
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4327
beq.w funimp_bsun
4328
4329
bra.w funimp_done # branch to finish
4330
4331
#################
4332
# fs<cc>.b <ea> #
4333
#################
4334
funimp_fscc:
4335
4336
bsr.l _fscc # FS<cc>()
4337
4338
# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339
# does not need to update "An" before taking a bsun exception.
4340
cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4341
beq.w funimp_bsun
4342
4343
btst &0x5,EXC_SR(%a6) # yes; is it a user mode exception?
4344
bne.b funimp_fscc_s # no
4345
4346
funimp_fscc_u:
4347
mov.l EXC_A7(%a6),%a0 # yes; set new USP
4348
mov.l %a0,%usp
4349
bra.w funimp_done # branch to finish
4350
4351
# remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352
# so, the least significant WORD of the stacked effective address got
4353
# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354
# so that the rte will work correctly without destroying the result.
4355
# even though the operation size is byte, the stack ptr is decr by 2.
4356
#
4357
# remember, also, this instruction may be traced.
4358
funimp_fscc_s:
4359
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360
bne.w funimp_done # no
4361
4362
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4363
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4365
4366
unlk %a6
4367
4368
btst &0x7,(%sp) # is trace enabled?
4369
bne.b funimp_fscc_s_trace # yes
4370
4371
subq.l &0x2,%sp
4372
mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4373
mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
4374
bra.l _fpsp_done
4375
4376
funimp_fscc_s_trace:
4377
subq.l &0x2,%sp
4378
mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4379
mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
4380
mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
4381
fmov.l %fpiar,0x8(%sp) # insert "current PC"
4382
4383
bra.l _real_trace
4384
4385
#
4386
# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387
# the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388
# restore a bsun exception into the machine, and branch to the user
4389
# supplied bsun hook.
4390
#
4391
# FP UNIMP FRAME BSUN FRAME
4392
# ***************** *****************
4393
# ** <EA> ** * 0x0 * 0x0c0 *
4394
# ***************** *****************
4395
# * 0x2 * 0x02c * ** Current PC **
4396
# ***************** *****************
4397
# ** Next PC ** * SR *
4398
# ***************** *****************
4399
# * SR * (4 words)
4400
# *****************
4401
# (6 words)
4402
#
4403
funimp_bsun:
4404
mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
4405
mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406
mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4407
4408
mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
4409
4410
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4411
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4413
4414
frestore FP_SRC(%a6) # restore bsun exception
4415
4416
unlk %a6
4417
4418
addq.l &0x4,%sp # erase sludge
4419
4420
bra.l _real_bsun # branch to user bsun hook
4421
4422
#
4423
# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4424
# and return.
4425
#
4426
# as usual, we have to check for trace mode being on here. since instructions
4427
# modifying the supervisor stack frame don't pass through here, this is a
4428
# relatively easy task.
4429
#
4430
funimp_done:
4431
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4432
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4434
4435
unlk %a6
4436
4437
btst &0x7,(%sp) # is trace enabled?
4438
bne.b funimp_trace # yes
4439
4440
bra.l _fpsp_done
4441
4442
# FP UNIMP FRAME TRACE FRAME
4443
# ***************** *****************
4444
# ** <EA> ** ** Current PC **
4445
# ***************** *****************
4446
# * 0x2 * 0x02c * * 0x2 * 0x024 *
4447
# ***************** *****************
4448
# ** Next PC ** ** Next PC **
4449
# ***************** *****************
4450
# * SR * * SR *
4451
# ***************** *****************
4452
# (6 words) (6 words)
4453
#
4454
# the ftrapcc/fscc/fdbcc should take a trace trap. so, here we must create a
4455
# trace stack frame from an unimplemented fp instruction stack frame and
4456
# jump to the user supplied entry point for the trace exception
4457
funimp_trace:
4458
fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
4459
mov.b &0x24,0x7(%sp) # vector offset = 0x024
4460
4461
bra.l _real_trace
4462
4463
################################################################
4464
4465
global tbl_trans
4466
swbeg &0x1c0
4467
tbl_trans:
4468
short tbl_trans - tbl_trans # $00-0 fmovecr all
4469
short tbl_trans - tbl_trans # $00-1 fmovecr all
4470
short tbl_trans - tbl_trans # $00-2 fmovecr all
4471
short tbl_trans - tbl_trans # $00-3 fmovecr all
4472
short tbl_trans - tbl_trans # $00-4 fmovecr all
4473
short tbl_trans - tbl_trans # $00-5 fmovecr all
4474
short tbl_trans - tbl_trans # $00-6 fmovecr all
4475
short tbl_trans - tbl_trans # $00-7 fmovecr all
4476
4477
short tbl_trans - tbl_trans # $01-0 fint norm
4478
short tbl_trans - tbl_trans # $01-1 fint zero
4479
short tbl_trans - tbl_trans # $01-2 fint inf
4480
short tbl_trans - tbl_trans # $01-3 fint qnan
4481
short tbl_trans - tbl_trans # $01-5 fint denorm
4482
short tbl_trans - tbl_trans # $01-4 fint snan
4483
short tbl_trans - tbl_trans # $01-6 fint unnorm
4484
short tbl_trans - tbl_trans # $01-7 ERROR
4485
4486
short ssinh - tbl_trans # $02-0 fsinh norm
4487
short src_zero - tbl_trans # $02-1 fsinh zero
4488
short src_inf - tbl_trans # $02-2 fsinh inf
4489
short src_qnan - tbl_trans # $02-3 fsinh qnan
4490
short ssinhd - tbl_trans # $02-5 fsinh denorm
4491
short src_snan - tbl_trans # $02-4 fsinh snan
4492
short tbl_trans - tbl_trans # $02-6 fsinh unnorm
4493
short tbl_trans - tbl_trans # $02-7 ERROR
4494
4495
short tbl_trans - tbl_trans # $03-0 fintrz norm
4496
short tbl_trans - tbl_trans # $03-1 fintrz zero
4497
short tbl_trans - tbl_trans # $03-2 fintrz inf
4498
short tbl_trans - tbl_trans # $03-3 fintrz qnan
4499
short tbl_trans - tbl_trans # $03-5 fintrz denorm
4500
short tbl_trans - tbl_trans # $03-4 fintrz snan
4501
short tbl_trans - tbl_trans # $03-6 fintrz unnorm
4502
short tbl_trans - tbl_trans # $03-7 ERROR
4503
4504
short tbl_trans - tbl_trans # $04-0 fsqrt norm
4505
short tbl_trans - tbl_trans # $04-1 fsqrt zero
4506
short tbl_trans - tbl_trans # $04-2 fsqrt inf
4507
short tbl_trans - tbl_trans # $04-3 fsqrt qnan
4508
short tbl_trans - tbl_trans # $04-5 fsqrt denorm
4509
short tbl_trans - tbl_trans # $04-4 fsqrt snan
4510
short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
4511
short tbl_trans - tbl_trans # $04-7 ERROR
4512
4513
short tbl_trans - tbl_trans # $05-0 ERROR
4514
short tbl_trans - tbl_trans # $05-1 ERROR
4515
short tbl_trans - tbl_trans # $05-2 ERROR
4516
short tbl_trans - tbl_trans # $05-3 ERROR
4517
short tbl_trans - tbl_trans # $05-4 ERROR
4518
short tbl_trans - tbl_trans # $05-5 ERROR
4519
short tbl_trans - tbl_trans # $05-6 ERROR
4520
short tbl_trans - tbl_trans # $05-7 ERROR
4521
4522
short slognp1 - tbl_trans # $06-0 flognp1 norm
4523
short src_zero - tbl_trans # $06-1 flognp1 zero
4524
short sopr_inf - tbl_trans # $06-2 flognp1 inf
4525
short src_qnan - tbl_trans # $06-3 flognp1 qnan
4526
short slognp1d - tbl_trans # $06-5 flognp1 denorm
4527
short src_snan - tbl_trans # $06-4 flognp1 snan
4528
short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
4529
short tbl_trans - tbl_trans # $06-7 ERROR
4530
4531
short tbl_trans - tbl_trans # $07-0 ERROR
4532
short tbl_trans - tbl_trans # $07-1 ERROR
4533
short tbl_trans - tbl_trans # $07-2 ERROR
4534
short tbl_trans - tbl_trans # $07-3 ERROR
4535
short tbl_trans - tbl_trans # $07-4 ERROR
4536
short tbl_trans - tbl_trans # $07-5 ERROR
4537
short tbl_trans - tbl_trans # $07-6 ERROR
4538
short tbl_trans - tbl_trans # $07-7 ERROR
4539
4540
short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
4541
short src_zero - tbl_trans # $08-1 fetoxm1 zero
4542
short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
4543
short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
4544
short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
4545
short src_snan - tbl_trans # $08-4 fetoxm1 snan
4546
short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
4547
short tbl_trans - tbl_trans # $08-7 ERROR
4548
4549
short stanh - tbl_trans # $09-0 ftanh norm
4550
short src_zero - tbl_trans # $09-1 ftanh zero
4551
short src_one - tbl_trans # $09-2 ftanh inf
4552
short src_qnan - tbl_trans # $09-3 ftanh qnan
4553
short stanhd - tbl_trans # $09-5 ftanh denorm
4554
short src_snan - tbl_trans # $09-4 ftanh snan
4555
short tbl_trans - tbl_trans # $09-6 ftanh unnorm
4556
short tbl_trans - tbl_trans # $09-7 ERROR
4557
4558
short satan - tbl_trans # $0a-0 fatan norm
4559
short src_zero - tbl_trans # $0a-1 fatan zero
4560
short spi_2 - tbl_trans # $0a-2 fatan inf
4561
short src_qnan - tbl_trans # $0a-3 fatan qnan
4562
short satand - tbl_trans # $0a-5 fatan denorm
4563
short src_snan - tbl_trans # $0a-4 fatan snan
4564
short tbl_trans - tbl_trans # $0a-6 fatan unnorm
4565
short tbl_trans - tbl_trans # $0a-7 ERROR
4566
4567
short tbl_trans - tbl_trans # $0b-0 ERROR
4568
short tbl_trans - tbl_trans # $0b-1 ERROR
4569
short tbl_trans - tbl_trans # $0b-2 ERROR
4570
short tbl_trans - tbl_trans # $0b-3 ERROR
4571
short tbl_trans - tbl_trans # $0b-4 ERROR
4572
short tbl_trans - tbl_trans # $0b-5 ERROR
4573
short tbl_trans - tbl_trans # $0b-6 ERROR
4574
short tbl_trans - tbl_trans # $0b-7 ERROR
4575
4576
short sasin - tbl_trans # $0c-0 fasin norm
4577
short src_zero - tbl_trans # $0c-1 fasin zero
4578
short t_operr - tbl_trans # $0c-2 fasin inf
4579
short src_qnan - tbl_trans # $0c-3 fasin qnan
4580
short sasind - tbl_trans # $0c-5 fasin denorm
4581
short src_snan - tbl_trans # $0c-4 fasin snan
4582
short tbl_trans - tbl_trans # $0c-6 fasin unnorm
4583
short tbl_trans - tbl_trans # $0c-7 ERROR
4584
4585
short satanh - tbl_trans # $0d-0 fatanh norm
4586
short src_zero - tbl_trans # $0d-1 fatanh zero
4587
short t_operr - tbl_trans # $0d-2 fatanh inf
4588
short src_qnan - tbl_trans # $0d-3 fatanh qnan
4589
short satanhd - tbl_trans # $0d-5 fatanh denorm
4590
short src_snan - tbl_trans # $0d-4 fatanh snan
4591
short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
4592
short tbl_trans - tbl_trans # $0d-7 ERROR
4593
4594
short ssin - tbl_trans # $0e-0 fsin norm
4595
short src_zero - tbl_trans # $0e-1 fsin zero
4596
short t_operr - tbl_trans # $0e-2 fsin inf
4597
short src_qnan - tbl_trans # $0e-3 fsin qnan
4598
short ssind - tbl_trans # $0e-5 fsin denorm
4599
short src_snan - tbl_trans # $0e-4 fsin snan
4600
short tbl_trans - tbl_trans # $0e-6 fsin unnorm
4601
short tbl_trans - tbl_trans # $0e-7 ERROR
4602
4603
short stan - tbl_trans # $0f-0 ftan norm
4604
short src_zero - tbl_trans # $0f-1 ftan zero
4605
short t_operr - tbl_trans # $0f-2 ftan inf
4606
short src_qnan - tbl_trans # $0f-3 ftan qnan
4607
short stand - tbl_trans # $0f-5 ftan denorm
4608
short src_snan - tbl_trans # $0f-4 ftan snan
4609
short tbl_trans - tbl_trans # $0f-6 ftan unnorm
4610
short tbl_trans - tbl_trans # $0f-7 ERROR
4611
4612
short setox - tbl_trans # $10-0 fetox norm
4613
short ld_pone - tbl_trans # $10-1 fetox zero
4614
short szr_inf - tbl_trans # $10-2 fetox inf
4615
short src_qnan - tbl_trans # $10-3 fetox qnan
4616
short setoxd - tbl_trans # $10-5 fetox denorm
4617
short src_snan - tbl_trans # $10-4 fetox snan
4618
short tbl_trans - tbl_trans # $10-6 fetox unnorm
4619
short tbl_trans - tbl_trans # $10-7 ERROR
4620
4621
short stwotox - tbl_trans # $11-0 ftwotox norm
4622
short ld_pone - tbl_trans # $11-1 ftwotox zero
4623
short szr_inf - tbl_trans # $11-2 ftwotox inf
4624
short src_qnan - tbl_trans # $11-3 ftwotox qnan
4625
short stwotoxd - tbl_trans # $11-5 ftwotox denorm
4626
short src_snan - tbl_trans # $11-4 ftwotox snan
4627
short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
4628
short tbl_trans - tbl_trans # $11-7 ERROR
4629
4630
short stentox - tbl_trans # $12-0 ftentox norm
4631
short ld_pone - tbl_trans # $12-1 ftentox zero
4632
short szr_inf - tbl_trans # $12-2 ftentox inf
4633
short src_qnan - tbl_trans # $12-3 ftentox qnan
4634
short stentoxd - tbl_trans # $12-5 ftentox denorm
4635
short src_snan - tbl_trans # $12-4 ftentox snan
4636
short tbl_trans - tbl_trans # $12-6 ftentox unnorm
4637
short tbl_trans - tbl_trans # $12-7 ERROR
4638
4639
short tbl_trans - tbl_trans # $13-0 ERROR
4640
short tbl_trans - tbl_trans # $13-1 ERROR
4641
short tbl_trans - tbl_trans # $13-2 ERROR
4642
short tbl_trans - tbl_trans # $13-3 ERROR
4643
short tbl_trans - tbl_trans # $13-4 ERROR
4644
short tbl_trans - tbl_trans # $13-5 ERROR
4645
short tbl_trans - tbl_trans # $13-6 ERROR
4646
short tbl_trans - tbl_trans # $13-7 ERROR
4647
4648
short slogn - tbl_trans # $14-0 flogn norm
4649
short t_dz2 - tbl_trans # $14-1 flogn zero
4650
short sopr_inf - tbl_trans # $14-2 flogn inf
4651
short src_qnan - tbl_trans # $14-3 flogn qnan
4652
short slognd - tbl_trans # $14-5 flogn denorm
4653
short src_snan - tbl_trans # $14-4 flogn snan
4654
short tbl_trans - tbl_trans # $14-6 flogn unnorm
4655
short tbl_trans - tbl_trans # $14-7 ERROR
4656
4657
short slog10 - tbl_trans # $15-0 flog10 norm
4658
short t_dz2 - tbl_trans # $15-1 flog10 zero
4659
short sopr_inf - tbl_trans # $15-2 flog10 inf
4660
short src_qnan - tbl_trans # $15-3 flog10 qnan
4661
short slog10d - tbl_trans # $15-5 flog10 denorm
4662
short src_snan - tbl_trans # $15-4 flog10 snan
4663
short tbl_trans - tbl_trans # $15-6 flog10 unnorm
4664
short tbl_trans - tbl_trans # $15-7 ERROR
4665
4666
short slog2 - tbl_trans # $16-0 flog2 norm
4667
short t_dz2 - tbl_trans # $16-1 flog2 zero
4668
short sopr_inf - tbl_trans # $16-2 flog2 inf
4669
short src_qnan - tbl_trans # $16-3 flog2 qnan
4670
short slog2d - tbl_trans # $16-5 flog2 denorm
4671
short src_snan - tbl_trans # $16-4 flog2 snan
4672
short tbl_trans - tbl_trans # $16-6 flog2 unnorm
4673
short tbl_trans - tbl_trans # $16-7 ERROR
4674
4675
short tbl_trans - tbl_trans # $17-0 ERROR
4676
short tbl_trans - tbl_trans # $17-1 ERROR
4677
short tbl_trans - tbl_trans # $17-2 ERROR
4678
short tbl_trans - tbl_trans # $17-3 ERROR
4679
short tbl_trans - tbl_trans # $17-4 ERROR
4680
short tbl_trans - tbl_trans # $17-5 ERROR
4681
short tbl_trans - tbl_trans # $17-6 ERROR
4682
short tbl_trans - tbl_trans # $17-7 ERROR
4683
4684
short tbl_trans - tbl_trans # $18-0 fabs norm
4685
short tbl_trans - tbl_trans # $18-1 fabs zero
4686
short tbl_trans - tbl_trans # $18-2 fabs inf
4687
short tbl_trans - tbl_trans # $18-3 fabs qnan
4688
short tbl_trans - tbl_trans # $18-5 fabs denorm
4689
short tbl_trans - tbl_trans # $18-4 fabs snan
4690
short tbl_trans - tbl_trans # $18-6 fabs unnorm
4691
short tbl_trans - tbl_trans # $18-7 ERROR
4692
4693
short scosh - tbl_trans # $19-0 fcosh norm
4694
short ld_pone - tbl_trans # $19-1 fcosh zero
4695
short ld_pinf - tbl_trans # $19-2 fcosh inf
4696
short src_qnan - tbl_trans # $19-3 fcosh qnan
4697
short scoshd - tbl_trans # $19-5 fcosh denorm
4698
short src_snan - tbl_trans # $19-4 fcosh snan
4699
short tbl_trans - tbl_trans # $19-6 fcosh unnorm
4700
short tbl_trans - tbl_trans # $19-7 ERROR
4701
4702
short tbl_trans - tbl_trans # $1a-0 fneg norm
4703
short tbl_trans - tbl_trans # $1a-1 fneg zero
4704
short tbl_trans - tbl_trans # $1a-2 fneg inf
4705
short tbl_trans - tbl_trans # $1a-3 fneg qnan
4706
short tbl_trans - tbl_trans # $1a-5 fneg denorm
4707
short tbl_trans - tbl_trans # $1a-4 fneg snan
4708
short tbl_trans - tbl_trans # $1a-6 fneg unnorm
4709
short tbl_trans - tbl_trans # $1a-7 ERROR
4710
4711
short tbl_trans - tbl_trans # $1b-0 ERROR
4712
short tbl_trans - tbl_trans # $1b-1 ERROR
4713
short tbl_trans - tbl_trans # $1b-2 ERROR
4714
short tbl_trans - tbl_trans # $1b-3 ERROR
4715
short tbl_trans - tbl_trans # $1b-4 ERROR
4716
short tbl_trans - tbl_trans # $1b-5 ERROR
4717
short tbl_trans - tbl_trans # $1b-6 ERROR
4718
short tbl_trans - tbl_trans # $1b-7 ERROR
4719
4720
short sacos - tbl_trans # $1c-0 facos norm
4721
short ld_ppi2 - tbl_trans # $1c-1 facos zero
4722
short t_operr - tbl_trans # $1c-2 facos inf
4723
short src_qnan - tbl_trans # $1c-3 facos qnan
4724
short sacosd - tbl_trans # $1c-5 facos denorm
4725
short src_snan - tbl_trans # $1c-4 facos snan
4726
short tbl_trans - tbl_trans # $1c-6 facos unnorm
4727
short tbl_trans - tbl_trans # $1c-7 ERROR
4728
4729
short scos - tbl_trans # $1d-0 fcos norm
4730
short ld_pone - tbl_trans # $1d-1 fcos zero
4731
short t_operr - tbl_trans # $1d-2 fcos inf
4732
short src_qnan - tbl_trans # $1d-3 fcos qnan
4733
short scosd - tbl_trans # $1d-5 fcos denorm
4734
short src_snan - tbl_trans # $1d-4 fcos snan
4735
short tbl_trans - tbl_trans # $1d-6 fcos unnorm
4736
short tbl_trans - tbl_trans # $1d-7 ERROR
4737
4738
short sgetexp - tbl_trans # $1e-0 fgetexp norm
4739
short src_zero - tbl_trans # $1e-1 fgetexp zero
4740
short t_operr - tbl_trans # $1e-2 fgetexp inf
4741
short src_qnan - tbl_trans # $1e-3 fgetexp qnan
4742
short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
4743
short src_snan - tbl_trans # $1e-4 fgetexp snan
4744
short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
4745
short tbl_trans - tbl_trans # $1e-7 ERROR
4746
4747
short sgetman - tbl_trans # $1f-0 fgetman norm
4748
short src_zero - tbl_trans # $1f-1 fgetman zero
4749
short t_operr - tbl_trans # $1f-2 fgetman inf
4750
short src_qnan - tbl_trans # $1f-3 fgetman qnan
4751
short sgetmand - tbl_trans # $1f-5 fgetman denorm
4752
short src_snan - tbl_trans # $1f-4 fgetman snan
4753
short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
4754
short tbl_trans - tbl_trans # $1f-7 ERROR
4755
4756
short tbl_trans - tbl_trans # $20-0 fdiv norm
4757
short tbl_trans - tbl_trans # $20-1 fdiv zero
4758
short tbl_trans - tbl_trans # $20-2 fdiv inf
4759
short tbl_trans - tbl_trans # $20-3 fdiv qnan
4760
short tbl_trans - tbl_trans # $20-5 fdiv denorm
4761
short tbl_trans - tbl_trans # $20-4 fdiv snan
4762
short tbl_trans - tbl_trans # $20-6 fdiv unnorm
4763
short tbl_trans - tbl_trans # $20-7 ERROR
4764
4765
short smod_snorm - tbl_trans # $21-0 fmod norm
4766
short smod_szero - tbl_trans # $21-1 fmod zero
4767
short smod_sinf - tbl_trans # $21-2 fmod inf
4768
short sop_sqnan - tbl_trans # $21-3 fmod qnan
4769
short smod_sdnrm - tbl_trans # $21-5 fmod denorm
4770
short sop_ssnan - tbl_trans # $21-4 fmod snan
4771
short tbl_trans - tbl_trans # $21-6 fmod unnorm
4772
short tbl_trans - tbl_trans # $21-7 ERROR
4773
4774
short tbl_trans - tbl_trans # $22-0 fadd norm
4775
short tbl_trans - tbl_trans # $22-1 fadd zero
4776
short tbl_trans - tbl_trans # $22-2 fadd inf
4777
short tbl_trans - tbl_trans # $22-3 fadd qnan
4778
short tbl_trans - tbl_trans # $22-5 fadd denorm
4779
short tbl_trans - tbl_trans # $22-4 fadd snan
4780
short tbl_trans - tbl_trans # $22-6 fadd unnorm
4781
short tbl_trans - tbl_trans # $22-7 ERROR
4782
4783
short tbl_trans - tbl_trans # $23-0 fmul norm
4784
short tbl_trans - tbl_trans # $23-1 fmul zero
4785
short tbl_trans - tbl_trans # $23-2 fmul inf
4786
short tbl_trans - tbl_trans # $23-3 fmul qnan
4787
short tbl_trans - tbl_trans # $23-5 fmul denorm
4788
short tbl_trans - tbl_trans # $23-4 fmul snan
4789
short tbl_trans - tbl_trans # $23-6 fmul unnorm
4790
short tbl_trans - tbl_trans # $23-7 ERROR
4791
4792
short tbl_trans - tbl_trans # $24-0 fsgldiv norm
4793
short tbl_trans - tbl_trans # $24-1 fsgldiv zero
4794
short tbl_trans - tbl_trans # $24-2 fsgldiv inf
4795
short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
4796
short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
4797
short tbl_trans - tbl_trans # $24-4 fsgldiv snan
4798
short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
4799
short tbl_trans - tbl_trans # $24-7 ERROR
4800
4801
short srem_snorm - tbl_trans # $25-0 frem norm
4802
short srem_szero - tbl_trans # $25-1 frem zero
4803
short srem_sinf - tbl_trans # $25-2 frem inf
4804
short sop_sqnan - tbl_trans # $25-3 frem qnan
4805
short srem_sdnrm - tbl_trans # $25-5 frem denorm
4806
short sop_ssnan - tbl_trans # $25-4 frem snan
4807
short tbl_trans - tbl_trans # $25-6 frem unnorm
4808
short tbl_trans - tbl_trans # $25-7 ERROR
4809
4810
short sscale_snorm - tbl_trans # $26-0 fscale norm
4811
short sscale_szero - tbl_trans # $26-1 fscale zero
4812
short sscale_sinf - tbl_trans # $26-2 fscale inf
4813
short sop_sqnan - tbl_trans # $26-3 fscale qnan
4814
short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4815
short sop_ssnan - tbl_trans # $26-4 fscale snan
4816
short tbl_trans - tbl_trans # $26-6 fscale unnorm
4817
short tbl_trans - tbl_trans # $26-7 ERROR
4818
4819
short tbl_trans - tbl_trans # $27-0 fsglmul norm
4820
short tbl_trans - tbl_trans # $27-1 fsglmul zero
4821
short tbl_trans - tbl_trans # $27-2 fsglmul inf
4822
short tbl_trans - tbl_trans # $27-3 fsglmul qnan
4823
short tbl_trans - tbl_trans # $27-5 fsglmul denorm
4824
short tbl_trans - tbl_trans # $27-4 fsglmul snan
4825
short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
4826
short tbl_trans - tbl_trans # $27-7 ERROR
4827
4828
short tbl_trans - tbl_trans # $28-0 fsub norm
4829
short tbl_trans - tbl_trans # $28-1 fsub zero
4830
short tbl_trans - tbl_trans # $28-2 fsub inf
4831
short tbl_trans - tbl_trans # $28-3 fsub qnan
4832
short tbl_trans - tbl_trans # $28-5 fsub denorm
4833
short tbl_trans - tbl_trans # $28-4 fsub snan
4834
short tbl_trans - tbl_trans # $28-6 fsub unnorm
4835
short tbl_trans - tbl_trans # $28-7 ERROR
4836
4837
short tbl_trans - tbl_trans # $29-0 ERROR
4838
short tbl_trans - tbl_trans # $29-1 ERROR
4839
short tbl_trans - tbl_trans # $29-2 ERROR
4840
short tbl_trans - tbl_trans # $29-3 ERROR
4841
short tbl_trans - tbl_trans # $29-4 ERROR
4842
short tbl_trans - tbl_trans # $29-5 ERROR
4843
short tbl_trans - tbl_trans # $29-6 ERROR
4844
short tbl_trans - tbl_trans # $29-7 ERROR
4845
4846
short tbl_trans - tbl_trans # $2a-0 ERROR
4847
short tbl_trans - tbl_trans # $2a-1 ERROR
4848
short tbl_trans - tbl_trans # $2a-2 ERROR
4849
short tbl_trans - tbl_trans # $2a-3 ERROR
4850
short tbl_trans - tbl_trans # $2a-4 ERROR
4851
short tbl_trans - tbl_trans # $2a-5 ERROR
4852
short tbl_trans - tbl_trans # $2a-6 ERROR
4853
short tbl_trans - tbl_trans # $2a-7 ERROR
4854
4855
short tbl_trans - tbl_trans # $2b-0 ERROR
4856
short tbl_trans - tbl_trans # $2b-1 ERROR
4857
short tbl_trans - tbl_trans # $2b-2 ERROR
4858
short tbl_trans - tbl_trans # $2b-3 ERROR
4859
short tbl_trans - tbl_trans # $2b-4 ERROR
4860
short tbl_trans - tbl_trans # $2b-5 ERROR
4861
short tbl_trans - tbl_trans # $2b-6 ERROR
4862
short tbl_trans - tbl_trans # $2b-7 ERROR
4863
4864
short tbl_trans - tbl_trans # $2c-0 ERROR
4865
short tbl_trans - tbl_trans # $2c-1 ERROR
4866
short tbl_trans - tbl_trans # $2c-2 ERROR
4867
short tbl_trans - tbl_trans # $2c-3 ERROR
4868
short tbl_trans - tbl_trans # $2c-4 ERROR
4869
short tbl_trans - tbl_trans # $2c-5 ERROR
4870
short tbl_trans - tbl_trans # $2c-6 ERROR
4871
short tbl_trans - tbl_trans # $2c-7 ERROR
4872
4873
short tbl_trans - tbl_trans # $2d-0 ERROR
4874
short tbl_trans - tbl_trans # $2d-1 ERROR
4875
short tbl_trans - tbl_trans # $2d-2 ERROR
4876
short tbl_trans - tbl_trans # $2d-3 ERROR
4877
short tbl_trans - tbl_trans # $2d-4 ERROR
4878
short tbl_trans - tbl_trans # $2d-5 ERROR
4879
short tbl_trans - tbl_trans # $2d-6 ERROR
4880
short tbl_trans - tbl_trans # $2d-7 ERROR
4881
4882
short tbl_trans - tbl_trans # $2e-0 ERROR
4883
short tbl_trans - tbl_trans # $2e-1 ERROR
4884
short tbl_trans - tbl_trans # $2e-2 ERROR
4885
short tbl_trans - tbl_trans # $2e-3 ERROR
4886
short tbl_trans - tbl_trans # $2e-4 ERROR
4887
short tbl_trans - tbl_trans # $2e-5 ERROR
4888
short tbl_trans - tbl_trans # $2e-6 ERROR
4889
short tbl_trans - tbl_trans # $2e-7 ERROR
4890
4891
short tbl_trans - tbl_trans # $2f-0 ERROR
4892
short tbl_trans - tbl_trans # $2f-1 ERROR
4893
short tbl_trans - tbl_trans # $2f-2 ERROR
4894
short tbl_trans - tbl_trans # $2f-3 ERROR
4895
short tbl_trans - tbl_trans # $2f-4 ERROR
4896
short tbl_trans - tbl_trans # $2f-5 ERROR
4897
short tbl_trans - tbl_trans # $2f-6 ERROR
4898
short tbl_trans - tbl_trans # $2f-7 ERROR
4899
4900
short ssincos - tbl_trans # $30-0 fsincos norm
4901
short ssincosz - tbl_trans # $30-1 fsincos zero
4902
short ssincosi - tbl_trans # $30-2 fsincos inf
4903
short ssincosqnan - tbl_trans # $30-3 fsincos qnan
4904
short ssincosd - tbl_trans # $30-5 fsincos denorm
4905
short ssincossnan - tbl_trans # $30-4 fsincos snan
4906
short tbl_trans - tbl_trans # $30-6 fsincos unnorm
4907
short tbl_trans - tbl_trans # $30-7 ERROR
4908
4909
short ssincos - tbl_trans # $31-0 fsincos norm
4910
short ssincosz - tbl_trans # $31-1 fsincos zero
4911
short ssincosi - tbl_trans # $31-2 fsincos inf
4912
short ssincosqnan - tbl_trans # $31-3 fsincos qnan
4913
short ssincosd - tbl_trans # $31-5 fsincos denorm
4914
short ssincossnan - tbl_trans # $31-4 fsincos snan
4915
short tbl_trans - tbl_trans # $31-6 fsincos unnorm
4916
short tbl_trans - tbl_trans # $31-7 ERROR
4917
4918
short ssincos - tbl_trans # $32-0 fsincos norm
4919
short ssincosz - tbl_trans # $32-1 fsincos zero
4920
short ssincosi - tbl_trans # $32-2 fsincos inf
4921
short ssincosqnan - tbl_trans # $32-3 fsincos qnan
4922
short ssincosd - tbl_trans # $32-5 fsincos denorm
4923
short ssincossnan - tbl_trans # $32-4 fsincos snan
4924
short tbl_trans - tbl_trans # $32-6 fsincos unnorm
4925
short tbl_trans - tbl_trans # $32-7 ERROR
4926
4927
short ssincos - tbl_trans # $33-0 fsincos norm
4928
short ssincosz - tbl_trans # $33-1 fsincos zero
4929
short ssincosi - tbl_trans # $33-2 fsincos inf
4930
short ssincosqnan - tbl_trans # $33-3 fsincos qnan
4931
short ssincosd - tbl_trans # $33-5 fsincos denorm
4932
short ssincossnan - tbl_trans # $33-4 fsincos snan
4933
short tbl_trans - tbl_trans # $33-6 fsincos unnorm
4934
short tbl_trans - tbl_trans # $33-7 ERROR
4935
4936
short ssincos - tbl_trans # $34-0 fsincos norm
4937
short ssincosz - tbl_trans # $34-1 fsincos zero
4938
short ssincosi - tbl_trans # $34-2 fsincos inf
4939
short ssincosqnan - tbl_trans # $34-3 fsincos qnan
4940
short ssincosd - tbl_trans # $34-5 fsincos denorm
4941
short ssincossnan - tbl_trans # $34-4 fsincos snan
4942
short tbl_trans - tbl_trans # $34-6 fsincos unnorm
4943
short tbl_trans - tbl_trans # $34-7 ERROR
4944
4945
short ssincos - tbl_trans # $35-0 fsincos norm
4946
short ssincosz - tbl_trans # $35-1 fsincos zero
4947
short ssincosi - tbl_trans # $35-2 fsincos inf
4948
short ssincosqnan - tbl_trans # $35-3 fsincos qnan
4949
short ssincosd - tbl_trans # $35-5 fsincos denorm
4950
short ssincossnan - tbl_trans # $35-4 fsincos snan
4951
short tbl_trans - tbl_trans # $35-6 fsincos unnorm
4952
short tbl_trans - tbl_trans # $35-7 ERROR
4953
4954
short ssincos - tbl_trans # $36-0 fsincos norm
4955
short ssincosz - tbl_trans # $36-1 fsincos zero
4956
short ssincosi - tbl_trans # $36-2 fsincos inf
4957
short ssincosqnan - tbl_trans # $36-3 fsincos qnan
4958
short ssincosd - tbl_trans # $36-5 fsincos denorm
4959
short ssincossnan - tbl_trans # $36-4 fsincos snan
4960
short tbl_trans - tbl_trans # $36-6 fsincos unnorm
4961
short tbl_trans - tbl_trans # $36-7 ERROR
4962
4963
short ssincos - tbl_trans # $37-0 fsincos norm
4964
short ssincosz - tbl_trans # $37-1 fsincos zero
4965
short ssincosi - tbl_trans # $37-2 fsincos inf
4966
short ssincosqnan - tbl_trans # $37-3 fsincos qnan
4967
short ssincosd - tbl_trans # $37-5 fsincos denorm
4968
short ssincossnan - tbl_trans # $37-4 fsincos snan
4969
short tbl_trans - tbl_trans # $37-6 fsincos unnorm
4970
short tbl_trans - tbl_trans # $37-7 ERROR
4971
4972
##########

# the instruction fetch access for the displacement word for the
# fdbcc emulation failed. here, we create an access error frame
# from the current frame and branch to _real_access().
funimp_iacc:
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1

mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

unlk %a6

mov.l (%sp),-(%sp) # store SR,hi(PC)
mov.w 0x8(%sp),0x4(%sp) # store lo(PC)
mov.w &0x4008,0x6(%sp) # store voff
mov.l 0x2(%sp),0x8(%sp) # store EA
mov.l &0x09428001,0xc(%sp) # store FSLW

btst &0x5,(%sp) # user or supervisor mode?
beq.b funimp_iacc_end # user
bset &0x2,0xd(%sp) # set supervisor TM bit

funimp_iacc_end:
bra.l _real_access
4998
4999
#########################################################################
# ssin(): computes the sine of a normalized input #
# ssind(): computes the sine of a denormalized input #
# scos(): computes the cosine of a normalized input #
# scosd(): computes the cosine of a denormalized input #
# ssincos(): computes the sine and cosine of a normalized input #
# ssincosd(): computes the sine and cosine of a denormalized input #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = sin(X) or cos(X) #
# #
# For ssincos(X): #
# fp0 = sin(X) #
# fp1 = cos(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 1 ulp in 64 significant bits, i.e. #
# within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# #
# SIN and COS: #
# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
# #
# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
# #
# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 4, so in particular, k = 0, 1, 2, or 3. #
# Overwrite k by k := k + AdjN. #
# #
# 4. If k is even, go to 6. #
# #
# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
# Return sgn*cos(r) where cos(r) is approximated by an #
# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
# s = r*r. #
# Exit. #
# #
# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
# where sin(r) is approximated by an odd polynomial in r #
# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
# Exit. #
# #
# 7. If |X| > 1, go to 9. #
# #
# 8. (|X|<2**(-40)) If SIN is invoked, return X; #
# otherwise return 1. #
# #
# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
# go back to 3. #
# #
# SINCOS: #
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
# #
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 4, so in particular, k = 0, 1, 2, or 3. #
# #
# 3. If k is even, go to 5. #
# #
# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
# j1 exclusive ORed with the l.s.b. of k. #
# sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
# sin(r) and cos(r) are computed as odd and even #
# polynomials in r, respectively. Exit. #
# #
# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
# sin(r) and cos(r) are computed as odd and even #
# polynomials in r, respectively. Exit. #
# #
# 6. If |X| > 1, go to 8. #
# #
# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
# #
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
# go back to 2. #
# #
#########################################################################
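# A hedged C-style sketch (not part of the original package) of steps 3-6
# above, with double standing in for extended precision and a naive
# reduction in place of the PITBL/REDUCEX machinery; poly_sin()/poly_cos()
# are hypothetical stand-ins for the SINPOLY/COSPOLY kernels below:
#
#   #include <math.h>
#   extern double poly_sin(double r), poly_cos(double r);
#
#   double ssin_scos(double x, int adjn)        /* adjn: 0 = sin, 1 = cos  */
#   {
#       if (fabs(x) < 0x1p-40)                  /* step 8: tiny argument   */
#           return adjn ? 1.0 : x;
#       int    n = (int)nearbyint(x * (2.0 / M_PI));
#       double r = x - n * (M_PI / 2.0);        /* step 3: X = N(Pi/2) + r */
#       int    k = ((n + adjn) % 4 + 4) % 4;    /* k = (N + AdjN) mod 4    */
#       double sgn = (k & 2) ? -1.0 : 1.0;      /* sgn = (-1)**j           */
#       return sgn * ((k & 1) ? poly_cos(r)     /* k odd : sgn*cos(r)      */
#                             : poly_sin(r));   /* k even: sgn*sin(r)      */
#   }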
5084
5085
SINA7: long 0xBD6AAA77,0xCCC994F5
5086
SINA6: long 0x3DE61209,0x7AAE8DA1
5087
SINA5: long 0xBE5AE645,0x2A118AE4
5088
SINA4: long 0x3EC71DE3,0xA5341531
5089
SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090
SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
5091
SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5092
5093
COSB8: long 0x3D2AC4D0,0xD6011EE3
5094
COSB7: long 0xBDA9396F,0x9F45AC19
5095
COSB6: long 0x3E21EED9,0x0612C972
5096
COSB5: long 0xBE927E4F,0xB79D9FCF
5097
COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098
COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099
COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100
COSB1: long 0xBF000000
5101
5102
set INARG,FP_SCR0
5103
5104
set X,FP_SCR0
5105
# set XDCARE,X+2
5106
set XFRAC,X+4
5107
5108
set RPRIME,FP_SCR0
5109
set SPRIME,FP_SCR1
5110
5111
set POSNEG1,L_SCR1
5112
set TWOTO63,L_SCR1
5113
5114
set ENDFLAG,L_SCR2
5115
set INT,L_SCR2
5116
5117
set ADJN,L_SCR3
5118
5119
############################################
5120
global ssin
5121
ssin:
5122
mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0
5123
bra.b SINBGN
5124
5125
############################################
5126
global scos
5127
scos:
5128
mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1
5129
5130
############################################
5131
SINBGN:
5132
#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
5133
5134
fmov.x (%a0),%fp0 # LOAD INPUT
5135
fmov.x %fp0,X(%a6) # save input at X
5136
5137
# "COMPACTIFY" X
5138
mov.l (%a0),%d1 # put exp in hi word
5139
mov.w 4(%a0),%d1 # fetch hi(man)
5140
and.l &0x7FFFFFFF,%d1 # strip sign
5141
5142
cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5143
bge.b SOK1 # no
5144
bra.w SINSM # yes; input is very small
5145
5146
SOK1:
5147
cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5148
blt.b SINMAIN # no
5149
bra.w SREDUCEX # yes; input is very large
5150
5151
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5152
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5153
SINMAIN:
5154
fmov.x %fp0,%fp1
5155
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5156
5157
lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5158
5159
fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5160
5161
mov.l INT(%a6),%d1 # make a copy of N
5162
asl.l &4,%d1 # N *= 16
5163
add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5164
5165
# A1 IS THE ADDRESS OF N*PIBY2
5166
# ...WHICH IS IN TWO PIECES Y1 & Y2
5167
fsub.x (%a1)+,%fp0 # X-Y1
5168
fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5169
5170
SINCONT:
5171
#--continuation from REDUCEX
5172
5173
#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5174
mov.l INT(%a6),%d1
5175
add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN
5176
ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE
5177
cmp.l %d1,&0
5178
blt.w COSPOLY
5179
5180
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5181
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5185
#--WHERE T=S*S.
5186
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
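# The two bracketed chains above are independent, which lets the multiplies
# overlap in the FPU pipeline. A hedged C sketch of the same evaluation
# order (a1..a7 are assumed to hold the SINA1..SINA7 coefficients; double
# is used only for illustration):
#
#   extern const double a1, a2, a3, a4, a5, a6, a7;
#
#   double sin_poly(double r, double sgn)       /* returns SGN*sin(r)      */
#   {
#       double s = r * r, t = s * s;            /* S = R*R, T = S*S        */
#       double odd  = a1 + t * (a3 + t * (a5 + t * a7));
#       double even = s * (a2 + t * (a4 + t * a6));
#       double rp   = sgn * r;                  /* R' = SGN*R              */
#       return rp + rp * s * (odd + even);
#   }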
5188
SINPOLY:
5189
fmovm.x &0x0c,-(%sp) # save fp2/fp3
5190
5191
fmov.x %fp0,X(%a6) # X IS R
5192
fmul.x %fp0,%fp0 # FP0 IS S
5193
5194
fmov.d SINA7(%pc),%fp3
5195
fmov.d SINA6(%pc),%fp2
5196
5197
fmov.x %fp0,%fp1
5198
fmul.x %fp1,%fp1 # FP1 IS T
5199
5200
ror.l &1,%d1
5201
and.l &0x80000000,%d1
5202
# ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5203
eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5204
5205
fmul.x %fp1,%fp3 # TA7
5206
fmul.x %fp1,%fp2 # TA6
5207
5208
fadd.d SINA5(%pc),%fp3 # A5+TA7
5209
fadd.d SINA4(%pc),%fp2 # A4+TA6
5210
5211
fmul.x %fp1,%fp3 # T(A5+TA7)
5212
fmul.x %fp1,%fp2 # T(A4+TA6)
5213
5214
fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5215
fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5216
5217
fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5218
5219
fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5220
fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5221
fmul.x X(%a6),%fp0 # R'*S
5222
5223
fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5224
5225
fmul.x %fp1,%fp0 # SIN(R')-R'
5226
5227
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5228
5229
fmov.l %d0,%fpcr # restore users round mode,prec
5230
fadd.x X(%a6),%fp0 # last inst - possible exception set
5231
bra t_inx2
5232
5233
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5234
#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5238
#--WHERE T=S*S.
5239
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
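# Same idea for the cosine chain, as a hedged C sketch (b1..b8 are assumed
# to hold the COSB1..COSB8 coefficients; double for illustration only):
#
#   extern const double b1, b2, b3, b4, b5, b6, b7, b8;
#
#   double cos_poly(double r, double sgn)       /* returns SGN*cos(r)      */
#   {
#       double s = r * r, t = s * s;            /* S = R*R, T = S*S        */
#       double odd  = b1 + t * (b3 + t * (b5 + t * b7));
#       double even = s * (b2 + t * (b4 + t * (b6 + t * b8)));
#       double sp   = sgn * s;                  /* S' = SGN*S              */
#       return sgn + sp * (odd + even);
#   }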
5242
COSPOLY:
5243
fmovm.x &0x0c,-(%sp) # save fp2/fp3
5244
5245
fmul.x %fp0,%fp0 # FP0 IS S
5246
5247
fmov.d COSB8(%pc),%fp2
5248
fmov.d COSB7(%pc),%fp3
5249
5250
fmov.x %fp0,%fp1
5251
fmul.x %fp1,%fp1 # FP1 IS T
5252
5253
fmov.x %fp0,X(%a6) # X IS S
5254
ror.l &1,%d1
5255
and.l &0x80000000,%d1
5256
# ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5257
5258
fmul.x %fp1,%fp2 # TB8
5259
5260
eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5261
and.l &0x80000000,%d1
5262
5263
fmul.x %fp1,%fp3 # TB7
5264
5265
or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE
5266
mov.l %d1,POSNEG1(%a6)
5267
5268
fadd.d COSB6(%pc),%fp2 # B6+TB8
5269
fadd.d COSB5(%pc),%fp3 # B5+TB7
5270
5271
fmul.x %fp1,%fp2 # T(B6+TB8)
5272
fmul.x %fp1,%fp3 # T(B5+TB7)
5273
5274
fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5275
fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5276
5277
fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5278
fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5279
5280
fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5281
fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5282
5283
fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5284
5285
fadd.x %fp1,%fp0
5286
5287
fmul.x X(%a6),%fp0
5288
5289
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5290
5291
fmov.l %d0,%fpcr # restore users round mode,prec
5292
fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5293
bra t_inx2
5294
5295
##############################################
5296
5297
# SINe: Big OR Small?
5298
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299
#--IF |X| < 2**(-40), RETURN X OR 1.
5300
SINBORS:
5301
cmp.l %d1,&0x3FFF8000
5302
bgt.l SREDUCEX
5303
5304
SINSM:
5305
mov.l ADJN(%a6),%d1
5306
cmp.l %d1,&0
5307
bgt.b COSTINY
5308
5309
# here, the operation may underflow iff the precision is sgl or dbl.
5310
# extended denorms are handled through another entry point.
5311
SINTINY:
5312
# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5313
5314
fmov.l %d0,%fpcr # restore users round mode,prec
5315
mov.b &FMOV_OP,%d1 # last inst is MOVE
5316
fmov.x X(%a6),%fp0 # last inst - possible exception set
5317
bra t_catch
5318
5319
COSTINY:
5320
fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5321
fmov.l %d0,%fpcr # restore users round mode,prec
5322
fadd.s &0x80800000,%fp0 # last inst - possible exception set
5323
bra t_pinx2
5324
5325
################################################
5326
global ssind
5327
#--SIN(X) = X FOR DENORMALIZED X
5328
ssind:
5329
bra t_extdnrm
5330
5331
############################################
5332
global scosd
5333
#--COS(X) = 1 FOR DENORMALIZED X
5334
scosd:
5335
fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5336
bra t_pinx2
5337
5338
##################################################
5339
5340
global ssincos
5341
ssincos:
5342
#--SET ADJN TO 4
5343
mov.l &4,ADJN(%a6)
5344
5345
fmov.x (%a0),%fp0 # LOAD INPUT
5346
fmov.x %fp0,X(%a6)
5347
5348
mov.l (%a0),%d1
5349
mov.w 4(%a0),%d1
5350
and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5351
5352
cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5353
bge.b SCOK1
5354
bra.w SCSM
5355
5356
SCOK1:
5357
cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5358
blt.b SCMAIN
5359
bra.w SREDUCEX
5360
5361
5362
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5363
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5364
SCMAIN:
5365
fmov.x %fp0,%fp1
5366
5367
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5368
5369
lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5370
5371
fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5372
5373
mov.l INT(%a6),%d1
5374
asl.l &4,%d1
5375
add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5376
5377
fsub.x (%a1)+,%fp0 # X-Y1
5378
fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5379
5380
SCCONT:
5381
#--continuation point from REDUCEX
5382
5383
mov.l INT(%a6),%d1
5384
ror.l &1,%d1
5385
cmp.l %d1,&0 # D0 < 0 IFF N IS ODD
5386
bge.w NEVEN
5387
5388
SNODD:
5389
#--REGISTERS SAVED SO FAR: D0, A0, FP2.
5390
fmovm.x &0x04,-(%sp) # save fp2
5391
5392
fmov.x %fp0,RPRIME(%a6)
5393
fmul.x %fp0,%fp0 # FP0 IS S = R*R
5394
fmov.d SINA7(%pc),%fp1 # A7
5395
fmov.d COSB8(%pc),%fp2 # B8
5396
fmul.x %fp0,%fp1 # SA7
5397
fmul.x %fp0,%fp2 # SB8
5398
5399
mov.l %d2,-(%sp)
5400
mov.l %d1,%d2
5401
ror.l &1,%d2
5402
and.l &0x80000000,%d2
5403
eor.l %d1,%d2
5404
and.l &0x80000000,%d2
5405
5406
fadd.d SINA6(%pc),%fp1 # A6+SA7
5407
fadd.d COSB7(%pc),%fp2 # B7+SB8
5408
5409
fmul.x %fp0,%fp1 # S(A6+SA7)
5410
eor.l %d2,RPRIME(%a6)
5411
mov.l (%sp)+,%d2
5412
fmul.x %fp0,%fp2 # S(B7+SB8)
5413
ror.l &1,%d1
5414
and.l &0x80000000,%d1
5415
mov.l &0x3F800000,POSNEG1(%a6)
5416
eor.l %d1,POSNEG1(%a6)
5417
5418
fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5419
fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5420
5421
fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5422
fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5423
fmov.x %fp0,SPRIME(%a6)
5424
5425
fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5426
eor.l %d1,SPRIME(%a6)
5427
fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5428
5429
fmul.x %fp0,%fp1 # S(A4+...)
5430
fmul.x %fp0,%fp2 # S(B5+...)
5431
5432
fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5433
fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5434
5435
fmul.x %fp0,%fp1 # S(A3+...)
5436
fmul.x %fp0,%fp2 # S(B4+...)
5437
5438
fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5439
fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5440
5441
fmul.x %fp0,%fp1 # S(A2+...)
5442
fmul.x %fp0,%fp2 # S(B3+...)
5443
5444
fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5445
fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5446
5447
fmul.x %fp0,%fp1 # S(A1+...)
5448
fmul.x %fp2,%fp0 # S(B2+...)
5449
5450
fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5451
fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5452
fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5453
5454
fmovm.x (%sp)+,&0x20 # restore fp2
5455
5456
fmov.l %d0,%fpcr
5457
fadd.x RPRIME(%a6),%fp1 # COS(X)
5458
bsr sto_cos # store cosine result
5459
fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5460
bra t_inx2
5461
5462
NEVEN:
5463
#--REGISTERS SAVED SO FAR: FP2.
5464
fmovm.x &0x04,-(%sp) # save fp2
5465
5466
fmov.x %fp0,RPRIME(%a6)
5467
fmul.x %fp0,%fp0 # FP0 IS S = R*R
5468
5469
fmov.d COSB8(%pc),%fp1 # B8
5470
fmov.d SINA7(%pc),%fp2 # A7
5471
5472
fmul.x %fp0,%fp1 # SB8
5473
fmov.x %fp0,SPRIME(%a6)
5474
fmul.x %fp0,%fp2 # SA7
5475
5476
ror.l &1,%d1
5477
and.l &0x80000000,%d1
5478
5479
fadd.d COSB7(%pc),%fp1 # B7+SB8
5480
fadd.d SINA6(%pc),%fp2 # A6+SA7
5481
5482
eor.l %d1,RPRIME(%a6)
5483
eor.l %d1,SPRIME(%a6)
5484
5485
fmul.x %fp0,%fp1 # S(B7+SB8)
5486
5487
or.l &0x3F800000,%d1
5488
mov.l %d1,POSNEG1(%a6)
5489
5490
fmul.x %fp0,%fp2 # S(A6+SA7)
5491
5492
fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5493
fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5494
5495
fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5496
fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5497
5498
fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5499
fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5500
5501
fmul.x %fp0,%fp1 # S(B5+...)
5502
fmul.x %fp0,%fp2 # S(A4+...)
5503
5504
fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5505
fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5506
5507
fmul.x %fp0,%fp1 # S(B4+...)
5508
fmul.x %fp0,%fp2 # S(A3+...)
5509
5510
fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5511
fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5512
5513
fmul.x %fp0,%fp1 # S(B3+...)
5514
fmul.x %fp0,%fp2 # S(A2+...)
5515
5516
fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5517
fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5518
5519
fmul.x %fp0,%fp1 # S(B2+...)
5520
fmul.x %fp2,%fp0 # S(A1+...)
5521
5522
5523
fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5524
fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5525
fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5526
5527
fmovm.x (%sp)+,&0x20 # restore fp2
5528
5529
fmov.l %d0,%fpcr
5530
fadd.s POSNEG1(%a6),%fp1 # COS(X)
5531
bsr sto_cos # store cosine result
5532
fadd.x RPRIME(%a6),%fp0 # SIN(X)
5533
bra t_inx2
5534
5535
################################################
5536
5537
SCBORS:
5538
cmp.l %d1,&0x3FFF8000
5539
bgt.w SREDUCEX
5540
5541
################################################
5542
5543
SCSM:
5544
# mov.w &0x0000,XDCARE(%a6)
5545
fmov.s &0x3F800000,%fp1
5546
5547
fmov.l %d0,%fpcr
5548
fsub.s &0x00800000,%fp1
5549
bsr sto_cos # store cosine result
5550
fmov.l %fpcr,%d0 # d0 must have fpcr,too
5551
mov.b &FMOV_OP,%d1 # last inst is MOVE
5552
fmov.x X(%a6),%fp0
5553
bra t_catch
5554
5555
##############################################
5556
5557
global ssincosd
5558
#--SIN AND COS OF X FOR DENORMALIZED X
5559
ssincosd:
5560
mov.l %d0,-(%sp) # save d0
5561
fmov.s &0x3F800000,%fp1
5562
bsr sto_cos # store cosine result
5563
mov.l (%sp)+,%d0 # restore d0
5564
bra t_extdnrm
5565
5566
############################################
5567
5568
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5571
SREDUCEX:
5572
fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5573
mov.l %d2,-(%sp) # save d2
5574
fmov.s &0x00000000,%fp1 # fp1 = 0
5575
5576
#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
5577
#--there is a danger of unwanted overflow in first LOOP iteration. In this
5578
#--case, reduce argument by one remainder step to make subsequent reduction
5579
#--safe.
5580
cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5581
bne.b SLOOP # no
5582
5583
# yes; create 2**16383*PI/2
5584
mov.w &0x7ffe,FP_SCR0_EX(%a6)
5585
mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5586
clr.l FP_SCR0_LO(%a6)
5587
5588
# create low half of 2**16383*PI/2 at FP_SCR1
5589
mov.w &0x7fdc,FP_SCR1_EX(%a6)
5590
mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5591
clr.l FP_SCR1_LO(%a6)
5592
5593
ftest.x %fp0 # test sign of argument
5594
fblt.w sred_neg
5595
5596
or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5597
or.b &0x80,FP_SCR1_EX(%a6)
5598
sred_neg:
5599
fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5600
fmov.x %fp0,%fp1 # save high result in fp1
5601
fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5602
fsub.x %fp0,%fp1 # determine low component of result
5603
fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5604
5605
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606
#--integer quotient will be stored in N
5607
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
5608
SLOOP:
5609
fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5610
mov.w INARG(%a6),%d1
5611
mov.l %d1,%a1 # save a copy of D0
5612
and.l &0x00007FFF,%d1
5613
sub.l &0x00003FFF,%d1 # d0 = K
5614
cmp.l %d1,&28
5615
ble.b SLASTLOOP
5616
SCONTLOOP:
5617
sub.l &27,%d1 # d0 = L := K-27
5618
mov.b &0,ENDFLAG(%a6)
5619
bra.b SWORK
5620
SLASTLOOP:
5621
clr.l %d1 # d0 = L := 0
5622
mov.b &1,ENDFLAG(%a6)
5623
5624
SWORK:
5625
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5626
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5627
5628
#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629
#--2**L * (PIby2_1), 2**L * (PIby2_2)
5630
5631
mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5632
sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5633
5634
mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5635
mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5636
mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5637
5638
fmov.x %fp0,%fp2
5639
fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5640
5641
#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5643
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5645
#--US THE DESIRED VALUE IN FLOATING POINT.
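# The same "add, then subtract, a large power of two" rounding trick in a
# hedged C sketch. It assumes round-to-nearest and |x| well below the magic
# constant; double has a 53-bit mantissa, so the constant is 2**52 here,
# while the code below uses 2**63 for the 64-bit extended mantissa:
#
#   #include <math.h>
#   double round_to_int(double x)
#   {
#       volatile double big = copysign(0x1p52, x);  /* SIGN(x)*2**52       */
#       volatile double y   = x + big;              /* fraction rounds off */
#       return y - big;                             /* nearest integer     */
#   }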
5646
mov.l %a1,%d2
5647
swap %d2
5648
and.l &0x80000000,%d2
5649
or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5650
mov.l %d2,TWOTO63(%a6)
5651
fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
5652
fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5653
# fint.x %fp2
5654
5655
#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656
mov.l %d1,%d2 # d2 = L
5657
5658
add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5659
mov.w %d2,FP_SCR0_EX(%a6)
5660
mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5661
clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5662
5663
add.l &0x00003FDD,%d1
5664
mov.w %d1,FP_SCR1_EX(%a6)
5665
mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5666
clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5667
5668
mov.b ENDFLAG(%a6),%d1
5669
5670
#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671
#--P2 = 2**(L) * Piby2_2
5672
fmov.x %fp2,%fp4 # fp4 = N
5673
fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5674
fmov.x %fp2,%fp5 # fp5 = N
5675
fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5676
fmov.x %fp4,%fp3 # fp3 = W = N*P1
5677
5678
#--we want P+p = W+w but |p| <= half ulp of P
5679
#--Then, we need to compute A := R-P and a := r-p
5680
fadd.x %fp5,%fp3 # fp3 = P
5681
fsub.x %fp3,%fp4 # fp4 = W-P
5682
5683
fsub.x %fp3,%fp0 # fp0 = A := R - P
5684
fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5685
5686
fmov.x %fp0,%fp3 # fp3 = A
5687
fsub.x %fp4,%fp1 # fp1 = a := r - p
5688
5689
#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5690
#--|r| <= half ulp of R.
5691
fadd.x %fp1,%fp0 # fp0 = R := A+a
5692
#--No need to calculate r if this is the last loop
5693
cmp.b %d1,&0
5694
bgt.w SRESTORE
5695
5696
#--Need to calculate r
5697
fsub.x %fp0,%fp3 # fp3 = A-R
5698
fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5699
bra.w SLOOP
5700
5701
SRESTORE:
5702
fmov.l %fp2,INT(%a6)
5703
mov.l (%sp)+,%d2 # restore d2
5704
fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5705
5706
mov.l ADJN(%a6),%d1
5707
cmp.l %d1,&4
5708
5709
blt.w SINCONT
5710
bra.w SCCONT
5711
5712
#########################################################################
# stan(): computes the tangent of a normalized input #
# stand(): computes the tangent of a denormalized input #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = tan(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 3 ulps in 64 significant bits, i.e. #
# within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# #
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
# #
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 2, so in particular, k = 0 or 1. #
# #
# 3. If k is odd, go to 5. #
# #
# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
# rational function U/V where #
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
# Exit. #
# #
# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
# a rational function U/V where #
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
# -Cot(r) = -V/U. Exit. #
# #
# 6. If |X| > 1, go to 8. #
# #
# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
# #
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
# to 2. #
# #
#########################################################################
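# A hedged C sketch of the selection logic above; poly_p() and poly_q() are
# hypothetical stand-ins for the TANP1..TANP3 and TANQ1..TANQ4 kernels, and
# the reduction is simplified to library calls:
#
#   #include <math.h>
#   extern double poly_p(double s), poly_q(double s);
#
#   double stan_sketch(double x)
#   {
#       int    n = (int)nearbyint(x * (2.0 / M_PI));  /* X = N(Pi/2) + r   */
#       double r = x - n * (M_PI / 2.0);
#       double s = r * r;
#       double u = r + r * s * poly_p(s);       /* U = r + r*s*(P1+...)    */
#       double v = 1.0 + s * poly_q(s);         /* V = 1 + s*(Q1+...)      */
#       return (n & 1) ? -v / u : u / v;        /* odd N: Tan(X) = -cot(r) */
#   }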
5758
5759
TANQ4:
5760
long 0x3EA0B759,0xF50F8688
5761
TANP3:
5762
long 0xBEF2BAA5,0xA8924F04
5763
5764
TANQ3:
5765
long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5766
5767
TANP2:
5768
long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5769
5770
TANQ2:
5771
long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5772
5773
TANP1:
5774
long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5775
5776
TANQ1:
5777
long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5778
5779
INVTWOPI:
5780
long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5781
5782
TWOPI1:
5783
long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5784
TWOPI2:
5785
long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5786
5787
#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5788
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5789
#--MOST 69 BITS LONG.
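# A hedged C analogue of the two-piece lookup: pitbl_hi[]/pitbl_lo[] are
# hypothetical arrays holding the leading and trailing pieces of N*Pi/2
# (N = -32..32), mirroring the PITBL layout used by the fsub.x/fsub.s pair:
#
#   extern const double pitbl_hi[65];
#   extern const float  pitbl_lo[65];
#
#   double reduce_by_table(double x, int n)     /* -32 <= n <= 32          */
#   {
#       return (x - pitbl_hi[n + 32]) - pitbl_lo[n + 32];  /* (X-Y1)-Y2    */
#   }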
5790
# global PITBL
5791
PITBL:
5792
long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793
long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794
long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795
long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796
long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797
long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798
long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799
long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800
long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801
long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5802
long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803
long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804
long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805
long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806
long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807
long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808
long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809
long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810
long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811
long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812
long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813
long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814
long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815
long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816
long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817
long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818
long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819
long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820
long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821
long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822
long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823
long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824
long 0x00000000,0x00000000,0x00000000,0x00000000
5825
long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826
long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827
long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828
long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829
long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830
long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831
long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832
long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833
long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834
long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835
long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836
long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837
long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838
long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839
long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840
long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841
long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842
long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843
long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844
long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845
long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846
long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847
long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5848
long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849
long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850
long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851
long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852
long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853
long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854
long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855
long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856
long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5857
5858
set INARG,FP_SCR0
5859
5860
set TWOTO63,L_SCR1
5861
set INT,L_SCR1
5862
set ENDFLAG,L_SCR2
5863
5864
global stan
5865
stan:
5866
fmov.x (%a0),%fp0 # LOAD INPUT
5867
5868
mov.l (%a0),%d1
5869
mov.w 4(%a0),%d1
5870
and.l &0x7FFFFFFF,%d1
5871
5872
cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5873
bge.b TANOK1
5874
bra.w TANSM
5875
TANOK1:
5876
cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5877
blt.b TANMAIN
5878
bra.w REDUCEX
5879
5880
TANMAIN:
5881
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5882
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5883
fmov.x %fp0,%fp1
5884
fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5885
5886
lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5887
5888
fmov.l %fp1,%d1 # CONVERT TO INTEGER
5889
5890
asl.l &4,%d1
5891
add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5892
5893
fsub.x (%a1)+,%fp0 # X-Y1
5894
5895
fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5896
5897
ror.l &5,%d1
5898
and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0
5899
5900
TANCONT:
5901
fmovm.x &0x0c,-(%sp) # save fp2,fp3
5902
5903
cmp.l %d1,&0
5904
blt.w NODD
5905
5906
fmov.x %fp0,%fp1
5907
fmul.x %fp1,%fp1 # S = R*R
5908
5909
fmov.d TANQ4(%pc),%fp3
5910
fmov.d TANP3(%pc),%fp2
5911
5912
fmul.x %fp1,%fp3 # SQ4
5913
fmul.x %fp1,%fp2 # SP3
5914
5915
fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5916
fadd.x TANP2(%pc),%fp2 # P2+SP3
5917
5918
fmul.x %fp1,%fp3 # S(Q3+SQ4)
5919
fmul.x %fp1,%fp2 # S(P2+SP3)
5920
5921
fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5922
fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5923
5924
fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5925
fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5926
5927
fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5928
fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5929
5930
fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5931
5932
fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3))
5933
5934
fadd.s &0x3F800000,%fp1 # 1+S(Q1+...)
5935
5936
fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5937
5938
fmov.l %d0,%fpcr # restore users round mode,prec
5939
fdiv.x %fp1,%fp0 # last inst - possible exception set
5940
bra t_inx2
5941
5942
NODD:
5943
fmov.x %fp0,%fp1
5944
fmul.x %fp0,%fp0 # S = R*R
5945
5946
fmov.d TANQ4(%pc),%fp3
5947
fmov.d TANP3(%pc),%fp2
5948
5949
fmul.x %fp0,%fp3 # SQ4
5950
fmul.x %fp0,%fp2 # SP3
5951
5952
fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5953
fadd.x TANP2(%pc),%fp2 # P2+SP3
5954
5955
fmul.x %fp0,%fp3 # S(Q3+SQ4)
5956
fmul.x %fp0,%fp2 # S(P2+SP3)
5957
5958
fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5959
fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5960
5961
fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5962
fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5963
5964
fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5965
fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5966
5967
fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5968
5969
fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5970
fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5971
5972
fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5973
5974
fmov.x %fp1,-(%sp)
5975
eor.l &0x80000000,(%sp)
5976
5977
fmov.l %d0,%fpcr # restore users round mode,prec
5978
fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5979
bra t_inx2
5980
5981
TANBORS:
5982
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5983
#--IF |X| < 2**(-40), RETURN X OR 1.
5984
cmp.l %d1,&0x3FFF8000
5985
bgt.b REDUCEX
5986
5987
TANSM:
5988
fmov.x %fp0,-(%sp)
5989
fmov.l %d0,%fpcr # restore users round mode,prec
5990
mov.b &FMOV_OP,%d1 # last inst is MOVE
5991
fmov.x (%sp)+,%fp0 # last inst - possible exception set
5992
bra t_catch
5993
5994
global stand
5995
#--TAN(X) = X FOR DENORMALIZED X
5996
stand:
5997
bra t_extdnrm
5998
5999
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6000
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6001
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
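# One pass of the loop below, as a hedged C sketch: pi2_hi/pi2_lo are
# illustrative two-piece Pi/2 constants, and the 2**L rescaling that keeps
# the integer part below 2**29 is omitted for brevity:
#
#   #include <math.h>
#   extern const double pi2_hi, pi2_lo;         /* Piby2_1, Piby2_2        */
#
#   double reduce_pass(double x, double *rlo)   /* x = R, *rlo = r         */
#   {
#       double n  = nearbyint(x * (2.0 / M_PI));
#       double w  = n * pi2_hi;                 /* W = N*P1                */
#       double ww = n * pi2_lo;                 /* w = N*P2                */
#       double p  = w + ww;                     /* P + p = W + w           */
#       double pc = (w - p) + ww;               /* p, the lost low part    */
#       double a  = x - p;                      /* A = R - P               */
#       double ac = *rlo - pc;                  /* a = r - p               */
#       double rn = a + ac;                     /* new R = A + a           */
#       *rlo      = (a - rn) + ac;              /* new r = (A - R) + a     */
#       return rn;
#   }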
6002
REDUCEX:
6003
fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
6004
mov.l %d2,-(%sp) # save d2
6005
fmov.s &0x00000000,%fp1 # fp1 = 0
6006
6007
#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6008
#--there is a danger of unwanted overflow in first LOOP iteration. In this
6009
#--case, reduce argument by one remainder step to make subsequent reduction
6010
#--safe.
6011
cmp.l %d1,&0x7ffeffff # is arg dangerously large?
6012
bne.b LOOP # no
6013
6014
# yes; create 2**16383*PI/2
6015
mov.w &0x7ffe,FP_SCR0_EX(%a6)
6016
mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
6017
clr.l FP_SCR0_LO(%a6)
6018
6019
# create low half of 2**16383*PI/2 at FP_SCR1
6020
mov.w &0x7fdc,FP_SCR1_EX(%a6)
6021
mov.l &0x85a308d3,FP_SCR1_HI(%a6)
6022
clr.l FP_SCR1_LO(%a6)
6023
6024
ftest.x %fp0 # test sign of argument
6025
fblt.w red_neg
6026
6027
or.b &0x80,FP_SCR0_EX(%a6) # positive arg
6028
or.b &0x80,FP_SCR1_EX(%a6)
6029
red_neg:
6030
fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
6031
fmov.x %fp0,%fp1 # save high result in fp1
6032
fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
6033
fsub.x %fp0,%fp1 # determine low component of result
6034
fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
6035
6036
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6037
#--integer quotient will be stored in N
6038
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
6039
LOOP:
6040
fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
6041
mov.w INARG(%a6),%d1
6042
mov.l %d1,%a1 # save a copy of D0
6043
and.l &0x00007FFF,%d1
6044
sub.l &0x00003FFF,%d1 # d0 = K
6045
cmp.l %d1,&28
6046
ble.b LASTLOOP
6047
CONTLOOP:
6048
sub.l &27,%d1 # d0 = L := K-27
6049
mov.b &0,ENDFLAG(%a6)
6050
bra.b WORK
6051
LASTLOOP:
6052
clr.l %d1 # d0 = L := 0
6053
mov.b &1,ENDFLAG(%a6)
6054
6055
WORK:
6056
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6057
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6058
6059
#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6060
#--2**L * (PIby2_1), 2**L * (PIby2_2)
6061
6062
mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
6063
sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
6064
6065
mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
6066
mov.l &0x4E44152A,FP_SCR0_LO(%a6)
6067
mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
6068
6069
fmov.x %fp0,%fp2
6070
fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
6071
6072
#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6073
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6074
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6075
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6076
#--US THE DESIRED VALUE IN FLOATING POINT.
6077
mov.l %a1,%d2
6078
swap %d2
6079
and.l &0x80000000,%d2
6080
or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
6081
mov.l %d2,TWOTO63(%a6)
6082
fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
6083
fsub.s TWOTO63(%a6),%fp2 # fp2 = N
6084
# fintrz.x %fp2,%fp2
6085
6086
#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6087
mov.l %d1,%d2 # d2 = L
6088
6089
add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
6090
mov.w %d2,FP_SCR0_EX(%a6)
6091
mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
6092
clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
6093
6094
add.l &0x00003FDD,%d1
6095
mov.w %d1,FP_SCR1_EX(%a6)
6096
mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6097
clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6098
6099
mov.b ENDFLAG(%a6),%d1
6100
6101
#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6102
#--P2 = 2**(L) * Piby2_2
6103
fmov.x %fp2,%fp4 # fp4 = N
6104
fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6105
fmov.x %fp2,%fp5 # fp5 = N
6106
fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6107
fmov.x %fp4,%fp3 # fp3 = W = N*P1
6108
6109
#--we want P+p = W+w but |p| <= half ulp of P
6110
#--Then, we need to compute A := R-P and a := r-p
6111
fadd.x %fp5,%fp3 # fp3 = P
6112
fsub.x %fp3,%fp4 # fp4 = W-P
6113
6114
fsub.x %fp3,%fp0 # fp0 = A := R - P
6115
fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6116
6117
fmov.x %fp0,%fp3 # fp3 = A
6118
fsub.x %fp4,%fp1 # fp1 = a := r - p
6119
6120
#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6121
#--|r| <= half ulp of R.
6122
fadd.x %fp1,%fp0 # fp0 = R := A+a
6123
#--No need to calculate r if this is the last loop
6124
cmp.b %d1,&0
6125
bgt.w RESTORE
6126
6127
#--Need to calculate r
6128
fsub.x %fp0,%fp3 # fp3 = A-R
6129
fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6130
bra.w LOOP
6131
6132
RESTORE:
6133
fmov.l %fp2,INT(%a6)
6134
mov.l (%sp)+,%d2 # restore d2
6135
fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6136
6137
mov.l INT(%a6),%d1
6138
ror.l &1,%d1
6139
6140
bra.w TANCONT
6141
6142
#########################################################################
# satan(): computes the arctangent of a normalized number #
# satand(): computes the arctangent of a denormalized number #
# #
# INPUT ******************************************************************* #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ****************************************************************** #
# fp0 = arctan(X) #
# #
# ACCURACY and MONOTONICITY *********************************************** #
# The returned result is within 2 ulps in 64 significant bits, #
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *************************************************************** #
# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
# #
# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
# Note that k = -4, -3, ..., or 3. #
# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
# significant bits of X with a bit 1 attached at the 6th #
# bit position. Define u to be u = (X-F) / (1 + X*F). #
# #
# Step 3. Approximate arctan(u) by a polynomial poly. #
# #
# Step 4. Return arctan(F) + poly, where arctan(F) is fetched from a #
# table of values calculated beforehand. Exit. #
# #
# Step 5. If |X| >= 16, go to Step 7. #
# #
# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
# #
# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
# polynomial in X'. #
# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
# #
#########################################################################
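# The identity behind Steps 2-4: arctan(X) = arctan(F) + arctan(u) with
# u = (X-F)/(1+X*F), where F keeps the first 5 significant bits of X with a
# 1 attached below, so u is small and a short polynomial suffices. A hedged
# C check of that identity (atan_f is the table value arctan(F)):
#
#   #include <math.h>
#   double atan_via_table(double x, double f, double atan_f)
#   {
#       double u = (x - f) / (1.0 + x * f);     /* Step 2                  */
#       return atan_f + atan(u);                /* Step 4: atan(u) ~ poly  */
#   }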
6182
6183
ATANA3: long 0xBFF6687E,0x314987D8
6184
ATANA2: long 0x4002AC69,0x34A26DB3
6185
ATANA1: long 0xBFC2476F,0x4E1DA28E
6186
6187
ATANB6: long 0x3FB34444,0x7F876989
6188
ATANB5: long 0xBFB744EE,0x7FAF45DB
6189
ATANB4: long 0x3FBC71C6,0x46940220
6190
ATANB3: long 0xBFC24924,0x921872F9
6191
ATANB2: long 0x3FC99999,0x99998FA9
6192
ATANB1: long 0xBFD55555,0x55555555
6193
6194
ATANC5: long 0xBFB70BF3,0x98539E6A
6195
ATANC4: long 0x3FBC7187,0x962D1D7D
6196
ATANC3: long 0xBFC24924,0x827107B8
6197
ATANC2: long 0x3FC99999,0x9996263E
6198
ATANC1: long 0xBFD55555,0x55555536
6199
6200
PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6201
NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6202
6203
PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6204
NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6205
6206
ATANTBL:
6207
long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6208
long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6209
long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6210
long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6211
long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6212
long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6213
long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6214
long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6215
long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6216
long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6217
long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6218
long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6219
long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6220
long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6221
long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6222
long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6223
long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6224
long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6225
long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6226
long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6227
long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6228
long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6229
long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6230
long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6231
long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6232
long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6233
long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6234
long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6235
long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6236
long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6237
long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6238
long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6239
long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6240
long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6241
long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6242
long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6243
long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6244
long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6245
long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6246
long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6247
long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6248
long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6249
long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6250
long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6251
long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6252
long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6253
long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6254
long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6255
long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6256
long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6257
long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6258
long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6259
long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6260
long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6261
long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6262
long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6263
long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6264
long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6265
long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6266
long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6267
long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6268
long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6269
long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6270
long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6271
long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6272
long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6273
long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6274
long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6275
long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6276
long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6277
long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6278
long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6279
long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6280
long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6281
long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6282
long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6283
long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6284
long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6285
long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6286
long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6287
long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6288
long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6289
long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6290
long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6291
long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6292
long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6293
long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6294
long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6295
long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6296
long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6297
long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6298
long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6299
long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6300
long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6301
long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6302
long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6303
long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6304
long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6305
long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6306
long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6307
long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6308
long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6309
long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6310
long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6311
long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6312
long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6313
long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6314
long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6315
long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6316
long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6317
long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6318
long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6319
long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6320
long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6321
long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6322
long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6323
long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6324
long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6325
long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6326
long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6327
long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6328
long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6329
long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6330
long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6331
long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6332
long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6333
long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6334
long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6335
6336
set X,FP_SCR0
6337
set XDCARE,X+2
6338
set XFRAC,X+4
6339
set XFRACLO,X+8
6340
6341
set ATANF,FP_SCR1
6342
set ATANFHI,ATANF+4
6343
set ATANFLO,ATANF+8
6344
6345
global satan
6346
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6347
satan:
6348
fmov.x (%a0),%fp0 # LOAD INPUT
6349
6350
mov.l (%a0),%d1
6351
mov.w 4(%a0),%d1
6352
fmov.x %fp0,X(%a6)
6353
and.l &0x7FFFFFFF,%d1
6354
6355
cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6356
bge.b ATANOK1
6357
bra.w ATANSM
6358
6359
ATANOK1:
6360
cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6361
ble.b ATANMAIN
6362
bra.w ATANBIG
6363
6364
#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6365
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6366
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6367
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6368
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6369
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6370
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6371
#--FETCH F AND THE SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6372
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6373
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6374
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6375
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6376
#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6377
6378
#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6379
#--WE CHOSE F TO BE +-2^K * 1.BBBB1
6380
#--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6381
#--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6382
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6383
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
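#--EXAMPLE (ADDED ILLUSTRATION, NOT PART OF THE ORIGINAL NOTES): FOR
#--X = 1.25 = 2^0 * 1.0100...B, K = 0 AND THE FOUR VARYING FRACTION BITS
#--ARE 0100B = 4, SO F = 2^0 * 1.01001B = 1.28125 AND THE INDEXING BELOW
#--SELECTS ENTRY (K+4)*16 + 4 = 68, I.E. BYTE OFFSET 68*16 = 1088 INTO
#--ATANTBL, WHICH HOLDS ATAN(1.28125).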
6384
6385
ATANMAIN:
6386
6387
and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6388
or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6389
mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6390
6391
fmov.x %fp0,%fp1 # FP1 IS X
6392
fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6393
fsub.x X(%a6),%fp0 # FP0 IS X-F
6394
fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6395
fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6396
6397
#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6398
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6399
#--SAVE REGISTERS FP2.
6400
6401
mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6402
mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6403
and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6404
and.l &0x7FFF0000,%d2 # EXPONENT OF F
6405
sub.l &0x3FFB0000,%d2 # K+4
6406
asr.l &1,%d2
6407
add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6408
asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6409
lea ATANTBL(%pc),%a1
6410
add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6411
mov.l (%a1)+,ATANF(%a6)
6412
mov.l (%a1)+,ATANFHI(%a6)
6413
mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6414
mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6415
and.l &0x80000000,%d1 # SIGN(F)
6416
or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6417
mov.l (%sp)+,%d2 # RESTORE d2
6418
6419
#--THAT'S ALL I HAVE TO DO FOR NOW,
6420
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6421
6422
#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6423
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6424
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6425
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6426
#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6427
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6428
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
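#--CHECK (ADDED ILLUSTRATION): EXPANDING A1*U*V*(A2 + V*(A3 + V)) GIVES
#--U*V*(A1*A2 + A1*A3*V + A1*V*V). MATCHING THE NATURAL FORM
#--U*V*(a1 + a2*V + a3*V*V) TERM BY TERM REQUIRES A1 = a3, A2 = a1/a3,
#--A3 = a2/a3, WHICH IS THE RELATION STATED ABOVE (LOWER CASE DENOTING
#--THE NATURAL COEFFICIENTS).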
6429
6430
fmovm.x &0x04,-(%sp) # save fp2
6431
6432
fmov.x %fp0,%fp1
6433
fmul.x %fp1,%fp1
6434
fmov.d ATANA3(%pc),%fp2
6435
fadd.x %fp1,%fp2 # A3+V
6436
fmul.x %fp1,%fp2 # V*(A3+V)
6437
fmul.x %fp0,%fp1 # U*V
6438
fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6439
fmul.d ATANA1(%pc),%fp1 # A1*U*V
6440
fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6441
fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6442
6443
fmovm.x (%sp)+,&0x20 # restore fp2
6444
6445
fmov.l %d0,%fpcr # restore users rnd mode,prec
6446
fadd.x ATANF(%a6),%fp0 # ATAN(X)
6447
bra t_inx2
6448
6449
ATANBORS:
6450
#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
6451
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6452
cmp.l %d1,&0x3FFF8000
6453
bgt.w ATANBIG # I.E. |X| >= 16
6454
6455
ATANSM:
6456
#--|X| <= 1/16
6457
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6458
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6459
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6460
#--WHERE Y = X*X, AND Z = Y*Y.
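#--CHECK (ADDED ILLUSTRATION): WITH Z = Y*Y,
#--	B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))
#--	= [B1+Z*(B3+Z*B5)] + Y*[B2+Z*(B4+Z*B6)],
#--SO THE TWO BRACKETED PIECES CAN BE EVALUATED INDEPENDENTLY.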
6461
6462
cmp.l %d1,&0x3FD78000
6463
blt.w ATANTINY
6464
6465
#--COMPUTE POLYNOMIAL
6466
fmovm.x &0x0c,-(%sp) # save fp2/fp3
6467
6468
	fmul.x		%fp0,%fp0	# FP0 IS Y = X*X
6469
6470
fmov.x %fp0,%fp1
6471
fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6472
6473
fmov.d ATANB6(%pc),%fp2
6474
fmov.d ATANB5(%pc),%fp3
6475
6476
fmul.x %fp1,%fp2 # Z*B6
6477
fmul.x %fp1,%fp3 # Z*B5
6478
6479
fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6480
fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6481
6482
fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6483
fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6484
6485
fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6486
fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6487
6488
fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6489
fmul.x X(%a6),%fp0 # X*Y
6490
6491
fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6492
6493
fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6494
6495
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6496
6497
fmov.l %d0,%fpcr # restore users rnd mode,prec
6498
fadd.x X(%a6),%fp0
6499
bra t_inx2
6500
6501
ATANTINY:
6502
#--|X| < 2^(-40), ATAN(X) = X
6503
6504
fmov.l %d0,%fpcr # restore users rnd mode,prec
6505
mov.b &FMOV_OP,%d1 # last inst is MOVE
6506
fmov.x X(%a6),%fp0 # last inst - possible exception set
6507
6508
bra t_catch
6509
6510
ATANBIG:
6511
#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6512
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6513
cmp.l %d1,&0x40638000
6514
bgt.w ATANHUGE
6515
6516
#--APPROXIMATE ATAN(-1/X) BY
6517
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6518
#--THIS CAN BE RE-WRITTEN AS
6519
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6520
6521
fmovm.x &0x0c,-(%sp) # save fp2/fp3
6522
6523
fmov.s &0xBF800000,%fp1 # LOAD -1
6524
fdiv.x %fp0,%fp1 # FP1 IS -1/X
6525
6526
#--DIVIDE IS STILL CRANKING
6527
6528
fmov.x %fp1,%fp0 # FP0 IS X'
6529
fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6530
fmov.x %fp1,X(%a6) # X IS REALLY X'
6531
6532
fmov.x %fp0,%fp1
6533
fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6534
6535
fmov.d ATANC5(%pc),%fp3
6536
fmov.d ATANC4(%pc),%fp2
6537
6538
fmul.x %fp1,%fp3 # Z*C5
6539
	fmul.x		%fp1,%fp2	# Z*C4
6540
6541
fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6542
fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6543
6544
fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6545
fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6546
6547
fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6548
fmul.x X(%a6),%fp0 # X'*Y
6549
6550
fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6551
6552
	fmul.x		%fp1,%fp0	# X'*Y*([C1+Z*(C3+Z*C5)]
6553
#					# ...	+[Y*(C2+Z*C4)])
6554
fadd.x X(%a6),%fp0
6555
6556
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6557
6558
fmov.l %d0,%fpcr # restore users rnd mode,prec
6559
tst.b (%a0)
6560
bpl.b pos_big
6561
6562
neg_big:
6563
fadd.x NPIBY2(%pc),%fp0
6564
bra t_minx2
6565
6566
pos_big:
6567
fadd.x PPIBY2(%pc),%fp0
6568
bra t_pinx2
6569
6570
ATANHUGE:
6571
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6572
tst.b (%a0)
6573
bpl.b pos_huge
6574
6575
neg_huge:
6576
fmov.x NPIBY2(%pc),%fp0
6577
fmov.l %d0,%fpcr
6578
fadd.x PTINY(%pc),%fp0
6579
bra t_minx2
6580
6581
pos_huge:
6582
fmov.x PPIBY2(%pc),%fp0
6583
fmov.l %d0,%fpcr
6584
fadd.x NTINY(%pc),%fp0
6585
bra t_pinx2
6586
6587
global satand
6588
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6589
satand:
6590
bra t_extdnrm
6591
6592
#########################################################################
6593
# sasin(): computes the inverse sine of a normalized input #
6594
# sasind(): computes the inverse sine of a denormalized input #
6595
# #
6596
# INPUT *************************************************************** #
6597
# a0 = pointer to extended precision input #
6598
# d0 = round precision,mode #
6599
# #
6600
# OUTPUT ************************************************************** #
6601
# fp0 = arcsin(X) #
6602
# #
6603
# ACCURACY and MONOTONICITY ******************************************* #
6604
#	The returned result is within 3 ulps in 64 significant bits,	#
6605
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6606
# rounded to double precision. The result is provably monotonic #
6607
# in double precision. #
6608
# #
6609
# ALGORITHM *********************************************************** #
6610
# #
6611
# ASIN #
6612
# 1. If |X| >= 1, go to 3. #
6613
# #
6614
# 2. (|X| < 1) Calculate asin(X) by #
6615
# z := sqrt( [1-X][1+X] ) #
6616
# asin(X) = atan( x / z ). #
6617
# Exit. #
6618
# #
6619
# 3. If |X| > 1, go to 5. #
6620
# #
6621
# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6622
# #
6623
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6624
# Exit. #
6625
# #
6626
#########################################################################
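#
# Worked check of the Step 2 identity (added illustration, not part of
# the original notes): for X = 0.5, z = sqrt(0.75) = 0.8660254 and
# atan(0.5/0.8660254) = atan(0.5773503) = 0.5235988 = Pi/6 = asin(0.5).
#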
6627
6628
global sasin
6629
sasin:
6630
fmov.x (%a0),%fp0 # LOAD INPUT
6631
6632
mov.l (%a0),%d1
6633
mov.w 4(%a0),%d1
6634
and.l &0x7FFFFFFF,%d1
6635
cmp.l %d1,&0x3FFF8000
6636
bge.b ASINBIG
6637
6638
# This catch is added here for the '060 QSP. Originally, the call to
6639
# satan() would handle this case by causing the exception which would
6640
# not be caught until gen_except(). Now, with the exceptions being
6641
# detected inside of satan(), the exception would have been handled there
6642
# instead of inside sasin() as expected.
6643
cmp.l %d1,&0x3FD78000
6644
blt.w ASINTINY
6645
6646
#--THIS IS THE USUAL CASE, |X| < 1
6647
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6648
6649
ASINMAIN:
6650
fmov.s &0x3F800000,%fp1
6651
fsub.x %fp0,%fp1 # 1-X
6652
fmovm.x &0x4,-(%sp) # {fp2}
6653
fmov.s &0x3F800000,%fp2
6654
fadd.x %fp0,%fp2 # 1+X
6655
fmul.x %fp2,%fp1 # (1+X)(1-X)
6656
fmovm.x (%sp)+,&0x20 # {fp2}
6657
fsqrt.x %fp1 # SQRT([1-X][1+X])
6658
fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6659
fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6660
lea (%sp),%a0 # pass ptr to X/SQRT(...)
6661
bsr satan
6662
add.l &0xc,%sp # clear X/SQRT(...) from stack
6663
bra t_inx2
6664
6665
ASINBIG:
6666
fabs.x %fp0 # |X|
6667
fcmp.s %fp0,&0x3F800000
6668
fbgt t_operr # cause an operr exception
6669
6670
#--|X| = 1, ASIN(X) = +- PI/2.
6671
ASINONE:
6672
fmov.x PIBY2(%pc),%fp0
6673
mov.l (%a0),%d1
6674
and.l &0x80000000,%d1 # SIGN BIT OF X
6675
or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6676
mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6677
fmov.l %d0,%fpcr
6678
fmul.s (%sp)+,%fp0
6679
bra t_inx2
6680
6681
#--|X| < 2^(-40), ASIN(X) = X
6682
ASINTINY:
6683
fmov.l %d0,%fpcr # restore users rnd mode,prec
6684
mov.b &FMOV_OP,%d1 # last inst is MOVE
6685
fmov.x (%a0),%fp0 # last inst - possible exception
6686
bra t_catch
6687
6688
global sasind
6689
#--ASIN(X) = X FOR DENORMALIZED X
6690
sasind:
6691
bra t_extdnrm
6692
6693
#########################################################################
6694
# sacos(): computes the inverse cosine of a normalized input #
6695
# sacosd(): computes the inverse cosine of a denormalized input #
6696
# #
6697
# INPUT *************************************************************** #
6698
# a0 = pointer to extended precision input #
6699
# d0 = round precision,mode #
6700
# #
6701
# OUTPUT ************************************************************** #
6702
# fp0 = arccos(X) #
6703
# #
6704
# ACCURACY and MONOTONICITY ******************************************* #
6705
#	The returned result is within 3 ulps in 64 significant bits,	#
6706
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6707
# rounded to double precision. The result is provably monotonic #
6708
# in double precision. #
6709
# #
6710
# ALGORITHM *********************************************************** #
6711
# #
6712
# ACOS #
6713
# 1. If |X| >= 1, go to 3. #
6714
# #
6715
# 2. (|X| < 1) Calculate acos(X) by #
6716
# z := (1-X) / (1+X) #
6717
# acos(X) = 2 * atan( sqrt(z) ). #
6718
# Exit. #
6719
# #
6720
# 3. If |X| > 1, go to 5. #
6721
# #
6722
# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6723
# #
6724
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6725
# Exit. #
6726
# #
6727
#########################################################################
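#
# Worked check of the Step 2 identity (added illustration, not part of
# the original notes): for X = 0.5, z = (1-0.5)/(1+0.5) = 1/3, so
# acos(0.5) = 2*atan(sqrt(1/3)) = 2*atan(0.5773503) = 2*(Pi/6) = Pi/3.
#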
6728
6729
global sacos
6730
sacos:
6731
fmov.x (%a0),%fp0 # LOAD INPUT
6732
6733
mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6734
mov.w 4(%a0),%d1
6735
and.l &0x7FFFFFFF,%d1
6736
cmp.l %d1,&0x3FFF8000
6737
bge.b ACOSBIG
6738
6739
#--THIS IS THE USUAL CASE, |X| < 1
6740
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6741
6742
ACOSMAIN:
6743
fmov.s &0x3F800000,%fp1
6744
fadd.x %fp0,%fp1 # 1+X
6745
fneg.x %fp0 # -X
6746
fadd.s &0x3F800000,%fp0 # 1-X
6747
fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6748
fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6749
mov.l %d0,-(%sp) # save original users fpcr
6750
clr.l %d0
6751
fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6752
lea (%sp),%a0 # pass ptr to sqrt
6753
bsr satan # ATAN(SQRT([1-X]/[1+X]))
6754
add.l &0xc,%sp # clear SQRT(...) from stack
6755
6756
fmov.l (%sp)+,%fpcr # restore users round prec,mode
6757
fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6758
bra t_pinx2
6759
6760
ACOSBIG:
6761
fabs.x %fp0
6762
fcmp.s %fp0,&0x3F800000
6763
fbgt t_operr # cause an operr exception
6764
6765
#--|X| = 1, ACOS(X) = 0 OR PI
6766
tst.b (%a0) # is X positive or negative?
6767
bpl.b ACOSP1
6768
6769
#--X = -1
6770
#Returns PI and inexact exception
6771
ACOSM1:
6772
fmov.x PI(%pc),%fp0 # load PI
6773
fmov.l %d0,%fpcr # load round mode,prec
6774
fadd.s &0x00800000,%fp0 # add a small value
6775
bra t_pinx2
6776
6777
ACOSP1:
6778
bra ld_pzero # answer is positive zero
6779
6780
global sacosd
6781
#--ACOS(X) = PI/2 FOR DENORMALIZED X
6782
sacosd:
6783
fmov.l %d0,%fpcr # load user's rnd mode/prec
6784
fmov.x PIBY2(%pc),%fp0
6785
bra t_pinx2
6786
6787
#########################################################################
6788
# setox(): computes the exponential for a normalized input #
6789
# setoxd(): computes the exponential for a denormalized input #
6790
# setoxm1(): computes the exponential minus 1 for a normalized input #
6791
# setoxm1d(): computes the exponential minus 1 for a denormalized input #
6792
# #
6793
# INPUT *************************************************************** #
6794
# a0 = pointer to extended precision input #
6795
# d0 = round precision,mode #
6796
# #
6797
# OUTPUT ************************************************************** #
6798
# fp0 = exp(X) or exp(X)-1 #
6799
# #
6800
# ACCURACY and MONOTONICITY ******************************************* #
6801
#	The returned result is within 0.85 ulps in 64 significant bits,	#
6802
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803
# rounded to double precision. The result is provably monotonic #
6804
# in double precision. #
6805
# #
6806
# ALGORITHM and IMPLEMENTATION **************************************** #
6807
# #
6808
# setoxd #
6809
# ------ #
6810
# Step 1. Set ans := 1.0 #
6811
# #
6812
# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6813
# Notes: This will always generate one exception -- inexact. #
6814
# #
6815
# #
6816
# setox #
6817
# ----- #
6818
# #
6819
# Step 1. Filter out extreme cases of input argument. #
6820
# 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6821
# 1.2 Go to Step 7. #
6822
# 1.3 If |X| < 16380 log(2), go to Step 2. #
6823
# 1.4 Go to Step 8. #
6824
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825
# To avoid the use of floating-point comparisons, a #
6826
# compact representation of |X| is used. This format is a #
6827
# 32-bit integer, the upper (more significant) 16 bits #
6828
# are the sign and biased exponent field of |X|; the #
6829
# lower 16 bits are the 16 most significant fraction #
6830
# (including the explicit bit) bits of |X|. Consequently, #
6831
# the comparisons in Steps 1.1 and 1.3 can be performed #
6832
# by integer comparison. Note also that the constant #
6833
# 16380 log(2) used in Step 1.3 is also in the compact #
6834
# form. Thus taking the branch to Step 2 guarantees #
6835
#		|X| < 16380 log(2). There is no harm in having a small	#
6836
# number of cases where |X| is less than, but close to, #
6837
#		16380 log(2) and the branch to Step 8 is taken.		#
6838
# #
6839
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6840
# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6841
# was taken) #
6842
# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6843
# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6844
# or 63. #
6845
# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6846
# 2.5 Calculate the address of the stored value of #
6847
# 2^(J/64). #
6848
# 2.6 Create the value Scale = 2^M. #
6849
# Notes: The calculation in 2.2 is really performed by #
6850
# Z := X * constant #
6851
# N := round-to-nearest-integer(Z) #
6852
# where #
6853
# constant := single-precision( 64/log 2 ). #
6854
# #
6855
# Using a single-precision constant avoids memory #
6856
# access. Another effect of using a single-precision #
6857
# "constant" is that the calculated value Z is #
6858
# #
6859
# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6860
# #
6861
# This error has to be considered later in Steps 3 and 4. #
6862
# #
6863
# Step 3. Calculate X - N*log2/64. #
6864
# 3.1 R := X + N*L1, #
6865
# where L1 := single-precision(-log2/64). #
6866
# 3.2 R := R + N*L2, #
6867
# L2 := extended-precision(-log2/64 - L1).#
6868
# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6869
# approximate the value -log2/64 to 88 bits of accuracy. #
6870
# b) N*L1 is exact because N is no longer than 22 bits #
6871
# and L1 is no longer than 24 bits. #
6872
# c) The calculation X+N*L1 is also exact due to #
6873
# cancellation. Thus, R is practically X+N(L1+L2) to full #
6874
# 64 bits. #
6875
# d) It is important to estimate how large can |R| be #
6876
# after Step 3.2. #
6877
# #
6878
# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6879
# X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6880
# X*64/log2 - N = f - eps*X 64/log2 #
6881
# X - N*log2/64 = f*log2/64 - eps*X #
6882
# #
6883
# #
6884
# Now |X| <= 16446 log2, thus #
6885
# #
6886
# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6887
# <= 0.57 log2/64. #
6888
# This bound will be used in Step 4. #
6889
# #
6890
# Step 4. Approximate exp(R)-1 by a polynomial #
6891
# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6892
# Notes: a) In order to reduce memory access, the coefficients #
6893
# are made as "short" as possible: A1 (which is 1/2), A4 #
6894
# and A5 are single precision; A2 and A3 are double #
6895
# precision. #
6896
# b) Even with the restrictions above, #
6897
# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6898
# Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6899
# c) To fully utilize the pipeline, p is separated into #
6900
# two independent pieces of roughly equal complexities #
6901
# p = [ R + R*S*(A2 + S*A4) ] + #
6902
# [ S*(A1 + S*(A3 + S*A5)) ] #
6903
# where S = R*R. #
6904
# #
6905
# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6906
# ans := T + ( T*p + t) #
6907
# where T and t are the stored values for 2^(J/64). #
6908
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6909
# 2^(J/64) to roughly 85 bits; T is in extended precision #
6910
# and t is in single precision. Note also that T is #
6911
# rounded to 62 bits so that the last two bits of T are #
6912
# zero. The reason for such a special form is that T-1, #
6913
# T-2, and T-8 will all be exact --- a property that will #
6914
# give much more accurate computation of the function #
6915
# EXPM1. #
6916
# #
6917
# Step 6. Reconstruction of exp(X) #
6918
# exp(X) = 2^M * 2^(J/64) * exp(R). #
6919
# 6.1 If AdjFlag = 0, go to 6.3 #
6920
# 6.2 ans := ans * AdjScale #
6921
# 6.3 Restore the user FPCR #
6922
# 6.4 Return ans := ans * Scale. Exit. #
6923
# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6924
# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6925
# neither overflow nor underflow. If AdjFlag = 1, that #
6926
# means that #
6927
# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6928
# Hence, exp(X) may overflow or underflow or neither. #
6929
# When that is the case, AdjScale = 2^(M1) where M1 is #
6930
# approximately M. Thus 6.2 will never cause #
6931
# over/underflow. Possible exception in 6.4 is overflow #
6932
# or underflow. The inexact exception is not generated in #
6933
# 6.4. Although one can argue that the inexact flag #
6934
# should always be raised, to simulate that exception #
6935
#		costs too much for what the flag is worth in practical uses.	#
6936
# #
6937
# Step 7. Return 1 + X. #
6938
# 7.1 ans := X #
6939
# 7.2 Restore user FPCR. #
6940
# 7.3 Return ans := 1 + ans. Exit #
6941
# Notes: For non-zero X, the inexact exception will always be #
6942
# raised by 7.3. That is the only exception raised by 7.3.#
6943
# Note also that we use the FMOVEM instruction to move X #
6944
# in Step 7.1 to avoid unnecessary trapping. (Although #
6945
# the FMOVEM may not seem relevant since X is normalized, #
6946
# the precaution will be useful in the library version of #
6947
# this code where the separate entry for denormalized #
6948
# inputs will be done away with.) #
6949
# #
6950
# Step 8. Handle exp(X) where |X| >= 16380log2. #
6951
# 8.1 If |X| > 16480 log2, go to Step 9. #
6952
# (mimic 2.2 - 2.6) #
6953
# 8.2 N := round-to-integer( X * 64/log2 ) #
6954
# 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6955
# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6956
# AdjFlag := 1. #
6957
# 8.5 Calculate the address of the stored value #
6958
# 2^(J/64). #
6959
# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6960
# 8.7 Go to Step 3. #
6961
# Notes: Refer to notes for 2.2 - 2.6. #
6962
# #
6963
# Step 9. Handle exp(X), |X| > 16480 log2. #
6964
# 9.1 If X < 0, go to 9.3 #
6965
# 9.2 ans := Huge, go to 9.4 #
6966
# 9.3 ans := Tiny. #
6967
# 9.4 Restore user FPCR. #
6968
# 9.5 Return ans := ans * ans. Exit. #
6969
# Notes: Exp(X) will surely overflow or underflow, depending on #
6970
# X's sign. "Huge" and "Tiny" are respectively large/tiny #
6971
# extended-precision numbers whose square over/underflow #
6972
# with an inexact result. Thus, 9.5 always raises the #
6973
# inexact together with either overflow or underflow. #
6974
# #
6975
# setoxm1d #
6976
# -------- #
6977
# #
6978
# Step 1. Set ans := 0 #
6979
# #
6980
# Step 2. Return ans := X + ans. Exit. #
6981
# Notes: This will return X with the appropriate rounding #
6982
# precision prescribed by the user FPCR. #
6983
# #
6984
# setoxm1 #
6985
# ------- #
6986
# #
6987
# Step 1. Check |X| #
6988
# 1.1 If |X| >= 1/4, go to Step 1.3. #
6989
# 1.2 Go to Step 7. #
6990
# 1.3 If |X| < 70 log(2), go to Step 2. #
6991
# 1.4 Go to Step 10. #
6992
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993
# However, it is conceivable |X| can be small very often #
6994
# because EXPM1 is intended to evaluate exp(X)-1 #
6995
# accurately when |X| is small. For further details on #
6996
# the comparisons, see the notes on Step 1 of setox. #
6997
# #
6998
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6999
# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
7000
# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
7001
# or 63. #
7002
# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
7003
# 2.4 Calculate the address of the stored value of #
7004
# 2^(J/64). #
7005
# 2.5 Create the values Sc = 2^M and #
7006
# OnebySc := -2^(-M). #
7007
# Notes: See the notes on Step 2 of setox. #
7008
# #
7009
# Step 3. Calculate X - N*log2/64. #
7010
# 3.1 R := X + N*L1, #
7011
# where L1 := single-precision(-log2/64). #
7012
# 3.2 R := R + N*L2, #
7013
# L2 := extended-precision(-log2/64 - L1).#
7014
# Notes: Applying the analysis of Step 3 of setox in this case #
7015
# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
7016
# this case). #
7017
# #
7018
# Step 4. Approximate exp(R)-1 by a polynomial #
7019
# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7020
# Notes: a) In order to reduce memory access, the coefficients #
7021
# are made as "short" as possible: A1 (which is 1/2), A5 #
7022
# and A6 are single precision; A2, A3 and A4 are double #
7023
# precision. #
7024
# b) Even with the restriction above, #
7025
# |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7026
# for all |R| <= 0.0055. #
7027
# c) To fully utilize the pipeline, p is separated into #
7028
# two independent pieces of roughly equal complexity #
7029
# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
7030
# [ R + S*(A1 + S*(A3 + S*A5)) ] #
7031
# where S = R*R. #
7032
# #
7033
# Step 5. Compute 2^(J/64)*p by #
7034
# p := T*p #
7035
# where T and t are the stored values for 2^(J/64). #
7036
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
7037
# 2^(J/64) to roughly 85 bits; T is in extended precision #
7038
# and t is in single precision. Note also that T is #
7039
# rounded to 62 bits so that the last two bits of T are #
7040
# zero. The reason for such a special form is that T-1, #
7041
# T-2, and T-8 will all be exact --- a property that will #
7042
# be exploited in Step 6 below. The total relative error #
7043
# in p is no bigger than 2^(-67.7) compared to the final #
7044
# result. #
7045
# #
7046
# Step 6. Reconstruction of exp(X)-1 #
7047
# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7048
# 6.1 If M <= 63, go to Step 6.3. #
7049
# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7050
# 6.3 If M >= -3, go to 6.5. #
7051
# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7052
# 6.5 ans := (T + OnebySc) + (p + t). #
7053
# 6.6 Restore user FPCR. #
7054
# 6.7 Return ans := Sc * ans. Exit. #
7055
# Notes: The various arrangements of the expressions give #
7056
# accurate evaluations. #
7057
# #
7058
# Step 7. exp(X)-1 for |X| < 1/4. #
7059
# 7.1 If |X| >= 2^(-65), go to Step 9. #
7060
# 7.2 Go to Step 8. #
7061
# #
7062
# Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7063
# 8.1 If |X| < 2^(-16312), goto 8.3 #
7064
# 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7065
# Exit. #
7066
# 8.3 X := X * 2^(140). #
7067
# 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7068
#			Return ans := ans*2^(-140). Exit		#
7069
# Notes: The idea is to return "X - tiny" under the user #
7070
# precision and rounding modes. To avoid unnecessary #
7071
# inefficiency, we stay away from denormalized numbers #
7072
# the best we can. For |X| >= 2^(-16312), the #
7073
# straightforward 8.2 generates the inexact exception as #
7074
# the case warrants. #
7075
# #
7076
# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7077
# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7078
# Notes: a) In order to reduce memory access, the coefficients #
7079
# are made as "short" as possible: B1 (which is 1/2), B9 #
7080
# to B12 are single precision; B3 to B8 are double #
7081
# precision; and B2 is double extended. #
7082
# b) Even with the restriction above, #
7083
# |p - (exp(X)-1)| < |X| 2^(-70.6) #
7084
# for all |X| <= 0.251. #
7085
# Note that 0.251 is slightly bigger than 1/4. #
7086
# c) To fully preserve accuracy, the polynomial is #
7087
# computed as #
7088
# X + ( S*B1 + Q ) where S = X*X and #
7089
# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7090
# d) To fully utilize the pipeline, Q is separated into #
7091
# two independent pieces of roughly equal complexity #
7092
# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7093
# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7094
# #
7095
# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7096
# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7097
# practical purposes. Therefore, go to Step 1 of setox. #
7098
# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7099
# purposes. #
7100
# ans := -1 #
7101
# Restore user FPCR #
7102
# Return ans := ans + 2^(-126). Exit. #
7103
# Notes: 10.2 will always create an inexact and return -1 + tiny #
7104
# in the user rounding precision and mode. #
7105
# #
7106
#########################################################################
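#
# Worked example of the setox reduction, Steps 2-6 (added illustration,
# not part of the original notes): for X = 1,
#	N = round(1 * 64/log2) = 92, J = 92 mod 64 = 28, M = (92-28)/64 = 1,
#	R = 1 - 92*log2/64 = 0.0036009,
#	exp(1) = 2^1 * 2^(28/64) * exp(R)
#	       = 2 * 1.3542555 * 1.0036074 = 2.718282,
# which matches exp(1) to the displayed precision.
#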
7107
7108
L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7109
7110
EEXPA3: long 0x3FA55555,0x55554CC1
7111
EEXPA2: long 0x3FC55555,0x55554A54
7112
7113
EM1A4: long 0x3F811111,0x11174385
7114
EM1A3: long 0x3FA55555,0x55554F5A
7115
7116
EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000
7117
7118
EM1B8: long 0x3EC71DE3,0xA5774682
7119
EM1B7: long 0x3EFA01A0,0x19D7CB68
7120
7121
EM1B6: long 0x3F2A01A0,0x1A019DF3
7122
EM1B5: long 0x3F56C16C,0x16C170E2
7123
7124
EM1B4: long 0x3F811111,0x11111111
7125
EM1B3: long 0x3FA55555,0x55555555
7126
7127
EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7128
long 0x00000000
7129
7130
TWO140: long 0x48B00000,0x00000000
7131
TWON140:
7132
long 0x37300000,0x00000000
7133
7134
EEXPTBL:
7135
long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7136
long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137
long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138
long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139
long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141
long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142
long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147
long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149
long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150
long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151
long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152
long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153
long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155
long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156
long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157
long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158
long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159
long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160
long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162
long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163
long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164
long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165
long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166
long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168
long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170
long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171
long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172
long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173
long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174
long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175
long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176
long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178
long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180
long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181
long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184
long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185
long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187
long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188
long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191
long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194
long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197
long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198
long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7199
7200
set ADJFLAG,L_SCR2
7201
set SCALE,FP_SCR0
7202
set ADJSCALE,FP_SCR1
7203
set SC,FP_SCR0
7204
set ONEBYSC,FP_SCR1
7205
7206
global setox
7207
setox:
7208
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7209
7210
#--Step 1.
7211
mov.l (%a0),%d1 # load part of input X
7212
and.l &0x7FFF0000,%d1 # biased expo. of X
7213
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7214
bge.b EXPC1 # normal case
7215
bra EXPSM
7216
7217
EXPC1:
7218
#--The case |X| >= 2^(-65)
7219
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7220
cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7221
blt.b EXPMAIN # normal case
7222
bra EEXPBIG
7223
7224
EXPMAIN:
7225
#--Step 2.
7226
#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7227
fmov.x (%a0),%fp0 # load input from (a0)
7228
7229
fmov.x %fp0,%fp1
7230
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7231
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7232
mov.l &0,ADJFLAG(%a6)
7233
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7234
lea EEXPTBL(%pc),%a1
7235
fmov.l %d1,%fp0 # convert to floating-format
7236
7237
mov.l %d1,L_SCR1(%a6) # save N temporarily
7238
and.l &0x3F,%d1 # D0 is J = N mod 64
7239
lsl.l &4,%d1
7240
add.l %d1,%a1 # address of 2^(J/64)
7241
mov.l L_SCR1(%a6),%d1
7242
asr.l &6,%d1 # D0 is M
7243
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7244
mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7245
7246
EXPCONT1:
7247
#--Step 3.
7248
#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7249
#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
7250
fmov.x %fp0,%fp2
7251
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7252
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7253
fadd.x %fp1,%fp0 # X + N*L1
7254
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7255
7256
#--Step 4.
7257
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7261
7262
fmov.x %fp0,%fp1
7263
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7264
7265
fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7266
7267
fmul.x %fp1,%fp2 # fp2 IS S*A5
7268
fmov.x %fp1,%fp3
7269
fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7270
7271
fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7272
fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7273
7274
fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7275
mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7276
mov.l &0x80000000,SCALE+4(%a6)
7277
clr.l SCALE+8(%a6)
7278
7279
fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7280
7281
fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7282
fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7283
7284
fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7285
fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7286
7287
fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7288
fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7289
7290
#--Step 5
7291
#--final reconstruction process
7292
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7293
7294
fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7295
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7296
fadd.s (%a1),%fp0 # accurate 2^(J/64)
7297
7298
fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7299
mov.l ADJFLAG(%a6),%d1
7300
7301
#--Step 6
7302
tst.l %d1
7303
beq.b NORMAL
7304
ADJUST:
7305
fmul.x ADJSCALE(%a6),%fp0
7306
NORMAL:
7307
fmov.l %d0,%fpcr # restore user FPCR
7308
mov.b &FMUL_OP,%d1 # last inst is MUL
7309
fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7310
bra t_catch
7311
7312
EXPSM:
7313
#--Step 7
7314
fmovm.x (%a0),&0x80 # load X
7315
fmov.l %d0,%fpcr
7316
fadd.s &0x3F800000,%fp0 # 1+X in user mode
7317
bra t_pinx2
7318
7319
EEXPBIG:
7320
#--Step 8
7321
cmp.l %d1,&0x400CB27C # 16480 log2
7322
bgt.b EXP2BIG
7323
#--Steps 8.2 -- 8.6
7324
fmov.x (%a0),%fp0 # load input from (a0)
7325
7326
fmov.x %fp0,%fp1
7327
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7328
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7329
mov.l &1,ADJFLAG(%a6)
7330
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7331
lea EEXPTBL(%pc),%a1
7332
fmov.l %d1,%fp0 # convert to floating-format
7333
mov.l %d1,L_SCR1(%a6) # save N temporarily
7334
and.l &0x3F,%d1 # D0 is J = N mod 64
7335
lsl.l &4,%d1
7336
add.l %d1,%a1 # address of 2^(J/64)
7337
mov.l L_SCR1(%a6),%d1
7338
asr.l &6,%d1 # D0 is K
7339
mov.l %d1,L_SCR1(%a6) # save K temporarily
7340
asr.l &1,%d1 # D0 is M1
7341
sub.l %d1,L_SCR1(%a6) # a1 is M
7342
add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7343
mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7344
mov.l &0x80000000,ADJSCALE+4(%a6)
7345
clr.l ADJSCALE+8(%a6)
7346
mov.l L_SCR1(%a6),%d1 # D0 is M
7347
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7348
bra.w EXPCONT1 # go back to Step 3
7349
7350
EXP2BIG:
7351
#--Step 9
7352
tst.b (%a0) # is X positive or negative?
7353
bmi t_unfl2
7354
bra t_ovfl2
7355
7356
global setoxd
7357
setoxd:
7358
#--entry point for EXP(X), X is denormalized
7359
mov.l (%a0),-(%sp)
7360
andi.l &0x80000000,(%sp)
7361
ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7362
7363
fmov.s &0x3F800000,%fp0
7364
7365
fmov.l %d0,%fpcr
7366
fadd.s (%sp)+,%fp0
7367
bra t_pinx2
7368
7369
global setoxm1
7370
setoxm1:
7371
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7372
7373
#--Step 1.
7374
#--Step 1.1
7375
mov.l (%a0),%d1 # load part of input X
7376
and.l &0x7FFF0000,%d1 # biased expo. of X
7377
cmp.l %d1,&0x3FFD0000 # 1/4
7378
bge.b EM1CON1 # |X| >= 1/4
7379
bra EM1SM
7380
7381
EM1CON1:
7382
#--Step 1.3
7383
#--The case |X| >= 1/4
7384
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7385
cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7386
ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7387
bra EM1BIG
7388
7389
EM1MAIN:
7390
#--Step 2.
7391
#--This is the case: 1/4 <= |X| <= 70 log2.
7392
fmov.x (%a0),%fp0 # load input from (a0)
7393
7394
fmov.x %fp0,%fp1
7395
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7396
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7397
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7398
lea EEXPTBL(%pc),%a1
7399
fmov.l %d1,%fp0 # convert to floating-format
7400
7401
mov.l %d1,L_SCR1(%a6) # save N temporarily
7402
and.l &0x3F,%d1 # D0 is J = N mod 64
7403
lsl.l &4,%d1
7404
add.l %d1,%a1 # address of 2^(J/64)
7405
mov.l L_SCR1(%a6),%d1
7406
asr.l &6,%d1 # D0 is M
7407
mov.l %d1,L_SCR1(%a6) # save a copy of M
7408
7409
#--Step 3.
7410
#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7411
#--a0 points to 2^(J/64), D0 and a1 both contain M
7412
fmov.x %fp0,%fp2
7413
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7414
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7415
fadd.x %fp1,%fp0 # X + N*L1
7416
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7417
add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M
7418
7419
#--Step 4.
7420
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7424
7425
fmov.x %fp0,%fp1
7426
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7427
7428
fmov.s &0x3950097B,%fp2 # fp2 IS a6
7429
7430
fmul.x %fp1,%fp2 # fp2 IS S*A6
7431
fmov.x %fp1,%fp3
7432
fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7433
7434
fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7435
fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7436
mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7437
mov.l &0x80000000,SC+4(%a6)
7438
clr.l SC+8(%a6)
7439
7440
fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7441
mov.l L_SCR1(%a6),%d1 # D0 is M
7442
neg.w %d1 # D0 is -M
7443
fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7444
add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7445
fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7446
fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7447
7448
fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7449
or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7450
mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7451
mov.l &0x80000000,ONEBYSC+4(%a6)
7452
clr.l ONEBYSC+8(%a6)
7453
fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7454
7455
fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7456
fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7457
7458
fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7459
7460
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7461
7462
#--Step 5
7463
#--Compute 2^(J/64)*p
7464
7465
fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7466
7467
#--Step 6
7468
#--Step 6.1
7469
mov.l L_SCR1(%a6),%d1 # retrieve M
7470
cmp.l %d1,&63
7471
ble.b MLE63
7472
#--Step 6.2 M >= 64
7473
fmov.s 12(%a1),%fp1 # fp1 is t
7474
fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7475
fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7476
fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7477
bra EM1SCALE
7478
MLE63:
7479
#--Step 6.3 M <= 63
7480
cmp.l %d1,&-3
7481
bge.b MGEN3
7482
MLTN3:
7483
#--Step 6.4 M <= -4
7484
fadd.s 12(%a1),%fp0 # p+t
7485
fadd.x (%a1),%fp0 # T+(p+t)
7486
fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7487
bra EM1SCALE
7488
MGEN3:
7489
#--Step 6.5 -3 <= M <= 63
7490
fmov.x (%a1)+,%fp1 # fp1 is T
7491
fadd.s (%a1),%fp0 # fp0 is p+t
7492
fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7493
fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7494
7495
EM1SCALE:
7496
#--Step 6.6
7497
fmov.l %d0,%fpcr
7498
fmul.x SC(%a6),%fp0
7499
bra t_inx2
7500
7501
EM1SM:
7502
#--Step 7 |X| < 1/4.
7503
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7504
bge.b EM1POLY
7505
7506
EM1TINY:
7507
#--Step 8 |X| < 2^(-65)
7508
cmp.l %d1,&0x00330000 # 2^(-16312)
7509
blt.b EM12TINY
7510
#--Step 8.2
7511
mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7512
mov.l &0x80000000,SC+4(%a6)
7513
clr.l SC+8(%a6)
7514
fmov.x (%a0),%fp0
7515
fmov.l %d0,%fpcr
7516
mov.b &FADD_OP,%d1 # last inst is ADD
7517
fadd.x SC(%a6),%fp0
7518
bra t_catch
7519
7520
EM12TINY:
7521
#--Step 8.3
7522
fmov.x (%a0),%fp0
7523
fmul.d TWO140(%pc),%fp0
7524
mov.l &0x80010000,SC(%a6)
7525
mov.l &0x80000000,SC+4(%a6)
7526
clr.l SC+8(%a6)
7527
fadd.x SC(%a6),%fp0
7528
fmov.l %d0,%fpcr
7529
mov.b &FMUL_OP,%d1 # last inst is MUL
7530
fmul.d TWON140(%pc),%fp0
7531
bra t_catch
7532
7533
EM1POLY:
7534
#--Step 9 exp(X)-1 by a simple polynomial
7535
fmov.x (%a0),%fp0 # fp0 is X
7536
fmul.x %fp0,%fp0 # fp0 is S := X*X
7537
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7538
fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7539
fmul.x %fp0,%fp1 # fp1 is S*B12
7540
fmov.s &0x310F8290,%fp2 # fp2 is B11
7541
fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7542
7543
fmul.x %fp0,%fp2 # fp2 is S*B11
7544
fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7545
7546
fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7547
fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7548
7549
fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7550
fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7551
7552
fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7553
fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7554
7555
fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7556
fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7557
7558
fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7559
fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7560
7561
fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7562
fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7563
7564
fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7565
fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7566
7567
fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7568
fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7569
7570
fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7571
fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7572
7573
fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7574
fadd.x %fp2,%fp1 # fp1 is Q
7575
7576
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7577
7578
fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7579
7580
fmov.l %d0,%fpcr
7581
fadd.x (%a0),%fp0
7582
bra t_inx2
7583
7584
EM1BIG:
7585
#--Step 10 |X| > 70 log2
7586
mov.l (%a0),%d1
7587
cmp.l %d1,&0
7588
bgt.w EXPC1
7589
#--Step 10.2
7590
fmov.s &0xBF800000,%fp0 # fp0 is -1
7591
fmov.l %d0,%fpcr
7592
fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7593
bra t_minx2
7594
7595
global setoxm1d
7596
setoxm1d:
7597
#--entry point for EXPM1(X), here X is denormalized
7598
#--Step 0.
7599
bra t_extdnrm
7600
7601
#########################################################################
7602
# sgetexp(): returns the exponent portion of the input argument. #
7603
# The exponent bias is removed and the exponent value is #
7604
# returned as an extended precision number in fp0. #
7605
# sgetexpd(): handles denormalized numbers. #
7606
# #
7607
# sgetman(): extracts the mantissa of the input argument. The #
7608
# mantissa is converted to an extended precision number w/ #
7609
# an exponent of $3fff and is returned in fp0. The range of #
7610
# the result is [1.0 - 2.0). #
7611
# sgetmand(): handles denormalized numbers. #
7612
# #
7613
# INPUT *************************************************************** #
7614
# a0 = pointer to extended precision input #
7615
# #
7616
# OUTPUT ************************************************************** #
7617
# fp0 = exponent(X) or mantissa(X) #
7618
# #
7619
#########################################################################
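#
# Example (added illustration, not part of the original notes): for the
# input X = 12.5 = 2^3 * 1.5625, sgetexp() returns 3.0 and sgetman()
# returns 1.5625; a negative input yields the same exponent but a
# negated mantissa.
#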
7620
7621
global sgetexp
7622
sgetexp:
7623
mov.w SRC_EX(%a0),%d0 # get the exponent
7624
bclr &0xf,%d0 # clear the sign bit
7625
subi.w &0x3fff,%d0 # subtract off the bias
7626
fmov.w %d0,%fp0 # return exp in fp0
7627
blt.b sgetexpn # it's negative
7628
rts
7629
7630
sgetexpn:
7631
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7632
rts
7633
7634
global sgetexpd
7635
sgetexpd:
7636
bsr.l norm # normalize
7637
neg.w %d0 # new exp = -(shft amt)
7638
subi.w &0x3fff,%d0 # subtract off the bias
7639
fmov.w %d0,%fp0 # return exp in fp0
7640
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7641
rts
7642
7643
global sgetman
7644
sgetman:
7645
mov.w SRC_EX(%a0),%d0 # get the exp
7646
ori.w &0x7fff,%d0 # clear old exp
7647
bclr &0xe,%d0 # make it the new exp +-3fff
7648
7649
# here, we build the result in a tmp location so as not to disturb the input
7650
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7653
fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7654
bmi.b sgetmann # it's negative
7655
rts
7656
7657
sgetmann:
7658
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7659
rts
7660
7661
#
7662
# For denormalized numbers, shift the mantissa until the j-bit = 1,
7663
# then load the exponent with +/- $3fff.
7664
#
7665
global sgetmand
7666
sgetmand:
7667
bsr.l norm # normalize exponent
7668
bra.b sgetman
7669
7670
#########################################################################
7671
# scosh(): computes the hyperbolic cosine of a normalized input #
7672
# scoshd(): computes the hyperbolic cosine of a denormalized input #
7673
# #
7674
# INPUT *************************************************************** #
7675
# a0 = pointer to extended precision input #
7676
# d0 = round precision,mode #
7677
# #
7678
# OUTPUT ************************************************************** #
7679
# fp0 = cosh(X) #
7680
# #
7681
# ACCURACY and MONOTONICITY ******************************************* #
7682
#	The returned result is within 3 ulps in 64 significant bits,	#
7683
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7684
# rounded to double precision. The result is provably monotonic #
7685
# in double precision. #
7686
# #
7687
# ALGORITHM *********************************************************** #
7688
# #
7689
# COSH #
7690
# 1. If |X| > 16380 log2, go to 3. #
7691
# #
7692
# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7693
# y = |X|, z = exp(Y), and #
7694
# cosh(X) = (1/2)*( z + 1/z ). #
7695
# Exit. #
7696
# #
7697
# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7698
# #
7699
# 4. (16380 log2 < |X| <= 16480 log2) #
7700
#		cosh(X) = exp(|X|)/2.					#
7701
# However, invoking exp(|X|) may cause premature #
7702
#		overflow. Thus, we calculate cosh(X) as follows:	#
7703
# Y := |X| #
7704
# Fact := 2**(16380) #
7705
# Y' := Y - 16381 log2 #
7706
# cosh(X) := Fact * exp(Y'). #
7707
# Exit. #
7708
# #
7709
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7710
# Huge*Huge to generate overflow and an infinity with #
7711
# the appropriate sign. Huge is the largest finite number #
7712
# in extended format. Exit. #
7713
# #
7714
#########################################################################
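#
# Note on Step 4 (added illustration, not part of the original notes):
# since exp(Y)/2 = exp(Y - log2) = 2^(16380) * exp(Y - 16381 log2), the
# scaled form cosh(X) = 2^(16380) * exp(|X| - 16381 log2) equals
# exp(|X|)/2 exactly but keeps the intermediate exp() result in range.
#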
7715
7716
TWO16380:
7717
long 0x7FFB0000,0x80000000,0x00000000,0x00000000
7718
7719
global scosh
7720
scosh:
7721
fmov.x (%a0),%fp0 # LOAD INPUT
7722
7723
mov.l (%a0),%d1
7724
mov.w 4(%a0),%d1
7725
and.l &0x7FFFFFFF,%d1
7726
cmp.l %d1,&0x400CB167
7727
bgt.b COSHBIG
7728
7729
#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7730
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7731
7732
fabs.x %fp0 # |X|
7733
7734
mov.l %d0,-(%sp)
7735
clr.l %d0
7736
fmovm.x &0x01,-(%sp) # save |X| to stack
7737
lea (%sp),%a0 # pass ptr to |X|
7738
bsr setox # FP0 IS EXP(|X|)
7739
add.l &0xc,%sp # erase |X| from stack
7740
fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7741
mov.l (%sp)+,%d0
7742
7743
fmov.s &0x3E800000,%fp1 # (1/4)
7744
fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
7745
7746
fmov.l %d0,%fpcr
7747
mov.b &FADD_OP,%d1 # last inst is ADD
7748
fadd.x %fp1,%fp0
7749
bra t_catch
7750
7751
COSHBIG:
7752
cmp.l %d1,&0x400CB2B3
7753
bgt.b COSHHUGE
7754
7755
fabs.x %fp0
7756
fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7757
fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7758
7759
mov.l %d0,-(%sp)
7760
clr.l %d0
7761
fmovm.x &0x01,-(%sp) # save fp0 to stack
7762
lea (%sp),%a0 # pass ptr to fp0
7763
bsr setox
7764
add.l &0xc,%sp # clear fp0 from stack
7765
mov.l (%sp)+,%d0
7766
7767
fmov.l %d0,%fpcr
7768
mov.b &FMUL_OP,%d1 # last inst is MUL
7769
fmul.x TWO16380(%pc),%fp0
7770
bra t_catch
7771
7772
COSHHUGE:
7773
bra t_ovfl2
7774
7775
global scoshd
7776
#--COSH(X) = 1 FOR DENORMALIZED X
7777
scoshd:
7778
fmov.s &0x3F800000,%fp0
7779
7780
fmov.l %d0,%fpcr
7781
fadd.s &0x00800000,%fp0
7782
bra t_pinx2
7783
7784
#########################################################################
7785
# ssinh(): computes the hyperbolic sine of a normalized input #
7786
# ssinhd(): computes the hyperbolic sine of a denormalized input #
7787
# #
7788
# INPUT *************************************************************** #
7789
# a0 = pointer to extended precision input #
7790
# d0 = round precision,mode #
7791
# #
7792
# OUTPUT ************************************************************** #
7793
# fp0 = sinh(X) #
7794
# #
7795
# ACCURACY and MONOTONICITY ******************************************* #
7796
#	The returned result is within 3 ulps in 64 significant bits,	#
7797
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798
# rounded to double precision. The result is provably monotonic #
7799
# in double precision. #
7800
# #
7801
# ALGORITHM *********************************************************** #
7802
# #
7803
# SINH #
7804
# 1. If |X| > 16380 log2, go to 3. #
7805
# #
7806
# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7807
# y = |X|, sgn = sign(X), and z = expm1(Y), #
7808
# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7809
# Exit. #
7810
# #
7811
# 3. If |X| > 16480 log2, go to 5. #
7812
# #
7813
# 4. (16380 log2 < |X| <= 16480 log2) #
7814
# sinh(X) = sign(X) * exp(|X|)/2. #
7815
# However, invoking exp(|X|) may cause premature overflow. #
7816
# Thus, we calculate sinh(X) as follows: #
7817
# Y := |X| #
7818
# sgn := sign(X) #
7819
# sgnFact := sgn * 2**(16380) #
7820
# Y' := Y - 16381 log2 #
7821
# sinh(X) := sgnFact * exp(Y'). #
7822
# Exit. #
7823
# #
7824
# 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7825
# sign(X)*Huge*Huge to generate overflow and an infinity with #
7826
# the appropriate sign. Huge is the largest finite number in #
7827
# extended format. Exit. #
7828
# #
7829
#########################################################################
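#
# A minimal C sketch (not part of the FPSP sources) of the usual-case
# sinh() formula above, with expm1() standing in for setoxm1:
#
#	#include <math.h>
#
#	static double sinh_sketch(double x)
#	{
#	    double y = fabs(x);
#	    double z = expm1(y);                   /* z = exp(y) - 1     */
#	    double s = 0.5 * (z + z / (1.0 + z));  /* (1/2)(z + z/(1+z)) */
#	    return (x < 0.0) ? -s : s;             /* restore sign(X)    */
#	}
#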
7830
7831
global ssinh
7832
ssinh:
7833
fmov.x (%a0),%fp0 # LOAD INPUT
7834
7835
mov.l (%a0),%d1
7836
mov.w 4(%a0),%d1
7837
mov.l %d1,%a1 # save (compacted) operand
7838
and.l &0x7FFFFFFF,%d1
7839
cmp.l %d1,&0x400CB167
7840
bgt.b SINHBIG
7841
7842
#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7843
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7844
7845
fabs.x %fp0 # Y = |X|
7846
7847
movm.l &0x8040,-(%sp) # {a1/d0}
7848
fmovm.x &0x01,-(%sp) # save Y on stack
7849
lea (%sp),%a0 # pass ptr to Y
7850
clr.l %d0
7851
bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7852
add.l &0xc,%sp # clear Y from stack
7853
fmov.l &0,%fpcr
7854
movm.l (%sp)+,&0x0201 # {a1/d0}
7855
7856
fmov.x %fp0,%fp1
7857
fadd.s &0x3F800000,%fp1 # 1+Z
7858
fmov.x %fp0,-(%sp)
7859
fdiv.x %fp1,%fp0 # Z/(1+Z)
7860
mov.l %a1,%d1
7861
and.l &0x80000000,%d1
7862
or.l &0x3F000000,%d1
7863
fadd.x (%sp)+,%fp0
7864
mov.l %d1,-(%sp)
7865
7866
fmov.l %d0,%fpcr
7867
mov.b &FMUL_OP,%d1 # last inst is MUL
7868
fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7869
bra t_catch
7870
7871
SINHBIG:
7872
cmp.l %d1,&0x400CB2B3
7873
bgt t_ovfl
7874
fabs.x %fp0
7875
fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7876
mov.l &0,-(%sp)
7877
mov.l &0x80000000,-(%sp)
7878
mov.l %a1,%d1
7879
and.l &0x80000000,%d1
7880
or.l &0x7FFB0000,%d1
7881
mov.l %d1,-(%sp) # EXTENDED FMT
7882
fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7883
7884
mov.l %d0,-(%sp)
7885
clr.l %d0
7886
fmovm.x &0x01,-(%sp) # save fp0 on stack
7887
lea (%sp),%a0 # pass ptr to fp0
7888
bsr setox
7889
add.l &0xc,%sp # clear fp0 from stack
7890
7891
mov.l (%sp)+,%d0
7892
fmov.l %d0,%fpcr
7893
mov.b &FMUL_OP,%d1 # last inst is MUL
7894
fmul.x (%sp)+,%fp0 # possible exception
7895
bra t_catch
7896
7897
global ssinhd
7898
#--SINH(X) = X FOR DENORMALIZED X
7899
ssinhd:
7900
bra t_extdnrm
7901
7902
#########################################################################
7903
# stanh(): computes the hyperbolic tangent of a normalized input #
7904
# stanhd(): computes the hyperbolic tangent of a denormalized input #
7905
# #
7906
# INPUT *************************************************************** #
7907
# a0 = pointer to extended precision input #
7908
# d0 = round precision,mode #
7909
# #
7910
# OUTPUT ************************************************************** #
7911
# fp0 = tanh(X) #
7912
# #
7913
# ACCURACY and MONOTONICITY ******************************************* #
7914
# The returned result is within 3 ulps in 64 significant bits, #
7915
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916
# rounded to double precision. The result is provably monotonic #
7917
# in double precision. #
7918
# #
7919
# ALGORITHM *********************************************************** #
7920
# #
7921
# TANH #
7922
# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7923
# #
7924
# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7925
# sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7926
# tanh(X) = sgn*( z/(2+z) ). #
7927
# Exit. #
7928
# #
7929
# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7930
# go to 7. #
7931
# #
7932
# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7933
# #
7934
# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7935
# sgn := sign(X), y := 2|X|, z := exp(Y), #
7936
# tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7937
# Exit. #
7938
# #
7939
# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7940
# calculate Tanh(X) by #
7941
# sgn := sign(X), Tiny := 2**(-126), #
7942
# tanh(X) := sgn - sgn*Tiny. #
7943
# Exit. #
7944
# #
7945
# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7946
# #
7947
#########################################################################
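#
# A minimal C sketch (not part of the FPSP sources) of the usual-case
# tanh() formula above (2**(-40) < |X| < (5/2) log2), with expm1()
# standing in for setoxm1:
#
#	#include <math.h>
#
#	static double tanh_sketch(double x)
#	{
#	    double y = 2.0 * fabs(x);
#	    double z = expm1(y);            /* z = exp(2|X|) - 1 */
#	    double t = z / (2.0 + z);       /* = tanh(|X|)       */
#	    return (x < 0.0) ? -t : t;      /* restore sign(X)   */
#	}
#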
7948
7949
set X,FP_SCR0
7950
set XFRAC,X+4
7951
7952
set SGN,L_SCR3
7953
7954
set V,FP_SCR0
7955
7956
global stanh
7957
stanh:
7958
fmov.x (%a0),%fp0 # LOAD INPUT
7959
7960
fmov.x %fp0,X(%a6)
7961
mov.l (%a0),%d1
7962
mov.w 4(%a0),%d1
7963
mov.l %d1,X(%a6)
7964
and.l &0x7FFFFFFF,%d1
7965
cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7966
blt.w TANHBORS # yes
7967
cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7968
bgt.w TANHBORS # yes
7969
7970
#--THIS IS THE USUAL CASE
7971
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7972
7973
mov.l X(%a6),%d1
7974
mov.l %d1,SGN(%a6)
7975
and.l &0x7FFF0000,%d1
7976
add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7977
mov.l %d1,X(%a6)
7978
and.l &0x80000000,SGN(%a6)
7979
fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7980
7981
mov.l %d0,-(%sp)
7982
clr.l %d0
7983
fmovm.x &0x1,-(%sp) # save Y on stack
7984
lea (%sp),%a0 # pass ptr to Y
7985
bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7986
add.l &0xc,%sp # clear Y from stack
7987
mov.l (%sp)+,%d0
7988
7989
fmov.x %fp0,%fp1
7990
fadd.s &0x40000000,%fp1 # Z+2
7991
mov.l SGN(%a6),%d1
7992
fmov.x %fp1,V(%a6)
7993
eor.l %d1,V(%a6)
7994
7995
fmov.l %d0,%fpcr # restore users round prec,mode
7996
fdiv.x V(%a6),%fp0
7997
bra t_inx2
7998
7999
TANHBORS:
8000
cmp.l %d1,&0x3FFF8000
8001
blt.w TANHSM
8002
8003
cmp.l %d1,&0x40048AA1
8004
bgt.w TANHHUGE
8005
8006
#-- (5/2) LOG2 < |X| < 50 LOG2,
8007
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8008
#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8009
8010
mov.l X(%a6),%d1
8011
mov.l %d1,SGN(%a6)
8012
and.l &0x7FFF0000,%d1
8013
add.l &0x00010000,%d1 # EXPO OF 2|X|
8014
mov.l %d1,X(%a6) # Y = 2|X|
8015
and.l &0x80000000,SGN(%a6)
8016
mov.l SGN(%a6),%d1
8017
fmov.x X(%a6),%fp0 # Y = 2|X|
8018
8019
mov.l %d0,-(%sp)
8020
clr.l %d0
8021
fmovm.x &0x01,-(%sp) # save Y on stack
8022
lea (%sp),%a0 # pass ptr to Y
8023
bsr setox # FP0 IS EXP(Y)
8024
add.l &0xc,%sp # clear Y from stack
8025
mov.l (%sp)+,%d0
8026
mov.l SGN(%a6),%d1
8027
fadd.s &0x3F800000,%fp0 # EXP(Y)+1
8028
8029
eor.l &0xC0000000,%d1 # -SIGN(X)*2
8030
fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
8031
fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
8032
8033
mov.l SGN(%a6),%d1
8034
or.l &0x3F800000,%d1 # SGN
8035
fmov.s %d1,%fp0 # SGN IN SGL FMT
8036
8037
fmov.l %d0,%fpcr # restore users round prec,mode
8038
mov.b &FADD_OP,%d1 # last inst is ADD
8039
fadd.x %fp1,%fp0
8040
bra t_inx2
8041
8042
TANHSM:
8043
fmov.l %d0,%fpcr # restore users round prec,mode
8044
mov.b &FMOV_OP,%d1 # last inst is MOVE
8045
fmov.x X(%a6),%fp0 # last inst - possible exception set
8046
bra t_catch
8047
8048
#---RETURN SGN(X) - SGN(X)EPS
8049
TANHHUGE:
8050
mov.l X(%a6),%d1
8051
and.l &0x80000000,%d1
8052
or.l &0x3F800000,%d1
8053
fmov.s %d1,%fp0
8054
and.l &0x80000000,%d1
8055
eor.l &0x80800000,%d1 # -SIGN(X)*EPS
8056
8057
fmov.l %d0,%fpcr # restore users round prec,mode
8058
fadd.s %d1,%fp0
8059
bra t_inx2
8060
8061
global stanhd
8062
#--TANH(X) = X FOR DENORMALIZED X
8063
stanhd:
8064
bra t_extdnrm
8065
8066
#########################################################################
8067
# slogn(): computes the natural logarithm of a normalized input #
8068
# slognd(): computes the natural logarithm of a denormalized input #
8069
# slognp1(): computes the log(1+X) of a normalized input #
8070
# slognp1d(): computes the log(1+X) of a denormalized input #
8071
# #
8072
# INPUT *************************************************************** #
8073
# a0 = pointer to extended precision input #
8074
# d0 = round precision,mode #
8075
# #
8076
# OUTPUT ************************************************************** #
8077
# fp0 = log(X) or log(1+X) #
8078
# #
8079
# ACCURACY and MONOTONICITY ******************************************* #
8080
# The returned result is within 2 ulps in 64 significant bits, #
8081
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8082
# rounded to double precision. The result is provably monotonic #
8083
# in double precision. #
8084
# #
8085
# ALGORITHM *********************************************************** #
8086
# LOGN: #
8087
# Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8088
# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8089
# move on to Step 2. #
8090
# #
8091
# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8092
# seven significant bits of Y plus 2**(-7), i.e. #
8093
# F = 1.xxxxxx1 in base 2 where the six "x" match those #
8094
# of Y. Note that |Y-F| <= 2**(-7). #
8095
# #
8096
# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8097
# polynomial in u, log(1+u) = poly. #
8098
# #
8099
# Step 4. Reconstruct #
8100
# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8101
# by k*log(2) + (log(F) + poly). The values of log(F) are #
8102
# calculated beforehand and stored in the program. #
8103
# #
8104
# lognp1: #
8105
# Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8106
# polynomial in u where u = 2X/(2+X). Otherwise, move on #
8107
# to Step 2. #
8108
# #
8109
# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8110
# in Step 2 of the algorithm for LOGN and compute #
8111
# log(1+X) as k*log(2) + log(F) + poly where poly #
8112
# approximates log(1+u), u = (Y-F)/F. #
8113
# #
8114
# Implementation Notes: #
8115
# Note 1. There are 64 different possible values for F, thus 64 #
8116
# log(F)'s need to be tabulated. Moreover, the values of #
8117
# 1/F are also tabulated so that the division in (Y-F)/F #
8118
# can be performed by a multiplication. #
8119
# #
8120
# Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8121
# the value Y-F has to be calculated carefully when #
8122
# 1/2 <= X < 3/2. #
8123
# #
8124
# Note 3. To fully exploit the pipeline, polynomials are usually #
8125
# separated into two parts evaluated independently before #
8126
# being added up. #
8127
# #
8128
#########################################################################
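#
# A minimal C sketch (not part of the FPSP sources) of the LOGN
# reconstruction above: log(X) = k*log2 + log(F) + log(1+u) with
# u = (Y-F)/F.  Here log(F) is computed directly instead of being
# read from LOGTBL, and the polynomial is replaced by log1p():
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double logn_sketch(double x)     /* assumes x > 0 */
#	{
#	    int k;
#	    double y = 2.0 * frexp(x, &k);      /* x = y * 2^(k-1), 1 <= y < 2 */
#	    k -= 1;
#	    /* F = first 7 significant bits of Y plus 2^(-7): 1.xxxxxx1 */
#	    double f = (floor(y * 64.0) + 0.5) / 64.0;
#	    double u = (y - f) / f;
#	    return k * LN2 + log(f) + log1p(u);
#	}
#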
8129
LOGOF2:
8130
long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8131
8132
one:
8133
long 0x3F800000
8134
zero:
8135
long 0x00000000
8136
infty:
8137
long 0x7F800000
8138
negone:
8139
long 0xBF800000
8140
8141
LOGA6:
8142
long 0x3FC2499A,0xB5E4040B
8143
LOGA5:
8144
long 0xBFC555B5,0x848CB7DB
8145
8146
LOGA4:
8147
long 0x3FC99999,0x987D8730
8148
LOGA3:
8149
long 0xBFCFFFFF,0xFF6F7E97
8150
8151
LOGA2:
8152
long 0x3FD55555,0x555555A4
8153
LOGA1:
8154
long 0xBFE00000,0x00000008
8155
8156
LOGB5:
8157
long 0x3F175496,0xADD7DAD6
8158
LOGB4:
8159
long 0x3F3C71C2,0xFE80C7E0
8160
8161
LOGB3:
8162
long 0x3F624924,0x928BCCFF
8163
LOGB2:
8164
long 0x3F899999,0x999995EC
8165
8166
LOGB1:
8167
long 0x3FB55555,0x55555555
8168
TWO:
8169
long 0x40000000,0x00000000
8170
8171
LTHOLD:
8172
long 0x3f990000,0x80000000,0x00000000,0x00000000
8173
8174
LOGTBL:
8175
long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176
long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177
long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178
long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179
long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180
long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181
long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182
long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183
long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184
long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185
long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186
long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187
long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188
long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189
long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190
long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191
long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192
long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193
long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194
long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195
long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196
long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197
long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198
long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199
long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200
long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201
long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202
long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203
long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204
long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205
long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206
long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207
long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208
long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209
long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210
long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211
long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212
long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213
long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214
long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215
long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216
long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217
long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218
long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219
long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220
long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221
long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222
long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223
long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224
long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225
long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226
long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227
long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228
long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229
long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230
long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231
long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232
long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233
long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234
long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235
long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236
long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237
long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238
long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239
long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240
long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241
long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242
long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243
long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244
long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245
long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246
long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247
long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248
long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249
long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250
long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251
long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252
long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253
long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254
long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255
long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256
long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257
long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258
long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259
long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260
long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261
long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262
long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263
long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264
long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265
long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266
long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267
long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8268
long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269
long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270
long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271
long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272
long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273
long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274
long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275
long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276
long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277
long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278
long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279
long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280
long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281
long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282
long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283
long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284
long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285
long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286
long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287
long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288
long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289
long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290
long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291
long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292
long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293
long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294
long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295
long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296
long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297
long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298
long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299
long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300
long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301
long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8302
long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8303
8304
set ADJK,L_SCR1
8305
8306
set X,FP_SCR0
8307
set XDCARE,X+2
8308
set XFRAC,X+4
8309
8310
set F,FP_SCR1
8311
set FFRAC,F+4
8312
8313
set KLOG2,FP_SCR0
8314
8315
set SAVEU,FP_SCR0
8316
8317
global slogn
8318
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8319
slogn:
8320
fmov.x (%a0),%fp0 # LOAD INPUT
8321
mov.l &0x00000000,ADJK(%a6)
8322
8323
LOGBGN:
8324
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8325
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8326
8327
mov.l (%a0),%d1
8328
mov.w 4(%a0),%d1
8329
8330
mov.l (%a0),X(%a6)
8331
mov.l 4(%a0),X+4(%a6)
8332
mov.l 8(%a0),X+8(%a6)
8333
8334
cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8335
blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8336
# X IS POSITIVE, CHECK IF X IS NEAR 1
8337
cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8338
blt.b LOGMAIN # YES
8339
cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8340
ble.w LOGNEAR1 # NO
8341
8342
LOGMAIN:
8343
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8344
8345
#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8346
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8347
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8348
#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8349
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8350
#--LOG(1+U) CAN BE VERY EFFICIENT.
8351
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8352
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8353
8354
#--GET K, Y, F, AND ADDRESS OF 1/F.
8355
asr.l &8,%d1
8356
asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8357
sub.l &0x3FFF,%d1 # THIS IS K
8358
add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8359
lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8360
fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8361
8362
#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8363
mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8364
mov.l XFRAC(%a6),FFRAC(%a6)
8365
and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8366
or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8367
mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8368
and.l &0x7E000000,%d1
8369
asr.l &8,%d1
8370
asr.l &8,%d1
8371
asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT
8372
add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8373
8374
fmov.x X(%a6),%fp0
8375
mov.l &0x3fff0000,F(%a6)
8376
clr.l F+8(%a6)
8377
fsub.x F(%a6),%fp0 # Y-F
8378
fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8379
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8380
#--REGISTERS SAVED: FPCR, FP1, FP2
8381
8382
LP1CONT1:
8383
#--A RE-ENTRY POINT FOR LOGNP1
8384
fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8385
fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8386
fmov.x %fp0,%fp2
8387
fmul.x %fp2,%fp2 # FP2 IS V=U*U
8388
fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8389
8390
#--LOG(1+U) IS APPROXIMATED BY
8391
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8392
#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8393
8394
fmov.x %fp2,%fp3
8395
fmov.x %fp2,%fp1
8396
8397
fmul.d LOGA6(%pc),%fp1 # V*A6
8398
fmul.d LOGA5(%pc),%fp2 # V*A5
8399
8400
fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8401
fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8402
8403
fmul.x %fp3,%fp1 # V*(A4+V*A6)
8404
fmul.x %fp3,%fp2 # V*(A3+V*A5)
8405
8406
fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8407
fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8408
8409
fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8410
add.l &16,%a0 # ADDRESS OF LOG(F)
8411
fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8412
8413
fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8414
fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8415
8416
fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8417
fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8418
fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8419
8420
fmov.l %d0,%fpcr
8421
fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8422
bra t_inx2
8423
8424
8425
LOGNEAR1:
8426
8427
# if the input is exactly equal to one, then exit through ld_pzero.
8428
# if these 2 lines weren't here, the correct answer would be returned
8429
# but the INEX2 bit would be set.
8430
fcmp.b %fp0,&0x1 # is it equal to one?
8431
fbeq.l ld_pzero # yes
8432
8433
#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8434
fmov.x %fp0,%fp1
8435
fsub.s one(%pc),%fp1 # FP1 IS X-1
8436
fadd.s one(%pc),%fp0 # FP0 IS X+1
8437
fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8438
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8440
8441
LP1CONT2:
8442
#--THIS IS A RE-ENTRY POINT FOR LOGNP1
8443
fdiv.x %fp0,%fp1 # FP1 IS U
8444
fmovm.x &0xc,-(%sp) # SAVE FP2-3
8445
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446
#--LET V=U*U, W=V*V, CALCULATE
8447
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448
#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8449
fmov.x %fp1,%fp0
8450
fmul.x %fp0,%fp0 # FP0 IS V
8451
fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8452
fmov.x %fp0,%fp1
8453
fmul.x %fp1,%fp1 # FP1 IS W
8454
8455
fmov.d LOGB5(%pc),%fp3
8456
fmov.d LOGB4(%pc),%fp2
8457
8458
fmul.x %fp1,%fp3 # W*B5
8459
fmul.x %fp1,%fp2 # W*B4
8460
8461
fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8462
fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8463
8464
fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8465
8466
fmul.x %fp0,%fp2 # V*(B2+W*B4)
8467
8468
fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8469
fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8470
8471
fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472
fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8473
8474
fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8475
8476
fmov.l %d0,%fpcr
8477
fadd.x SAVEU(%a6),%fp0
8478
bra t_inx2
8479
8480
#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8481
LOGNEG:
8482
bra t_operr
8483
8484
global slognd
8485
slognd:
8486
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8487
8488
mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8489
8490
#----normalize the input value by left shifting k bits (k to be determined
8491
#----below), adjusting exponent and storing -k to ADJK
8492
#----the value TWOTO100 is no longer needed.
8493
#----Note that this code assumes the denormalized input is NON-ZERO.
8494
8495
movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8496
mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8497
mov.l 4(%a0),%d4
8498
mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8499
clr.l %d2 # D2 used for holding K
8500
8501
tst.l %d4
8502
bne.b Hi_not0
8503
8504
Hi_0:
8505
mov.l %d5,%d4
8506
clr.l %d5
8507
mov.l &32,%d2
8508
clr.l %d6
8509
bfffo %d4{&0:&32},%d6
8510
lsl.l %d6,%d4
8511
add.l %d6,%d2 # (D3,D4,D5) is normalized
8512
8513
mov.l %d3,X(%a6)
8514
mov.l %d4,XFRAC(%a6)
8515
mov.l %d5,XFRAC+4(%a6)
8516
neg.l %d2
8517
mov.l %d2,ADJK(%a6)
8518
fmov.x X(%a6),%fp0
8519
movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8520
lea X(%a6),%a0
8521
bra.w LOGBGN # begin regular log(X)
8522
8523
Hi_not0:
8524
clr.l %d6
8525
bfffo %d4{&0:&32},%d6 # find first 1
8526
mov.l %d6,%d2 # get k
8527
lsl.l %d6,%d4
8528
mov.l %d5,%d7 # a copy of D5
8529
lsl.l %d6,%d5
8530
neg.l %d6
8531
add.l &32,%d6
8532
lsr.l %d6,%d7
8533
or.l %d7,%d4 # (D3,D4,D5) normalized
8534
8535
mov.l %d3,X(%a6)
8536
mov.l %d4,XFRAC(%a6)
8537
mov.l %d5,XFRAC+4(%a6)
8538
neg.l %d2
8539
mov.l %d2,ADJK(%a6)
8540
fmov.x X(%a6),%fp0
8541
movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8542
lea X(%a6),%a0
8543
bra.w LOGBGN # begin regular log(X)
8544
8545
global slognp1
8546
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8547
slognp1:
8548
fmov.x (%a0),%fp0 # LOAD INPUT
8549
fabs.x %fp0 # test magnitude
8550
fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8551
fbgt.w LP1REAL # if greater, continue
8552
fmov.l %d0,%fpcr
8553
mov.b &FMOV_OP,%d1 # last inst is MOVE
8554
fmov.x (%a0),%fp0 # return signed argument
8555
bra t_catch
8556
8557
LP1REAL:
8558
fmov.x (%a0),%fp0 # LOAD INPUT
8559
mov.l &0x00000000,ADJK(%a6)
8560
fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8561
fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8562
fmov.x %fp0,X(%a6)
8563
mov.w XFRAC(%a6),XDCARE(%a6)
8564
mov.l X(%a6),%d1
8565
cmp.l %d1,&0
8566
ble.w LP1NEG0 # LOG OF ZERO OR -VE
8567
cmp.l %d1,&0x3ffe8000 # IS 1+Z IN BOUNDS [1/2,3/2]?
8568
blt.w LOGMAIN
8569
cmp.l %d1,&0x3fffc000
8570
bgt.w LOGMAIN
8571
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8574
8575
LP1NEAR1:
8576
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577
cmp.l %d1,&0x3ffef07d
8578
blt.w LP1CARE
8579
cmp.l %d1,&0x3fff8841
8580
bgt.w LP1CARE
8581
8582
LP1ONE16:
8583
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585
fadd.x %fp1,%fp1 # FP1 IS 2Z
8586
fadd.s one(%pc),%fp0 # FP0 IS 1+X
8587
#--U = FP1/FP0
8588
bra.w LP1CONT2
8589
8590
LP1CARE:
8591
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594
#--THERE ARE ONLY TWO CASES.
8595
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596
#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8597
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8599
8600
mov.l XFRAC(%a6),FFRAC(%a6)
8601
and.l &0xFE000000,FFRAC(%a6)
8602
or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8603
cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8604
bge.b KISZERO
8605
8606
KISNEG1:
8607
fmov.s TWO(%pc),%fp0
8608
mov.l &0x3fff0000,F(%a6)
8609
clr.l F+8(%a6)
8610
fsub.x F(%a6),%fp0 # 2-F
8611
mov.l FFRAC(%a6),%d1
8612
and.l &0x7E000000,%d1
8613
asr.l &8,%d1
8614
asr.l &8,%d1
8615
asr.l &4,%d1 # D0 CONTAINS DISPLACEMENT FOR 1/F
8616
fadd.x %fp1,%fp1 # GET 2Z
8617
fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8618
fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8619
lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8620
add.l %d1,%a0
8621
fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8622
bra.w LP1CONT1
8623
8624
KISZERO:
8625
fmov.s one(%pc),%fp0
8626
mov.l &0x3fff0000,F(%a6)
8627
clr.l F+8(%a6)
8628
fsub.x F(%a6),%fp0 # 1-F
8629
mov.l FFRAC(%a6),%d1
8630
and.l &0x7E000000,%d1
8631
asr.l &8,%d1
8632
asr.l &8,%d1
8633
asr.l &4,%d1
8634
fadd.x %fp1,%fp0 # FP0 IS Y-F
8635
fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8636
lea LOGTBL(%pc),%a0
8637
add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8638
fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8639
bra.w LP1CONT1
8640
8641
LP1NEG0:
8642
#--FPCR SAVED. D0 IS X IN COMPACT FORM.
8643
cmp.l %d1,&0
8644
blt.b LP1NEG
8645
LP1ZERO:
8646
fmov.s negone(%pc),%fp0
8647
8648
fmov.l %d0,%fpcr
8649
bra t_dz
8650
8651
LP1NEG:
8652
fmov.s zero(%pc),%fp0
8653
8654
fmov.l %d0,%fpcr
8655
bra t_operr
8656
8657
global slognp1d
8658
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8659
# Simply return the denorm
8660
slognp1d:
8661
bra t_extdnrm
8662
8663
#########################################################################
8664
# satanh(): computes the inverse hyperbolic tangent of a norm input #
8665
# satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8666
# #
8667
# INPUT *************************************************************** #
8668
# a0 = pointer to extended precision input #
8669
# d0 = round precision,mode #
8670
# #
8671
# OUTPUT ************************************************************** #
8672
# fp0 = arctanh(X) #
8673
# #
8674
# ACCURACY and MONOTONICITY ******************************************* #
8675
# The returned result is within 3 ulps in 64 significant bits, #
8676
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8677
# rounded to double precision. The result is provably monotonic #
8678
# in double precision. #
8679
# #
8680
# ALGORITHM *********************************************************** #
8681
# #
8682
# ATANH #
8683
# 1. If |X| >= 1, go to 3. #
8684
# #
8685
# 2. (|X| < 1) Calculate atanh(X) by #
8686
# sgn := sign(X) #
8687
# y := |X| #
8688
# z := 2y/(1-y) #
8689
# atanh(X) := sgn * (1/2) * logp1(z) #
8690
# Exit. #
8691
# #
8692
# 3. If |X| > 1, go to 5. #
8693
# #
8694
# 4. (|X| = 1) Generate infinity with an appropriate sign and #
8695
# divide-by-zero by #
8696
# sgn := sign(X) #
8697
# atan(X) := sgn / (+0). #
8698
# Exit. #
8699
# #
8700
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8701
# Exit. #
8702
# #
8703
#########################################################################
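#
# A minimal C sketch (not part of the FPSP sources) of the |X| < 1
# case above, with log1p() standing in for the call to slognp1:
#
#	#include <math.h>
#
#	static double atanh_sketch(double x)    /* assumes |x| < 1 */
#	{
#	    double y = fabs(x);
#	    double z = 2.0 * y / (1.0 - y);     /* z = 2y/(1-y)     */
#	    double a = 0.5 * log1p(z);          /* (1/2) * logp1(z) */
#	    return (x < 0.0) ? -a : a;          /* restore sign(X)  */
#	}
#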
8704
8705
global satanh
8706
satanh:
8707
mov.l (%a0),%d1
8708
mov.w 4(%a0),%d1
8709
and.l &0x7FFFFFFF,%d1
8710
cmp.l %d1,&0x3FFF8000
8711
bge.b ATANHBIG
8712
8713
#--THIS IS THE USUAL CASE, |X| < 1
8714
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8715
8716
fabs.x (%a0),%fp0 # Y = |X|
8717
fmov.x %fp0,%fp1
8718
fneg.x %fp1 # -Y
8719
fadd.x %fp0,%fp0 # 2Y
8720
fadd.s &0x3F800000,%fp1 # 1-Y
8721
fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8722
mov.l (%a0),%d1
8723
and.l &0x80000000,%d1
8724
or.l &0x3F000000,%d1 # SIGN(X)*HALF
8725
mov.l %d1,-(%sp)
8726
8727
mov.l %d0,-(%sp) # save rnd prec,mode
8728
clr.l %d0 # pass ext prec,RN
8729
fmovm.x &0x01,-(%sp) # save Z on stack
8730
lea (%sp),%a0 # pass ptr to Z
8731
bsr slognp1 # LOG1P(Z)
8732
add.l &0xc,%sp # clear Z from stack
8733
8734
mov.l (%sp)+,%d0 # fetch old prec,mode
8735
fmov.l %d0,%fpcr # load it
8736
mov.b &FMUL_OP,%d1 # last inst is MUL
8737
fmul.s (%sp)+,%fp0
8738
bra t_catch
8739
8740
ATANHBIG:
8741
fabs.x (%a0),%fp0 # |X|
8742
fcmp.s %fp0,&0x3F800000
8743
fbgt t_operr
8744
bra t_dz
8745
8746
global satanhd
8747
#--ATANH(X) = X FOR DENORMALIZED X
8748
satanhd:
8749
bra t_extdnrm
8750
8751
#########################################################################
8752
# slog10(): computes the base-10 logarithm of a normalized input #
8753
# slog10d(): computes the base-10 logarithm of a denormalized input #
8754
# slog2(): computes the base-2 logarithm of a normalized input #
8755
# slog2d(): computes the base-2 logarithm of a denormalized input #
8756
# #
8757
# INPUT *************************************************************** #
8758
# a0 = pointer to extended precision input #
8759
# d0 = round precision,mode #
8760
# #
8761
# OUTPUT ************************************************************** #
8762
# fp0 = log_10(X) or log_2(X) #
8763
# #
8764
# ACCURACY and MONOTONICITY ******************************************* #
8765
# The returned result is within 1.7 ulps in 64 significant bits, #
8766
# i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8767
# rounded to double precision. The result is provably monotonic #
8768
# in double precision. #
8769
# #
8770
# ALGORITHM *********************************************************** #
8771
# #
8772
# slog10d: #
8773
# #
8774
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8775
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8776
# Notes: Default means round-to-nearest mode, no floating-point #
8777
# traps, and precision control = double extended. #
8778
# #
8779
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8780
# Notes: Even if X is denormalized, log(X) is always normalized. #
8781
# #
8782
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8783
# 2.1 Restore the user FPCR #
8784
# 2.2 Return ans := Y * INV_L10. #
8785
# #
8786
# slog10: #
8787
# #
8788
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8789
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8790
# Notes: Default means round-to-nearest mode, no floating-point #
8791
# traps, and precision control = double extended. #
8792
# #
8793
# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8794
# #
8795
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8796
# 2.1 Restore the user FPCR #
8797
# 2.2 Return ans := Y * INV_L10. #
8798
# #
8799
# sLog2d: #
8800
# #
8801
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8802
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8803
# Notes: Default means round-to-nearest mode, no floating-point #
8804
# traps, and precision control = double extended. #
8805
# #
8806
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8807
# Notes: Even if X is denormalized, log(X) is always normalized. #
8808
# #
8809
# Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8810
# 2.1 Restore the user FPCR #
8811
# 2.2 Return ans := Y * INV_L2. #
8812
# #
8813
# sLog2: #
8814
# #
8815
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8816
# flag. Otherwise, save FPCR in D1; set FPCR to default. #
8817
# Notes: Default means round-to-nearest mode, no floating-point #
8818
# traps, and precision control = double extended. #
8819
# #
8820
# Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8821
# go to Step 3. #
8822
# #
8823
# Step 2. Return k. #
8824
# 2.1 Get integer k, X = 2^k. #
8825
# 2.2 Restore the user FPCR. #
8826
# 2.3 Return ans := convert-to-double-extended(k). #
8827
# #
8828
# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8829
# #
8830
# Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8831
# 4.1 Restore the user FPCR #
8832
# 4.2 Return ans := Y * INV_L2. #
8833
# #
8834
#########################################################################
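#
# A minimal C sketch (not part of the FPSP sources) of the slog2
# strategy above: exact powers of two return the exponent exactly,
# everything else is log(X) scaled by INV_L2 (slog10 is identical
# with INV_L10 = 1/log(10)):
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double log2_sketch(double x)     /* assumes x > 0 */
#	{
#	    int k;
#	    if (frexp(x, &k) == 0.5)            /* X == 2^(k-1) exactly */
#	        return (double)(k - 1);
#	    return log(x) * (1.0 / LN2);        /* Y * INV_L2 */
#	}
#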
8835
8836
INV_L10:
8837
long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8838
8839
INV_L2:
8840
long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8841
8842
global slog10
8843
#--entry point for Log10(X), X is normalized
8844
slog10:
8845
fmov.b &0x1,%fp0
8846
fcmp.x %fp0,(%a0) # if operand == 1,
8847
fbeq.l ld_pzero # return an EXACT zero
8848
8849
mov.l (%a0),%d1
8850
blt.w invalid
8851
mov.l %d0,-(%sp)
8852
clr.l %d0
8853
bsr slogn # log(X), X normal.
8854
fmov.l (%sp)+,%fpcr
8855
fmul.x INV_L10(%pc),%fp0
8856
bra t_inx2
8857
8858
global slog10d
8859
#--entry point for Log10(X), X is denormalized
8860
slog10d:
8861
mov.l (%a0),%d1
8862
blt.w invalid
8863
mov.l %d0,-(%sp)
8864
clr.l %d0
8865
bsr slognd # log(X), X denorm.
8866
fmov.l (%sp)+,%fpcr
8867
fmul.x INV_L10(%pc),%fp0
8868
bra t_minx2
8869
8870
global slog2
8871
#--entry point for Log2(X), X is normalized
8872
slog2:
8873
mov.l (%a0),%d1
8874
blt.w invalid
8875
8876
mov.l 8(%a0),%d1
8877
bne.b continue # X is not 2^k
8878
8879
mov.l 4(%a0),%d1
8880
and.l &0x7FFFFFFF,%d1
8881
bne.b continue
8882
8883
#--X = 2^k.
8884
mov.w (%a0),%d1
8885
and.l &0x00007FFF,%d1
8886
sub.l &0x3FFF,%d1
8887
beq.l ld_pzero
8888
fmov.l %d0,%fpcr
8889
fmov.l %d1,%fp0
8890
bra t_inx2
8891
8892
continue:
8893
mov.l %d0,-(%sp)
8894
clr.l %d0
8895
bsr slogn # log(X), X normal.
8896
fmov.l (%sp)+,%fpcr
8897
fmul.x INV_L2(%pc),%fp0
8898
bra t_inx2
8899
8900
invalid:
8901
bra t_operr
8902
8903
global slog2d
8904
#--entry point for Log2(X), X is denormalized
8905
slog2d:
8906
mov.l (%a0),%d1
8907
blt.w invalid
8908
mov.l %d0,-(%sp)
8909
clr.l %d0
8910
bsr slognd # log(X), X denorm.
8911
fmov.l (%sp)+,%fpcr
8912
fmul.x INV_L2(%pc),%fp0
8913
bra t_minx2
8914
8915
#########################################################################
8916
# stwotox(): computes 2**X for a normalized input #
8917
# stwotoxd(): computes 2**X for a denormalized input #
8918
# stentox(): computes 10**X for a normalized input #
8919
# stentoxd(): computes 10**X for a denormalized input #
8920
# #
8921
# INPUT *************************************************************** #
8922
# a0 = pointer to extended precision input #
8923
# d0 = round precision,mode #
8924
# #
8925
# OUTPUT ************************************************************** #
8926
# fp0 = 2**X or 10**X #
8927
# #
8928
# ACCURACY and MONOTONICITY ******************************************* #
8929
# The returned result is within 2 ulps in 64 significant bits, #
8930
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8931
# rounded to double precision. The result is provably monotonic #
8932
# in double precision. #
8933
# #
8934
# ALGORITHM *********************************************************** #
8935
# #
8936
# twotox #
8937
# 1. If |X| > 16480, go to ExpBig. #
8938
# #
8939
# 2. If |X| < 2**(-70), go to ExpSm. #
8940
# #
8941
# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8942
# decompose N as #
8943
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8944
# #
8945
# 4. Overwrite r := r * log2. Then #
8946
# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8947
# Go to expr to compute that expression. #
8948
# #
8949
# tentox #
8950
# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8951
# #
8952
# 2. If |X| < 2**(-70), go to ExpSm. #
8953
# #
8954
# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8955
# N := round-to-int(y). Decompose N as #
8956
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8957
# #
8958
# 4. Define r as #
8959
# r := ((X - N*L1)-N*L2) * L10 #
8960
# where L1, L2 are the leading and trailing parts of #
8961
# log_10(2)/64 and L10 is the natural log of 10. Then #
8962
# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8963
# Go to expr to compute that expression. #
8964
# #
8965
# expr #
8966
# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8967
# #
8968
# 2. Overwrite Fact1 and Fact2 by #
8969
# Fact1 := 2**(M) * Fact1 #
8970
# Fact2 := 2**(M) * Fact2 #
8971
# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8972
# #
8973
# 3. Calculate P where 1 + P approximates exp(r): #
8974
# P = r + r*r*(A1+r*(A2+...+r*A5)). #
8975
# #
8976
# 4. Let AdjFact := 2**(M'). Return #
8977
# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8978
# Exit. #
8979
# #
8980
# ExpBig #
8981
# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8982
# generate underflow by Tiny * Tiny. #
8983
# #
8984
# ExpSm #
8985
# 1. Return 1 + X. #
8986
# #
8987
#########################################################################
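#
# A minimal C sketch (not part of the FPSP sources) of the twotox
# decomposition above: X = N/64 + r, N = 64*M + j, so that
# 2**X = 2**M * 2**(j/64) * exp(r*log2).  The M/M' split and the
# TEXPTBL/polynomial details are omitted:
#
#	#include <math.h>
#
#	static const double LN2 = 0.69314718055994530942;
#
#	static double twotox_sketch(double x)
#	{
#	    long   n = lround(64.0 * x);        /* N = round-to-int(64*X) */
#	    long   j = ((n % 64) + 64) % 64;    /* j in 0..63             */
#	    long   m = (n - j) / 64;            /* N = 64*M + j           */
#	    double r = (x - n / 64.0) * LN2;    /* r := (X - N/64) * log2 */
#	    return ldexp(exp2(j / 64.0) * exp(r), (int)m);
#	}
#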
8988
8989
L2TEN64:
8990
long 0x406A934F,0x0979A371 # 64LOG10/LOG2
8991
L10TWO1:
8992
long 0x3F734413,0x509F8000 # LOG2/64LOG10
8993
8994
L10TWO2:
8995
long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8996
8997
LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
8998
8999
LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9000
9001
EXPA5: long 0x3F56C16D,0x6F7BD0B2
9002
EXPA4: long 0x3F811112,0x302C712C
9003
EXPA3: long 0x3FA55555,0x55554CC1
9004
EXPA2: long 0x3FC55555,0x55554A54
9005
EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
9006
9007
TEXPTBL:
9008
long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
9009
long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9010
long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9011
long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9012
long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9013
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9014
long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9015
long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9016
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9017
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9018
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9019
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9020
long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9021
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9022
long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9023
long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9024
long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9025
long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9026
long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9027
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9028
long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9029
long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9030
long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9031
long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9032
long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9033
long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9034
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9035
long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9036
long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9037
long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9038
long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9039
long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9040
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9041
long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9042
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9043
long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9044
long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9045
long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9046
long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9047
long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9048
long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9049
long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9050
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9051
long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9052
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9053
long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9054
long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9055
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9056
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9057
long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9058
long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9059
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9060
long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9061
long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9062
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9063
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9064
long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9065
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9066
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9067
long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9068
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9069
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9070
long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9071
long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9072
9073
set INT,L_SCR1
9074
9075
set X,FP_SCR0
9076
set XDCARE,X+2
9077
set XFRAC,X+4
9078
9079
set ADJFACT,FP_SCR0
9080
9081
set FACT1,FP_SCR0
9082
set FACT1HI,FACT1+4
9083
set FACT1LOW,FACT1+8
9084
9085
set FACT2,FP_SCR1
9086
set FACT2HI,FACT2+4
9087
set FACT2LOW,FACT2+8
9088
9089
global stwotox
9090
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9091
stwotox:
9092
fmovm.x (%a0),&0x80 # LOAD INPUT
9093
9094
mov.l (%a0),%d1
9095
mov.w 4(%a0),%d1
9096
fmov.x %fp0,X(%a6)
9097
and.l &0x7FFFFFFF,%d1
9098
9099
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9100
bge.b TWOOK1
9101
bra.w EXPBORS
9102
9103
TWOOK1:
9104
cmp.l %d1,&0x400D80C0 # |X| > 16480?
9105
ble.b TWOMAIN
9106
bra.w EXPBORS
9107
9108
TWOMAIN:
9109
#--USUAL CASE, 2^(-70) <= |X| <= 16480
9110
9111
fmov.x %fp0,%fp1
9112
fmul.s &0x42800000,%fp1 # 64 * X
9113
fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9114
mov.l %d2,-(%sp)
9115
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9116
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9117
mov.l INT(%a6),%d1
9118
mov.l %d1,%d2
9119
and.l &0x3F,%d1 # D0 IS J
9120
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9121
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9122
asr.l &6,%d2 # d2 IS L, N = 64L + J
9123
mov.l %d2,%d1
9124
asr.l &1,%d1 # D0 IS M
9125
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9126
add.l &0x3FFF,%d2
9127
9128
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129
#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130
#--ADJFACT = 2^(M').
9131
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9132
9133
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9134
9135
fmul.s &0x3C800000,%fp1 # (1/64)*N
9136
mov.l (%a1)+,FACT1(%a6)
9137
mov.l (%a1)+,FACT1HI(%a6)
9138
mov.l (%a1)+,FACT1LOW(%a6)
9139
mov.w (%a1)+,FACT2(%a6)
9140
9141
fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9142
9143
mov.w (%a1)+,FACT2HI(%a6)
9144
clr.w FACT2HI+2(%a6)
9145
clr.l FACT2LOW(%a6)
9146
add.w %d1,FACT1(%a6)
9147
fmul.x LOG2(%pc),%fp0 # FP0 IS R
9148
add.w %d1,FACT2(%a6)
9149
9150
bra.w expr
9151
9152
EXPBORS:
9153
#--FPCR, D0 SAVED
9154
cmp.l %d1,&0x3FFF8000
9155
bgt.b TEXPBIG
9156
9157
#--|X| IS SMALL, RETURN 1 + X
9158
9159
fmov.l %d0,%fpcr # restore users round prec,mode
9160
fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9161
bra t_pinx2
9162
9163
TEXPBIG:
9164
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165
#--REGISTERS SAVE SO FAR ARE FPCR AND D0
9166
mov.l X(%a6),%d1
9167
cmp.l %d1,&0
9168
blt.b EXPNEG
9169
9170
bra t_ovfl2 # t_ovfl expects positive value
9171
9172
EXPNEG:
9173
bra t_unfl2 # t_unfl expects positive value
9174
9175
global stwotoxd
9176
stwotoxd:
9177
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9178
9179
fmov.l %d0,%fpcr # set user's rounding mode/precision
9180
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9181
mov.l (%a0),%d1
9182
or.l &0x00800001,%d1
9183
fadd.s %d1,%fp0
9184
bra t_pinx2
9185
9186
global stentox
9187
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9188
stentox:
9189
fmovm.x (%a0),&0x80 # LOAD INPUT
9190
9191
mov.l (%a0),%d1
9192
mov.w 4(%a0),%d1
9193
fmov.x %fp0,X(%a6)
9194
and.l &0x7FFFFFFF,%d1
9195
9196
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9197
bge.b TENOK1
9198
bra.w EXPBORS
9199
9200
TENOK1:
9201
cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9202
ble.b TENMAIN
9203
bra.w EXPBORS
9204
9205
TENMAIN:
9206
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9207
9208
fmov.x %fp0,%fp1
9209
fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9210
fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9211
mov.l %d2,-(%sp)
9212
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9213
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9214
mov.l INT(%a6),%d1
9215
mov.l %d1,%d2
9216
and.l &0x3F,%d1 # D0 IS J
9217
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9218
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9219
asr.l &6,%d2 # d2 IS L, N = 64L + J
9220
mov.l %d2,%d1
9221
asr.l &1,%d1 # D0 IS M
9222
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9223
add.l &0x3FFF,%d2
9224
9225
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226
#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227
#--ADJFACT = 2^(M').
9228
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9229
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9230
9231
fmov.x %fp1,%fp2
9232
9233
fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9234
mov.l (%a1)+,FACT1(%a6)
9235
9236
fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9237
9238
mov.l (%a1)+,FACT1HI(%a6)
9239
mov.l (%a1)+,FACT1LOW(%a6)
9240
fsub.x %fp1,%fp0 # X - N L_LEAD
9241
mov.w (%a1)+,FACT2(%a6)
9242
9243
fsub.x %fp2,%fp0 # X - N L_TRAIL
9244
9245
mov.w (%a1)+,FACT2HI(%a6)
9246
clr.w FACT2HI+2(%a6)
9247
clr.l FACT2LOW(%a6)
9248
9249
fmul.x LOG10(%pc),%fp0 # FP0 IS R
9250
add.w %d1,FACT1(%a6)
9251
add.w %d1,FACT2(%a6)
9252
9253
expr:
9254
#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9255
#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256
#--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257
#-- 2**(M'+M) * 2**(J/64) * EXP(R)
9258
9259
fmov.x %fp0,%fp1
9260
fmul.x %fp1,%fp1 # FP1 IS S = R*R
9261
9262
fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9263
fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9264
9265
fmul.x %fp1,%fp2 # FP2 IS S*A5
9266
fmul.x %fp1,%fp3 # FP3 IS S*A4
9267
9268
fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9269
fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9270
9271
fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9272
fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9273
9274
fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9275
fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9276
9277
fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9278
fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9279
fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9280
9281
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9282
9283
#--FINAL RECONSTRUCTION PROCESS
9284
#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9285
9286
fmul.x FACT1(%a6),%fp0
9287
fadd.x FACT2(%a6),%fp0
9288
fadd.x FACT1(%a6),%fp0
9289
9290
fmov.l %d0,%fpcr # restore users round prec,mode
9291
mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9292
mov.l (%sp)+,%d2
9293
mov.l &0x80000000,ADJFACT+4(%a6)
9294
clr.l ADJFACT+8(%a6)
9295
mov.b &FMUL_OP,%d1 # last inst is MUL
9296
fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9297
bra t_catch
9298
9299
global stentoxd
9300
stentoxd:
9301
#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9302
9303
fmov.l %d0,%fpcr # set user's rounding mode/precision
9304
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9305
mov.l (%a0),%d1
9306
or.l &0x00800001,%d1
9307
fadd.s %d1,%fp0
9308
bra t_pinx2
9309
9310
#########################################################################
9311
# smovcr(): returns the ROM constant at the offset specified in d1 #
9312
# rounded to the mode and precision specified in d0. #
9313
# #
9314
# INPUT *************************************************************** #
9315
# d0 = rnd prec,mode #
9316
# d1 = ROM offset #
9317
# #
9318
# OUTPUT ************************************************************** #
9319
# fp0 = the ROM constant rounded to the user's rounding mode,prec #
9320
# #
9321
#########################################################################
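#
# A hedged C sketch (not part of the FPSP sources) of the offset
# dispatch performed below; the constants themselves live in the
# PIRN/SMALRN/BIGRN table families further down:
#
#	typedef enum { TBL_PI, TBL_SMALL, TBL_BIG, TBL_ZERO } cr_tbl_t;
#
#	static cr_tbl_t movcr_table(unsigned offset)
#	{
#	    if (offset == 0x00)                   return TBL_PI;
#	    if (offset >= 0x0b && offset <= 0x0e) return TBL_SMALL;
#	    if (offset >= 0x30 && offset <= 0x3f) return TBL_BIG;
#	    return TBL_ZERO;      /* all other offsets read back as +0.0 */
#	}
#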
9322
9323
global smovcr
9324
smovcr:
9325
mov.l %d1,-(%sp) # save rom offset for a sec
9326
9327
lsr.b &0x4,%d0 # shift ctrl bits to lo
9328
mov.l %d0,%d1 # make a copy
9329
andi.w &0x3,%d1 # extract rnd mode
9330
andi.w &0xc,%d0 # extract rnd prec
9331
swap %d0 # put rnd prec in hi
9332
mov.w %d1,%d0 # put rnd mode in lo
9333
9334
mov.l (%sp)+,%d1 # get rom offset
9335
9336
#
9337
# check range of offset
9338
#
9339
tst.b %d1 # if zero, offset is to pi
9340
beq.b pi_tbl # it is pi
9341
cmpi.b %d1,&0x0a # check range $01 - $0a
9342
ble.b z_val # if in this range, return zero
9343
cmpi.b %d1,&0x0e # check range $0b - $0e
9344
ble.b sm_tbl # valid constants in this range
9345
cmpi.b %d1,&0x2f # check range $10 - $2f
9346
ble.b z_val # if in this range, return zero
9347
cmpi.b %d1,&0x3f # check range $30 - $3f
9348
ble.b bg_tbl # valid constants in this range
9349
9350
z_val:
9351
bra.l ld_pzero # return a zero
9352
9353
#
9354
# the answer is PI rounded to the proper precision.
9355
#
9356
# fetch a pointer to the answer table relating to the proper rounding
9357
# precision.
9358
#
9359
pi_tbl:
9360
tst.b %d0 # is rmode RN?
9361
bne.b pi_not_rn # no
9362
pi_rn:
9363
lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9364
bra.w set_finx
9365
pi_not_rn:
9366
cmpi.b %d0,&rp_mode # is rmode RP?
9367
beq.b pi_rp # yes
9368
pi_rzrm:
9369
lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9370
bra.b set_finx
9371
pi_rp:
9372
lea.l PIRP(%pc),%a0 # load PI RP table addr
9373
bra.b set_finx
9374
9375
#
9376
# the answer is one of:
9377
# $0B log10(2) (inexact)
9378
# $0C e (inexact)
9379
# $0D log2(e) (inexact)
9380
# $0E log10(e) (exact)
9381
#
9382
# fetch a pointer to the answer table relating to the proper rounding
9383
# precision.
9384
#
9385
sm_tbl:
9386
subi.b &0xb,%d1 # make offset in 0-4 range
9387
tst.b %d0 # is rmode RN?
9388
bne.b sm_not_rn # no
9389
sm_rn:
9390
lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9391
sm_tbl_cont:
9392
cmpi.b %d1,&0x2 # is result log10(e)?
9393
ble.b set_finx # no; answer is inexact
9394
bra.b no_finx # yes; answer is exact
9395
sm_not_rn:
9396
cmpi.b %d0,&rp_mode # is rmode RP?
9397
beq.b sm_rp # yes
9398
sm_rzrm:
9399
lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9400
bra.b sm_tbl_cont
9401
sm_rp:
9402
lea.l SMALRP(%pc),%a0 # load RP table addr
9403
bra.b sm_tbl_cont
9404
9405
#
9406
# the answer is one of:
9407
# $30 ln(2) (inexact)
9408
# $31 ln(10) (inexact)
9409
# $32 10^0 (exact)
9410
# $33 10^1 (exact)
9411
# $34 10^2 (exact)
9412
# $35 10^4 (exact)
9413
# $36 10^8 (exact)
9414
# $37 10^16 (exact)
9415
# $38 10^32 (inexact)
9416
# $39 10^64 (inexact)
9417
# $3A 10^128 (inexact)
9418
# $3B 10^256 (inexact)
9419
# $3C 10^512 (inexact)
9420
# $3D 10^1024 (inexact)
9421
# $3E 10^2048 (inexact)
9422
# $3F 10^4096 (inexact)
9423
#
9424
# fetch a pointer to the answer table relating to the proper rounding
9425
# precision.
9426
#
9427
bg_tbl:
9428
subi.b &0x30,%d1 # make offset in 0-f range
9429
tst.b %d0 # is rmode RN?
9430
bne.b bg_not_rn # no
9431
bg_rn:
9432
lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9433
bg_tbl_cont:
9434
cmpi.b %d1,&0x1 # is offset <= $31?
9435
ble.b set_finx # yes; answer is inexact
9436
cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9437
ble.b no_finx # yes; answer is exact
9438
bra.b set_finx # no; answer is inexact
9439
bg_not_rn:
9440
cmpi.b %d0,&rp_mode # is rmode RP?
9441
beq.b bg_rp # yes
9442
bg_rzrm:
9443
lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9444
bra.b bg_tbl_cont
9445
bg_rp:
9446
lea.l BIGRP(%pc),%a0 # load RP table addr
9447
bra.b bg_tbl_cont
9448
9449
# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9450
set_finx:
9451
ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9452
no_finx:
9453
mulu.w &0xc,%d1 # offset points into tables
9454
swap %d0 # put rnd prec in lo word
9455
tst.b %d0 # is precision extended?
9456
9457
bne.b not_ext # if xprec, do not call round
9458
9459
# Precision is extended
9460
fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9461
rts
9462
9463
# Precision is single or double
9464
not_ext:
9465
swap %d0 # rnd prec in upper word
9466
9467
# call round() to round the answer to the proper precision.
9468
# exponents out of range for single or double DO NOT cause underflow
9469
# or overflow.
9470
mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9471
mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9472
mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9473
mov.l %d0,%d1
9474
clr.l %d0 # clear g,r,s
9475
lea FP_SCR1(%a6),%a0 # pass ptr to answer
9476
clr.w LOCAL_SGN(%a0) # sign always positive
9477
bsr.l _round # round the mantissa
9478
9479
fmovm.x (%a0),&0x80 # return rounded result in fp0
9480
rts
9481
9482
align 0x4
9483
9484
PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9485
PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9486
PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9487
9488
SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9489
long 0x40000000,0xadf85458,0xa2bb4a9a # e
9490
long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9491
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9492
long 0x00000000,0x00000000,0x00000000 # 0.0
9493
9494
SMALRZRM:
9495
long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9496
long 0x40000000,0xadf85458,0xa2bb4a9a # e
9497
long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9498
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9499
long 0x00000000,0x00000000,0x00000000 # 0.0
9500
9501
SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9502
long 0x40000000,0xadf85458,0xa2bb4a9b # e
9503
long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9504
long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9505
long 0x00000000,0x00000000,0x00000000 # 0.0
9506
9507
BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9508
long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9509
9510
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9511
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9512
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9513
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9514
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9515
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9516
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9517
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9518
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9519
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9520
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9521
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9522
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9523
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9524
9525
BIGRZRM:
9526
long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9527
long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9528
9529
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9530
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9531
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9532
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9533
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9534
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9535
long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9536
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9537
long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9538
long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9539
long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9540
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9541
long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9542
long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9543
9544
BIGRP:
9545
long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9546
long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9547
9548
long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9549
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9550
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9551
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9552
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9553
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9554
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9555
long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9556
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9557
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9558
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9559
long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9560
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9561
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9562
9563
#########################################################################
9564
# sscale(): computes the destination operand scaled by the source #
9565
# operand. If the absolute value of the source operand is #
9566
# >= 2^14, an overflow or underflow is returned. #
9567
# #
9568
# INPUT *************************************************************** #
9569
# a0 = pointer to double-extended source operand X #
9570
# a1 = pointer to double-extended destination operand Y #
9571
# #
9572
# OUTPUT ************************************************************** #
9573
# fp0 = scale(X,Y) #
9574
# #
9575
#########################################################################
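#
# In rough C terms the operation is the following (using <math.h> ldexpl()
# only to state the definition; the routine below builds the power of two by
# hand instead):
#
#	/* scale(X,Y) = Y * 2^(integer part of X) */
#	long double scale(long double x, long double y)
#	{
#		int n = (int)x;		/* truncate toward zero, like fintrz     */
#		return ldexpl(y, n);	/* over/underflows once |n| reaches 2^14 */
#	}
#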
9576
9577
set SIGN, L_SCR1
9578
9579
global sscale
9580
sscale:
9581
mov.l %d0,-(%sp) # store off ctrl bits for now
9582
9583
mov.w DST_EX(%a1),%d1 # get dst exponent
9584
smi.b SIGN(%a6) # use SIGN to hold dst sign
9585
andi.l &0x00007fff,%d1 # strip sign from dst exp
9586
9587
mov.w SRC_EX(%a0),%d0 # check src bounds
9588
andi.w &0x7fff,%d0 # clr src sign bit
9589
cmpi.w %d0,&0x3fff # is |src| < 1?
9590
blt.w src_small # yes
9591
cmpi.w %d0,&0x400c # no; is src too big?
9592
bgt.w src_out # yes
9593
9594
#
9595
# Source is within 2^14 range.
9596
#
9597
src_ok:
9598
fintrz.x SRC(%a0),%fp0 # calc int of src
9599
fmov.l %fp0,%d0 # int src to d0
9600
# don't want any accrued bits from the fintrz showing up later since
9601
# we may need to read the fpsr for the last fp op in t_catch2().
9602
fmov.l &0x0,%fpsr
9603
9604
tst.b DST_HI(%a1) # is dst denormalized?
9605
bmi.b sok_norm
9606
9607
# the dst is a DENORM. normalize the DENORM and add the adjustment to
9608
# the src value. then, jump to the norm part of the routine.
9609
sok_dnrm:
9610
mov.l %d0,-(%sp) # save src for now
9611
9612
mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9613
mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9614
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9615
9616
lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9617
bsr.l norm # normalize the DENORM
9618
neg.l %d0
9619
add.l (%sp)+,%d0 # add adjustment to src
9620
9621
fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9622
9623
cmpi.w %d0,&-0x3fff # is the shft amt really low?
9624
bge.b sok_norm2 # thank goodness no
9625
9626
# the multiply factor that we're trying to create should be a denorm
9627
# for the multiply to work. Therefore, we're going to actually do a
9628
# multiply with a denorm which will cause an unimplemented data type
9629
# exception to be put into the machine which will be caught and corrected
9630
# later. we don't do this with the DENORMs above because this method
9631
# is slower. but, don't fret, I don't see it being used much either.
9632
fmov.l (%sp)+,%fpcr # restore user fpcr
9633
mov.l &0x80000000,%d1 # load normalized mantissa
9634
subi.l &-0x3fff,%d0 # how many should we shift?
9635
neg.l %d0 # make it positive
9636
cmpi.b %d0,&0x20 # is it > 32?
9637
bge.b sok_dnrm_32 # yes
9638
lsr.l %d0,%d1 # no; bit stays in upper lw
9639
clr.l -(%sp) # insert zero low mantissa
9640
mov.l %d1,-(%sp) # insert new high mantissa
9641
clr.l -(%sp) # make zero exponent
9642
bra.b sok_norm_cont
9643
sok_dnrm_32:
9644
subi.b &0x20,%d0 # get shift count
9645
lsr.l %d0,%d1 # make low mantissa longword
9646
mov.l %d1,-(%sp) # insert new low mantissa
9647
clr.l -(%sp) # insert zero high mantissa
9648
clr.l -(%sp) # make zero exponent
9649
bra.b sok_norm_cont
9650
9651
# the src will force the dst to a DENORM value or worse. so, let's
9652
# create an fp multiply that will create the result.
9653
sok_norm:
9654
fmovm.x DST(%a1),&0x80 # load fp0 with normalized src
9655
sok_norm2:
9656
fmov.l (%sp)+,%fpcr # restore user fpcr
9657
9658
addi.w &0x3fff,%d0 # turn src amt into exp value
9659
swap %d0 # put exponent in high word
9660
clr.l -(%sp) # insert new exponent
9661
mov.l &0x80000000,-(%sp) # insert new high mantissa
9662
mov.l %d0,-(%sp) # insert new lo mantissa
9663
9664
sok_norm_cont:
9665
fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9666
mov.b &FMUL_OP,%d1 # last inst is MUL
9667
fmul.x (%sp)+,%fp0 # do the multiply
9668
bra t_catch2 # catch any exceptions
9669
9670
#
9671
# Source is outside of 2^14 range. Test the sign and branch
9672
# to the appropriate exception handler.
9673
#
9674
src_out:
9675
mov.l (%sp)+,%d0 # restore ctrl bits
9676
exg %a0,%a1 # swap src,dst ptrs
9677
tst.b SRC_EX(%a1) # is src negative?
9678
bmi t_unfl # yes; underflow
9679
bra t_ovfl_sc # no; overflow
9680
9681
#
9682
# The source input is below 1, so we check for denormalized numbers
9683
# and set unfl.
9684
#
9685
src_small:
9686
tst.b DST_HI(%a1) # is dst denormalized?
9687
bpl.b ssmall_done # yes
9688
9689
mov.l (%sp)+,%d0
9690
fmov.l %d0,%fpcr # no; load control bits
9691
mov.b &FMOV_OP,%d1 # last inst is MOVE
9692
fmov.x DST(%a1),%fp0 # simply return dest
9693
bra t_catch2
9694
ssmall_done:
9695
mov.l (%sp)+,%d0 # load control bits into d0
9696
mov.l %a1,%a0 # pass ptr to dst
9697
bra t_resdnrm
9698
9699
#########################################################################
9700
# smod(): computes the fp MOD of the input values X,Y. #
9701
# srem(): computes the fp (IEEE) REM of the input values X,Y. #
9702
# #
9703
# INPUT *************************************************************** #
9704
# a0 = pointer to extended precision input X #
9705
# a1 = pointer to extended precision input Y #
9706
# d0 = round precision,mode #
9707
# #
9708
# The input operands X and Y can be either normalized or #
9709
# denormalized. #
9710
# #
9711
# OUTPUT ************************************************************** #
9712
# fp0 = FREM(X,Y) or FMOD(X,Y) #
9713
# #
9714
# ALGORITHM *********************************************************** #
9715
# #
9716
# Step 1. Save and strip signs of X and Y: signX := sign(X), #
9717
# signY := sign(Y), X := |X|, Y := |Y|, #
9718
# signQ := signX EOR signY. Record whether MOD or REM #
9719
# is requested. #
9720
# #
9721
# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9722
# If (L < 0) then #
9723
# R := X, go to Step 4. #
9724
# else #
9725
# R := 2^(-L)X, j := L. #
9726
# endif #
9727
# #
9728
# Step 3. Perform MOD(X,Y) #
9729
# 3.1 If R = Y, go to Step 9. #
9730
# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9731
# 3.3 If j = 0, go to Step 4. #
9732
# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9733
# Step 3.1. #
9734
# #
9735
# Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9736
# Last_Subtract := false (used in Step 7 below). If #
9737
# MOD is requested, go to Step 6. #
9738
# #
9739
# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9740
# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9741
# Step 6. #
9742
# 5.2 If R > Y/2, then { set Last_Subtract := true, #
9743
# Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9744
# 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9745
# then { Q := Q + 1, signX := -signX }. #
9746
# #
9747
# Step 6. R := signX*R. #
9748
# #
9749
# Step 7. If Last_Subtract = true, R := R - Y. #
9750
# #
9751
# Step 8. Return signQ, last 7 bits of Q, and R as required. #
9752
# #
9753
# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9754
# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9755
# R := 0. Return signQ, last 7 bits of Q, and R. #
9756
# #
9757
#########################################################################
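#
# A compact C model of the loop in Steps 2-4 and 9, using <math.h> ilogbl()
# and ldexpl() for the exponent arithmetic. x and y are assumed already
# stripped of their signs (Step 1); frem additionally applies Steps 5-8 to
# round the quotient to nearest, ties going to the even quotient. Illustrative
# only, not the code path used below:
#
#	/* R = |X| - Q*|Y| with 0 <= R < |Y|; low bits of Q returned in *q */
#	long double mod_loop(long double x, long double y, unsigned int *q)
#	{
#		int l = ilogbl(x) - ilogbl(y);		/* Step 2: L          */
#		long double r = x;
#		unsigned int quo = 0;
#
#		if (l >= 0) {
#			r = ldexpl(x, -l);		/* R := 2^(-L) X      */
#			for (int j = l; ; j--) {	/* Step 3             */
#				if (r >= y) {		/* 3.1 / 3.2          */
#					r -= y;		/* exact: y <= r < 2y */
#					quo++;
#				}
#				if (j == 0)		/* 3.3                */
#					break;
#				quo *= 2;		/* 3.4                */
#				r *= 2;
#			}
#		}
#		*q = quo;				/* Step 8 keeps 7 bits */
#		return r;				/* Step 4: R = X - QY  */
#	}
#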
9758
9759
set Mod_Flag,L_SCR3
9760
set Sc_Flag,L_SCR3+1
9761
9762
set SignY,L_SCR2
9763
set SignX,L_SCR2+2
9764
set SignQ,L_SCR3+2
9765
9766
set Y,FP_SCR0
9767
set Y_Hi,Y+4
9768
set Y_Lo,Y+8
9769
9770
set R,FP_SCR1
9771
set R_Hi,R+4
9772
set R_Lo,R+8
9773
9774
Scale:
9775
long 0x00010000,0x80000000,0x00000000,0x00000000
9776
9777
global smod
9778
smod:
9779
clr.b FPSR_QBYTE(%a6)
9780
mov.l %d0,-(%sp) # save ctrl bits
9781
clr.b Mod_Flag(%a6)
9782
bra.b Mod_Rem
9783
9784
global srem
9785
srem:
9786
clr.b FPSR_QBYTE(%a6)
9787
mov.l %d0,-(%sp) # save ctrl bits
9788
mov.b &0x1,Mod_Flag(%a6)
9789
9790
Mod_Rem:
9791
#..Save sign of X and Y
9792
movm.l &0x3f00,-(%sp) # save data registers
9793
mov.w SRC_EX(%a0),%d3
9794
mov.w %d3,SignY(%a6)
9795
and.l &0x00007FFF,%d3 # Y := |Y|
9796
9797
#
9798
mov.l SRC_HI(%a0),%d4
9799
mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9800
9801
tst.l %d3
9802
bne.b Y_Normal
9803
9804
mov.l &0x00003FFE,%d3 # $3FFD + 1
9805
tst.l %d4
9806
bne.b HiY_not0
9807
9808
HiY_0:
9809
mov.l %d5,%d4
9810
clr.l %d5
9811
sub.l &32,%d3
9812
clr.l %d6
9813
bfffo %d4{&0:&32},%d6
9814
lsl.l %d6,%d4
9815
sub.l %d6,%d3 # (D3,D4,D5) is normalized
9816
# ...with bias $7FFD
9817
bra.b Chk_X
9818
9819
HiY_not0:
9820
clr.l %d6
9821
bfffo %d4{&0:&32},%d6
9822
sub.l %d6,%d3
9823
lsl.l %d6,%d4
9824
mov.l %d5,%d7 # a copy of D5
9825
lsl.l %d6,%d5
9826
neg.l %d6
9827
add.l &32,%d6
9828
lsr.l %d6,%d7
9829
or.l %d7,%d4 # (D3,D4,D5) normalized
9830
# ...with bias $7FFD
9831
bra.b Chk_X
9832
9833
Y_Normal:
9834
add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9835
# ...with bias $7FFD
9836
9837
Chk_X:
9838
mov.w DST_EX(%a1),%d0
9839
mov.w %d0,SignX(%a6)
9840
mov.w SignY(%a6),%d1
9841
eor.l %d0,%d1
9842
and.l &0x00008000,%d1
9843
mov.w %d1,SignQ(%a6) # sign(Q) obtained
9844
and.l &0x00007FFF,%d0
9845
mov.l DST_HI(%a1),%d1
9846
mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9847
tst.l %d0
9848
bne.b X_Normal
9849
mov.l &0x00003FFE,%d0
9850
tst.l %d1
9851
bne.b HiX_not0
9852
9853
HiX_0:
9854
mov.l %d2,%d1
9855
clr.l %d2
9856
sub.l &32,%d0
9857
clr.l %d6
9858
bfffo %d1{&0:&32},%d6
9859
lsl.l %d6,%d1
9860
sub.l %d6,%d0 # (D0,D1,D2) is normalized
9861
# ...with bias $7FFD
9862
bra.b Init
9863
9864
HiX_not0:
9865
clr.l %d6
9866
bfffo %d1{&0:&32},%d6
9867
sub.l %d6,%d0
9868
lsl.l %d6,%d1
9869
mov.l %d2,%d7 # a copy of D2
9870
lsl.l %d6,%d2
9871
neg.l %d6
9872
add.l &32,%d6
9873
lsr.l %d6,%d7
9874
or.l %d7,%d1 # (D0,D1,D2) normalized
9875
# ...with bias $7FFD
9876
bra.b Init
9877
9878
X_Normal:
9879
add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9880
# ...with bias $7FFD
9881
9882
Init:
9883
#
9884
mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9885
mov.l %d0,-(%sp) # save biased exp(X)
9886
sub.l %d3,%d0 # L := expo(X)-expo(Y)
9887
9888
clr.l %d6 # D6 := carry <- 0
9889
clr.l %d3 # D3 is Q
9890
mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9891
9892
#..(Carry,D1,D2) is R
9893
tst.l %d0
9894
bge.b Mod_Loop_pre
9895
9896
#..expo(X) < expo(Y). Thus X = mod(X,Y)
9897
#
9898
mov.l (%sp)+,%d0 # restore d0
9899
bra.w Get_Mod
9900
9901
Mod_Loop_pre:
9902
addq.l &0x4,%sp # erase exp(X)
9903
#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9904
Mod_Loop:
9905
tst.l %d6 # test carry bit
9906
bgt.b R_GT_Y
9907
9908
#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9909
cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9910
bne.b R_NE_Y
9911
cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9912
bne.b R_NE_Y
9913
9914
#..At this point, R = Y
9915
bra.w Rem_is_0
9916
9917
R_NE_Y:
9918
#..use the borrow of the previous compare
9919
bcs.b R_LT_Y # borrow is set iff R < Y
9920
9921
R_GT_Y:
9922
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9923
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9924
sub.l %d5,%d2 # lo(R) - lo(Y)
9925
subx.l %d4,%d1 # hi(R) - hi(Y)
9926
clr.l %d6 # clear carry
9927
addq.l &1,%d3 # Q := Q + 1
9928
9929
R_LT_Y:
9930
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9931
tst.l %d0 # see if j = 0.
9932
beq.b PostLoop
9933
9934
add.l %d3,%d3 # Q := 2Q
9935
add.l %d2,%d2 # lo(R) = 2lo(R)
9936
roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9937
scs %d6 # set Carry if 2(R) overflows
9938
addq.l &1,%a1 # k := k+1
9939
subq.l &1,%d0 # j := j - 1
9940
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9941
9942
bra.b Mod_Loop
9943
9944
PostLoop:
9945
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9946
9947
#..normalize R.
9948
mov.l L_SCR1(%a6),%d0 # new biased expo of R
9949
tst.l %d1
9950
bne.b HiR_not0
9951
9952
HiR_0:
9953
mov.l %d2,%d1
9954
clr.l %d2
9955
sub.l &32,%d0
9956
clr.l %d6
9957
bfffo %d1{&0:&32},%d6
9958
lsl.l %d6,%d1
9959
sub.l %d6,%d0 # (D0,D1,D2) is normalized
9960
# ...with bias $7FFD
9961
bra.b Get_Mod
9962
9963
HiR_not0:
9964
clr.l %d6
9965
bfffo %d1{&0:&32},%d6
9966
bmi.b Get_Mod # already normalized
9967
sub.l %d6,%d0
9968
lsl.l %d6,%d1
9969
mov.l %d2,%d7 # a copy of D2
9970
lsl.l %d6,%d2
9971
neg.l %d6
9972
add.l &32,%d6
9973
lsr.l %d6,%d7
9974
or.l %d7,%d1 # (D0,D1,D2) normalized
9975
9976
#
9977
Get_Mod:
9978
cmp.l %d0,&0x000041FE
9979
bge.b No_Scale
9980
Do_Scale:
9981
mov.w %d0,R(%a6)
9982
mov.l %d1,R_Hi(%a6)
9983
mov.l %d2,R_Lo(%a6)
9984
mov.l L_SCR1(%a6),%d6
9985
mov.w %d6,Y(%a6)
9986
mov.l %d4,Y_Hi(%a6)
9987
mov.l %d5,Y_Lo(%a6)
9988
fmov.x R(%a6),%fp0 # no exception
9989
mov.b &1,Sc_Flag(%a6)
9990
bra.b ModOrRem
9991
No_Scale:
9992
mov.l %d1,R_Hi(%a6)
9993
mov.l %d2,R_Lo(%a6)
9994
sub.l &0x3FFE,%d0
9995
mov.w %d0,R(%a6)
9996
mov.l L_SCR1(%a6),%d6
9997
sub.l &0x3FFE,%d6
9998
mov.l %d6,L_SCR1(%a6)
9999
fmov.x R(%a6),%fp0
10000
mov.w %d6,Y(%a6)
10001
mov.l %d4,Y_Hi(%a6)
10002
mov.l %d5,Y_Lo(%a6)
10003
clr.b Sc_Flag(%a6)
10004
10005
#
10006
ModOrRem:
10007
tst.b Mod_Flag(%a6)
10008
beq.b Fix_Sign
10009
10010
mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10011
subq.l &1,%d6 # biased expo(Y/2)
10012
cmp.l %d0,%d6
10013
blt.b Fix_Sign
10014
bgt.b Last_Sub
10015
10016
cmp.l %d1,%d4
10017
bne.b Not_EQ
10018
cmp.l %d2,%d5
10019
bne.b Not_EQ
10020
bra.w Tie_Case
10021
10022
Not_EQ:
10023
bcs.b Fix_Sign
10024
10025
Last_Sub:
10026
#
10027
fsub.x Y(%a6),%fp0 # no exceptions
10028
addq.l &1,%d3 # Q := Q + 1
10029
10030
#
10031
Fix_Sign:
10032
#..Get sign of X
10033
mov.w SignX(%a6),%d6
10034
bge.b Get_Q
10035
fneg.x %fp0
10036
10037
#..Get Q
10038
#
10039
Get_Q:
10040
clr.l %d6
10041
mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10042
mov.l &8,%d7
10043
lsr.l %d7,%d6
10044
and.l &0x0000007F,%d3 # 7 bits of Q
10045
or.l %d6,%d3 # sign and bits of Q
10046
# swap %d3
10047
# fmov.l %fpsr,%d6
10048
# and.l &0xFF00FFFF,%d6
10049
# or.l %d3,%d6
10050
# fmov.l %d6,%fpsr # put Q in fpsr
10051
mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
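#
# I.e. the quotient byte handed back in the FPSR is laid out as
# bit 7 = sign(Q) = sign(X) eor sign(Y), bits 6-0 = low seven bits of |Q|;
# in C (names hypothetical):
#
#	fpsr_qbyte = (sign_q ? 0x80 : 0x00) | (quo & 0x7f);
#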
10052
10053
#
10054
Restore:
10055
movm.l (%sp)+,&0xfc # {%d2-%d7}
10056
mov.l (%sp)+,%d0
10057
fmov.l %d0,%fpcr
10058
tst.b Sc_Flag(%a6)
10059
beq.b Finish
10060
mov.b &FMUL_OP,%d1 # last inst is MUL
10061
fmul.x Scale(%pc),%fp0 # may cause underflow
10062
bra t_catch2
10063
# the '040 package did this apparently to see if the dst operand for the
10064
# preceding fmul was a denorm. but, it better not have been since the
10065
# algorithm just got done playing with fp0 and expected no exceptions
10066
# as a result. trust me...
10067
# bra t_avoid_unsupp # check for denorm as a
10068
# ;result of the scaling
10069
10070
Finish:
10071
mov.b &FMOV_OP,%d1 # last inst is MOVE
10072
fmov.x %fp0,%fp0 # capture exceptions & round
10073
bra t_catch2
10074
10075
Rem_is_0:
10076
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10077
addq.l &1,%d3
10078
cmp.l %d0,&8 # D0 is j
10079
bge.b Q_Big
10080
10081
lsl.l %d0,%d3
10082
bra.b Set_R_0
10083
10084
Q_Big:
10085
clr.l %d3
10086
10087
Set_R_0:
10088
fmov.s &0x00000000,%fp0
10089
clr.b Sc_Flag(%a6)
10090
bra.w Fix_Sign
10091
10092
Tie_Case:
10093
#..Check parity of Q
10094
mov.l %d3,%d6
10095
and.l &0x00000001,%d6
10096
tst.l %d6
10097
beq.w Fix_Sign # Q is even
10098
10099
#..Q is odd, Q := Q + 1, signX := -signX
10100
addq.l &1,%d3
10101
mov.w SignX(%a6),%d6
10102
eor.l &0x00008000,%d6
10103
mov.w %d6,SignX(%a6)
10104
bra.w Fix_Sign
10105
10106
qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10107
10108
#########################################################################
10109
# XDEF **************************************************************** #
10110
# t_dz(): Handle DZ exception during transcendental emulation. #
10111
# Sets N bit according to sign of source operand. #
10112
# t_dz2(): Handle DZ exception during transcendental emulation. #
10113
# Sets N bit always. #
10114
# #
10115
# XREF **************************************************************** #
10116
# None #
10117
# #
10118
# INPUT *************************************************************** #
10119
# a0 = pointer to source operand #
10120
# #
10121
# OUTPUT ************************************************************** #
10122
# fp0 = default result #
10123
# #
10124
# ALGORITHM *********************************************************** #
10125
# - Store properly signed INF into fp0. #
10126
# - Set FPSR exception status dz bit, ccode inf bit, and #
10127
# accrued dz bit. #
10128
# #
10129
#########################################################################
10130
10131
global t_dz
10132
t_dz:
10133
tst.b SRC_EX(%a0) # is src negative?
10134
bmi.b t_dz2 # yes
10135
10136
dz_pinf:
10137
fmov.s &0x7f800000,%fp0 # return +INF in fp0
10138
ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10139
rts
10140
10141
global t_dz2
10142
t_dz2:
10143
fmov.s &0xff800000,%fp0 # return -INF in fp0
10144
ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10145
rts
10146
10147
#################################################################
10148
# OPERR exception: #
10149
# - set FPSR exception status operr bit, condition code #
10150
# nan bit; Store default NAN into fp0 #
10151
#################################################################
10152
global t_operr
10153
t_operr:
10154
ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155
fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
10156
rts
10157
10158
#################################################################
10159
# Extended DENORM: #
10160
# - For all functions that have a denormalized input and #
10161
# satisfy f(x)=x, this is the entry point. #
10162
# - we only return the EXOP here if either underflow or #
10163
# inexact is enabled. #
10164
#################################################################
10165
10166
# Entry point for scale w/ extended denorm. The function does
10167
# NOT set INEX2/AUNFL/AINEX.
10168
global t_resdnrm
10169
t_resdnrm:
10170
ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
10171
bra.b xdnrm_con
10172
10173
global t_extdnrm
10174
t_extdnrm:
10175
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10176
10177
xdnrm_con:
10178
mov.l %a0,%a1 # make copy of src ptr
10179
mov.l %d0,%d1 # make copy of rnd prec,mode
10180
andi.b &0xc0,%d1 # extended precision?
10181
bne.b xdnrm_sd # no
10182
10183
# result precision is extended.
10184
tst.b LOCAL_EX(%a0) # is denorm negative?
10185
bpl.b xdnrm_exit # no
10186
10187
bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
10188
bra.b xdnrm_exit
10189
10190
# result precision is single or double
10191
xdnrm_sd:
10192
mov.l %a1,-(%sp)
10193
tst.b LOCAL_EX(%a0) # is denorm pos or neg?
10194
smi.b %d1 # set d0 accordingly
10195
bsr.l unf_sub
10196
mov.l (%sp)+,%a1
10197
xdnrm_exit:
10198
fmovm.x (%a0),&0x80 # return default result in fp0
10199
10200
mov.b FPCR_ENABLE(%a6),%d0
10201
andi.b &0x0a,%d0 # is UNFL or INEX enabled?
10202
bne.b xdnrm_ena # yes
10203
rts
10204
10205
################
10206
# unfl enabled #
10207
################
10208
# we have a DENORM that needs to be converted into an EXOP.
10209
# so, normalize the mantissa, add 0x6000 to the new exponent,
10210
# and return the result in fp1.
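#
# In C-style shorthand, with new_exp standing for whatever exponent the
# normalization below produces (a sketch, not extra code):
#
#	exp_field = (exp_field & 0x8000)	/* keep the result's sign    */
#		  | (new_exp + 0x6000);		/* re-bias the tiny exponent */
#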
10211
xdnrm_ena:
10212
mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6)
10213
mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10214
mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10215
10216
lea FP_SCR0(%a6),%a0
10217
bsr.l norm # normalize mantissa
10218
addi.l &0x6000,%d0 # add extra bias
10219
andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
10220
or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
10221
10222
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10223
rts
10224
10225
#################################################################
10226
# UNFL exception: #
10227
# - This routine is for cases where even an EXOP isn't #
10228
# large enough to hold the range of this result. #
10229
# In such a case, the EXOP equals zero. #
10230
# - Return the default result to the proper precision #
10231
# with the sign of this result being the same as that #
10232
# of the src operand. #
10233
# - t_unfl2() is provided to force the result sign to #
10234
# positive which is the desired result for fetox(). #
10235
#################################################################
10236
global t_unfl
10237
t_unfl:
10238
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10239
10240
tst.b (%a0) # is result pos or neg?
10241
smi.b %d1 # set d1 accordingly
10242
bsr.l unf_sub # calc default unfl result
10243
fmovm.x (%a0),&0x80 # return default result in fp0
10244
10245
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10246
rts
10247
10248
# t_unfl2 ALWAYS tells unf_sub to create a positive result
10249
global t_unfl2
10250
t_unfl2:
10251
ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10252
10253
sf.b %d1 # clear d1 to represent positive
10254
bsr.l unf_sub # calc default unfl result
10255
fmovm.x (%a0),&0x80 # return default result in fp0
10256
10257
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10258
rts
10259
10260
#################################################################
10261
# OVFL exception: #
10262
# - This routine is for cases where even an EXOP isn't #
10263
# large enough to hold the range of this result. #
10264
# - Return the default result to the proper precision #
10265
# with the sign of this result being the same as that #
10266
# of the src operand. #
10267
# - t_ovfl2() is provided to force the result sign to #
10268
# positive which is the desired result for fcosh(). #
10269
# - t_ovfl_sc() is provided for scale() which only sets #
10270
# the inexact bits if the number is inexact for the #
10271
# precision indicated. #
10272
#################################################################
10273
10274
global t_ovfl_sc
10275
t_ovfl_sc:
10276
ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10277
10278
mov.b %d0,%d1 # fetch rnd mode/prec
10279
andi.b &0xc0,%d1 # extract rnd prec
10280
beq.b ovfl_work # prec is extended
10281
10282
tst.b LOCAL_HI(%a0) # is dst a DENORM?
10283
bmi.b ovfl_sc_norm # no
10284
10285
# dst op is a DENORM. we have to normalize the mantissa to see if the
10286
# result would be inexact for the given precision. make a copy of the
10287
# dst so we don't screw up the version passed to us.
10288
mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289
mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290
mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291
lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10292
movm.l &0xc080,-(%sp) # save d0-d1/a0
10293
bsr.l norm # normalize mantissa
10294
movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10295
10296
ovfl_sc_norm:
10297
cmpi.b %d1,&0x40 # is prec sgl?
10298
bne.b ovfl_sc_dbl # no; dbl
10299
ovfl_sc_sgl:
10300
tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10301
bne.b ovfl_sc_inx # yes
10302
tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10303
bne.b ovfl_sc_inx # yes
10304
bra.b ovfl_work # don't set INEX2
10305
ovfl_sc_dbl:
10306
mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10307
andi.l &0x7ff,%d1 # dbl mantissa set?
10308
beq.b ovfl_work # no; don't set INEX2
10309
ovfl_sc_inx:
10310
ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10311
bra.b ovfl_work # continue
10312
10313
global t_ovfl
10314
t_ovfl:
10315
ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10316
10317
ovfl_work:
10318
tst.b LOCAL_EX(%a0) # what is the sign?
10319
smi.b %d1 # set d1 accordingly
10320
bsr.l ovf_res # calc default ovfl result
10321
mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10322
fmovm.x (%a0),&0x80 # return default result in fp0
10323
10324
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10325
rts
10326
10327
# t_ovfl2 ALWAYS tells ovf_res to create a positive result
10328
global t_ovfl2
10329
t_ovfl2:
10330
ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10331
10332
sf.b %d1 # clear sign flag for positive
10333
bsr.l ovf_res # calc default ovfl result
10334
mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10335
fmovm.x (%a0),&0x80 # return default result in fp0
10336
10337
fmov.s &0x00000000,%fp1 # return EXOP in fp1
10338
rts
10339
10340
#################################################################
10341
# t_catch(): #
10342
# - the last operation of a transcendental emulation #
10343
# routine may have caused an underflow or overflow. #
10344
# we find out if this occurred by doing an fsave and #
10345
# checking the exception bit. if one did occur, then we #
10346
# jump to fgen_except() which creates the default #
10347
# result and EXOP for us. #
10348
#################################################################
10349
global t_catch
10350
t_catch:
10351
10352
fsave -(%sp)
10353
tst.b 0x2(%sp)
10354
bmi.b catch
10355
add.l &0xc,%sp
10356
10357
#################################################################
10358
# INEX2 exception: #
10359
# - The inex2 and ainex bits are set. #
10360
#################################################################
10361
global t_inx2
10362
t_inx2:
10363
fblt.w t_minx2
10364
fbeq.w inx2_zero
10365
10366
global t_pinx2
10367
t_pinx2:
10368
ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10369
rts
10370
10371
global t_minx2
10372
t_minx2:
10373
ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10374
rts
10375
10376
inx2_zero:
10377
mov.b &z_bmask,FPSR_CC(%a6)
10378
ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10379
rts
10380
10381
# an underflow or overflow exception occurred.
10382
# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10383
catch:
10384
ori.w &inx2a_mask,FPSR_EXCEPT(%a6)
10385
catch2:
10386
bsr.l fgen_except
10387
add.l &0xc,%sp
10388
rts
10389
10390
global t_catch2
10391
t_catch2:
10392
10393
fsave -(%sp)
10394
10395
tst.b 0x2(%sp)
10396
bmi.b catch2
10397
add.l &0xc,%sp
10398
10399
fmov.l %fpsr,%d0
10400
or.l %d0,USER_FPSR(%a6)
10401
10402
rts
10403
10404
#########################################################################
10405
10406
#########################################################################
10407
# unf_sub(): underflow default result calculation for transcendentals #
10408
# #
10409
# INPUT: #
10410
# d0 : rnd mode,precision #
10411
# d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
10412
# OUTPUT: #
10413
# a0 : points to result (in instruction memory) #
10414
#########################################################################
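#
# The index math below packs {sign, rounding precision, rounding mode} into
# five bits and uses the result to pick both the condition codes and the
# default value. In C (the two table names are the ones defined below; the
# rest is hypothetical):
#
#	unsigned idx = (sign ? 0x10 : 0)	/* bit 4: sign of result   */
#		     | ((prec & 0x3) << 2)	/* bits 3-2: rounding prec */
#		     | (mode & 0x3);		/* bits 1-0: rounding mode */
#
#	fpsr_cc = tbl_unf_cc[idx];		/* one byte per entry      */
#	result  = &tbl_unf_result[idx];		/* one 16-byte entry each  */
#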
10415
unf_sub:
10416
ori.l &unfinx_mask,USER_FPSR(%a6)
10417
10418
andi.w &0x10,%d1 # keep sign bit in 4th spot
10419
10420
lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10421
andi.b &0xf,%d0 # strip hi rnd mode bit
10422
or.b %d1,%d0 # concat {sgn,mode,prec}
10423
10424
mov.l %d0,%d1 # make a copy
10425
lsl.b &0x1,%d1 # mult index by 2
10426
10427
mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428
lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10429
rts
10430
10431
tbl_unf_cc:
10432
byte 0x4, 0x4, 0x4, 0x0
10433
byte 0x4, 0x4, 0x4, 0x0
10434
byte 0x4, 0x4, 0x4, 0x0
10435
byte 0x0, 0x0, 0x0, 0x0
10436
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10437
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10438
byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10439
10440
tbl_unf_result:
10441
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443
long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444
long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10445
10446
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448
long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449
long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10450
10451
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453
long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454
long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10455
10456
long 0x0,0x0,0x0,0x0
10457
long 0x0,0x0,0x0,0x0
10458
long 0x0,0x0,0x0,0x0
10459
long 0x0,0x0,0x0,0x0
10460
10461
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10462
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10463
long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10464
long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10465
10466
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10467
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10468
long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10469
long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10470
10471
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10472
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10473
long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10474
long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
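# (Within each group of four rows above, the rows correspond to the rounding
# modes RN, RZ, RM, RP in that order: the default underflow result is zero
# except when rounding away from zero in the direction of the result's sign,
# i.e. RP for positive and RM for negative results, which yields the minimum
# denormalized magnitude.)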
10475
10476
############################################################
10477
10478
#########################################################################
10479
# src_zero(): Return signed zero according to sign of src operand. #
10480
#########################################################################
10481
global src_zero
10482
src_zero:
10483
tst.b SRC_EX(%a0) # get sign of src operand
10484
bmi.b ld_mzero # if neg, load neg zero
10485
10486
#
10487
# ld_pzero(): return a positive zero.
10488
#
10489
global ld_pzero
10490
ld_pzero:
10491
fmov.s &0x00000000,%fp0 # load +0
10492
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10493
rts
10494
10495
# ld_mzero(): return a negative zero.
10496
global ld_mzero
10497
ld_mzero:
10498
fmov.s &0x80000000,%fp0 # load -0
10499
mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10500
rts
10501
10502
#########################################################################
10503
# dst_zero(): Return signed zero according to sign of dst operand. #
10504
#########################################################################
10505
global dst_zero
10506
dst_zero:
10507
tst.b DST_EX(%a1) # get sign of dst operand
10508
bmi.b ld_mzero # if neg, load neg zero
10509
bra.b ld_pzero # load positive zero
10510
10511
#########################################################################
10512
# src_inf(): Return signed inf according to sign of src operand. #
10513
#########################################################################
10514
global src_inf
10515
src_inf:
10516
tst.b SRC_EX(%a0) # get sign of src operand
10517
bmi.b ld_minf # if negative branch
10518
10519
#
10520
# ld_pinf(): return a positive infinity.
10521
#
10522
global ld_pinf
10523
ld_pinf:
10524
fmov.s &0x7f800000,%fp0 # load +INF
10525
mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10526
rts
10527
10528
#
10529
# ld_minf():return a negative infinity.
10530
#
10531
global ld_minf
10532
ld_minf:
10533
fmov.s &0xff800000,%fp0 # load -INF
10534
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10535
rts
10536
10537
#########################################################################
10538
# dst_inf(): Return signed inf according to sign of dst operand. #
10539
#########################################################################
10540
global dst_inf
10541
dst_inf:
10542
tst.b DST_EX(%a1) # get sign of dst operand
10543
bmi.b ld_minf # if negative branch
10544
bra.b ld_pinf
10545
10546
global szr_inf
10547
#################################################################
10548
# szr_inf(): Return +ZERO for a negative src operand or #
10549
# +INF for a positive src operand. #
10550
# Routine used for fetox, ftwotox, and ftentox. #
10551
#################################################################
10552
szr_inf:
10553
tst.b SRC_EX(%a0) # check sign of source
10554
bmi.b ld_pzero
10555
bra.b ld_pinf
10556
10557
#########################################################################
10558
# sopr_inf(): Return +INF for a positive src operand or #
10559
# jump to operand error routine for a negative src operand. #
10560
# Routine used for flogn, flognp1, flog10, and flog2. #
10561
#########################################################################
10562
global sopr_inf
10563
sopr_inf:
10564
tst.b SRC_EX(%a0) # check sign of source
10565
bmi.w t_operr
10566
bra.b ld_pinf
10567
10568
#################################################################
10569
# setoxm1i(): Return minus one for a negative src operand or #
10570
# positive infinity for a positive src operand. #
10571
# Routine used for fetoxm1. #
10572
#################################################################
10573
global setoxm1i
10574
setoxm1i:
10575
tst.b SRC_EX(%a0) # check sign of source
10576
bmi.b ld_mone
10577
bra.b ld_pinf
10578
10579
#########################################################################
10580
# src_one(): Return signed one according to sign of src operand. #
10581
#########################################################################
10582
global src_one
10583
src_one:
10584
tst.b SRC_EX(%a0) # check sign of source
10585
bmi.b ld_mone
10586
10587
#
10588
# ld_pone(): return positive one.
10589
#
10590
global ld_pone
10591
ld_pone:
10592
fmov.s &0x3f800000,%fp0 # load +1
10593
clr.b FPSR_CC(%a6)
10594
rts
10595
10596
#
10597
# ld_mone(): return negative one.
10598
#
10599
global ld_mone
10600
ld_mone:
10601
fmov.s &0xbf800000,%fp0 # load -1
10602
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10603
rts
10604
10605
ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10606
mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10607
10608
#################################################################
10609
# spi_2(): Return signed PI/2 according to sign of src operand. #
10610
#################################################################
10611
global spi_2
10612
spi_2:
10613
tst.b SRC_EX(%a0) # check sign of source
10614
bmi.b ld_mpi2
10615
10616
#
10617
# ld_ppi2(): return positive PI/2.
10618
#
10619
global ld_ppi2
10620
ld_ppi2:
10621
fmov.l %d0,%fpcr
10622
fmov.x ppiby2(%pc),%fp0 # load +pi/2
10623
bra.w t_pinx2 # set INEX2
10624
10625
#
10626
# ld_mpi2(): return negative PI/2.
10627
#
10628
global ld_mpi2
10629
ld_mpi2:
10630
fmov.l %d0,%fpcr
10631
fmov.x mpiby2(%pc),%fp0 # load -pi/2
10632
bra.w t_minx2 # set INEX2
10633
10634
####################################################
10635
# The following routines give support for fsincos. #
10636
####################################################
10637
10638
#
10639
# ssincosz(): When the src operand is ZERO, store a one in the
10640
# cosine register and return a ZERO in fp0 w/ the same sign
10641
# as the src operand.
10642
#
10643
global ssincosz
10644
ssincosz:
10645
fmov.s &0x3f800000,%fp1
10646
tst.b SRC_EX(%a0) # test sign
10647
bpl.b sincoszp
10648
fmov.s &0x80000000,%fp0 # return sin result in fp0
10649
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
10650
bra.b sto_cos # store cosine result
10651
sincoszp:
10652
fmov.s &0x00000000,%fp0 # return sin result in fp0
10653
mov.b &z_bmask,FPSR_CC(%a6)
10654
bra.b sto_cos # store cosine result
10655
10656
#
10657
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658
# register and jump to the operand error routine.
10660
#
10661
global ssincosi
10662
ssincosi:
10663
fmov.x qnan(%pc),%fp1 # load NAN
10664
bsr.l sto_cos # store cosine result
10665
bra.w t_operr
10666
10667
#
10668
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669
# register and branch to the src QNAN routine.
10670
#
10671
global ssincosqnan
10672
ssincosqnan:
10673
fmov.x LOCAL_EX(%a0),%fp1
10674
bsr.l sto_cos
10675
bra.w src_qnan
10676
10677
#
10678
# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679
# in the cosine register and branch to the src SNAN routine.
10680
#
10681
global ssincossnan
10682
ssincossnan:
10683
fmov.x LOCAL_EX(%a0),%fp1
10684
bsr.l sto_cos
10685
bra.w src_snan
10686
10687
########################################################################
10688
10689
#########################################################################
10690
# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10691
# fp1 holds the result of the cosine portion of ssincos(). #
10692
# the value in fp1 will not take any exceptions when moved. #
10693
# INPUT: #
10694
# fp1 : fp value to store #
10695
# MODIFIED: #
10696
# d0 #
10697
#########################################################################
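#
# The dispatch below is, in effect (register names stand for the user-visible
# fp registers; fp0/fp1 live in the saved-register images in the exception
# frame, so those two cases store to memory instead):
#
#	switch (cmdreg_byte & 0x7) {	/* dst register field of the opcode */
#	case 0: saved_fp0 = fp1; break;
#	case 1: saved_fp1 = fp1; break;
#	case 2: fp2 = fp1; break;
#	/* ... */
#	case 7: fp7 = fp1; break;
#	}
#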
10698
global sto_cos
10699
sto_cos:
10700
mov.b 1+EXC_CMDREG(%a6),%d0
10701
andi.w &0x7,%d0
10702
mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0
10703
jmp (tbl_sto_cos.b,%pc,%d0.w*1)
10704
10705
tbl_sto_cos:
10706
short sto_cos_0 - tbl_sto_cos
10707
short sto_cos_1 - tbl_sto_cos
10708
short sto_cos_2 - tbl_sto_cos
10709
short sto_cos_3 - tbl_sto_cos
10710
short sto_cos_4 - tbl_sto_cos
10711
short sto_cos_5 - tbl_sto_cos
10712
short sto_cos_6 - tbl_sto_cos
10713
short sto_cos_7 - tbl_sto_cos
10714
10715
sto_cos_0:
10716
fmovm.x &0x40,EXC_FP0(%a6)
10717
rts
10718
sto_cos_1:
10719
fmovm.x &0x40,EXC_FP1(%a6)
10720
rts
10721
sto_cos_2:
10722
fmov.x %fp1,%fp2
10723
rts
10724
sto_cos_3:
10725
fmov.x %fp1,%fp3
10726
rts
10727
sto_cos_4:
10728
fmov.x %fp1,%fp4
10729
rts
10730
sto_cos_5:
10731
fmov.x %fp1,%fp5
10732
rts
10733
sto_cos_6:
10734
fmov.x %fp1,%fp6
10735
rts
10736
sto_cos_7:
10737
fmov.x %fp1,%fp7
10738
rts
10739
10740
##################################################################
10741
global smod_sdnrm
10742
global smod_snorm
10743
smod_sdnrm:
10744
smod_snorm:
10745
mov.b DTAG(%a6),%d1
10746
beq.l smod
10747
cmpi.b %d1,&ZERO
10748
beq.w smod_zro
10749
cmpi.b %d1,&INF
10750
beq.l t_operr
10751
cmpi.b %d1,&DENORM
10752
beq.l smod
10753
cmpi.b %d1,&SNAN
10754
beq.l dst_snan
10755
bra.l dst_qnan
10756
10757
global smod_szero
10758
smod_szero:
10759
mov.b DTAG(%a6),%d1
10760
beq.l t_operr
10761
cmpi.b %d1,&ZERO
10762
beq.l t_operr
10763
cmpi.b %d1,&INF
10764
beq.l t_operr
10765
cmpi.b %d1,&DENORM
10766
beq.l t_operr
10767
cmpi.b %d1,&QNAN
10768
beq.l dst_qnan
10769
bra.l dst_snan
10770
10771
global smod_sinf
10772
smod_sinf:
10773
mov.b DTAG(%a6),%d1
10774
beq.l smod_fpn
10775
cmpi.b %d1,&ZERO
10776
beq.l smod_zro
10777
cmpi.b %d1,&INF
10778
beq.l t_operr
10779
cmpi.b %d1,&DENORM
10780
beq.l smod_fpn
10781
cmpi.b %d1,&QNAN
10782
beq.l dst_qnan
10783
bra.l dst_snan
10784
10785
smod_zro:
10786
srem_zro:
10787
mov.b SRC_EX(%a0),%d1 # get src sign
10788
mov.b DST_EX(%a1),%d0 # get dst sign
10789
eor.b %d0,%d1 # get qbyte sign
10790
andi.b &0x80,%d1
10791
mov.b %d1,FPSR_QBYTE(%a6)
10792
tst.b %d0
10793
bpl.w ld_pzero
10794
bra.w ld_mzero
10795
10796
smod_fpn:
10797
srem_fpn:
10798
clr.b FPSR_QBYTE(%a6)
10799
mov.l %d0,-(%sp)
10800
mov.b SRC_EX(%a0),%d1 # get src sign
10801
mov.b DST_EX(%a1),%d0 # get dst sign
10802
eor.b %d0,%d1 # get qbyte sign
10803
andi.b &0x80,%d1
10804
mov.b %d1,FPSR_QBYTE(%a6)
10805
cmpi.b DTAG(%a6),&DENORM
10806
bne.b smod_nrm
10807
lea DST(%a1),%a0
10808
mov.l (%sp)+,%d0
10809
bra t_resdnrm
10810
smod_nrm:
10811
fmov.l (%sp)+,%fpcr
10812
fmov.x DST(%a1),%fp0
10813
tst.b DST_EX(%a1)
10814
bmi.b smod_nrm_neg
10815
rts
10816
10817
smod_nrm_neg:
10818
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10819
rts
10820
10821
#########################################################################
10822
global srem_snorm
10823
global srem_sdnrm
10824
srem_sdnrm:
10825
srem_snorm:
10826
mov.b DTAG(%a6),%d1
10827
beq.l srem
10828
cmpi.b %d1,&ZERO
10829
beq.w srem_zro
10830
cmpi.b %d1,&INF
10831
beq.l t_operr
10832
cmpi.b %d1,&DENORM
10833
beq.l srem
10834
cmpi.b %d1,&QNAN
10835
beq.l dst_qnan
10836
bra.l dst_snan
10837
10838
global srem_szero
10839
srem_szero:
10840
mov.b DTAG(%a6),%d1
10841
beq.l t_operr
10842
cmpi.b %d1,&ZERO
10843
beq.l t_operr
10844
cmpi.b %d1,&INF
10845
beq.l t_operr
10846
cmpi.b %d1,&DENORM
10847
beq.l t_operr
10848
cmpi.b %d1,&QNAN
10849
beq.l dst_qnan
10850
bra.l dst_snan
10851
10852
global srem_sinf
10853
srem_sinf:
10854
mov.b DTAG(%a6),%d1
10855
beq.w srem_fpn
10856
cmpi.b %d1,&ZERO
10857
beq.w srem_zro
10858
cmpi.b %d1,&INF
10859
beq.l t_operr
10860
cmpi.b %d1,&DENORM
10861
beq.l srem_fpn
10862
cmpi.b %d1,&QNAN
10863
beq.l dst_qnan
10864
bra.l dst_snan
10865
10866
#########################################################################
10867
global sscale_snorm
10868
global sscale_sdnrm
10869
sscale_snorm:
10870
sscale_sdnrm:
10871
mov.b DTAG(%a6),%d1
10872
beq.l sscale
10873
cmpi.b %d1,&ZERO
10874
beq.l dst_zero
10875
cmpi.b %d1,&INF
10876
beq.l dst_inf
10877
cmpi.b %d1,&DENORM
10878
beq.l sscale
10879
cmpi.b %d1,&QNAN
10880
beq.l dst_qnan
10881
bra.l dst_snan
10882
10883
global sscale_szero
10884
sscale_szero:
10885
mov.b DTAG(%a6),%d1
10886
beq.l sscale
10887
cmpi.b %d1,&ZERO
10888
beq.l dst_zero
10889
cmpi.b %d1,&INF
10890
beq.l dst_inf
10891
cmpi.b %d1,&DENORM
10892
beq.l sscale
10893
cmpi.b %d1,&QNAN
10894
beq.l dst_qnan
10895
bra.l dst_snan
10896
10897
global sscale_sinf
10898
sscale_sinf:
10899
mov.b DTAG(%a6),%d1
10900
beq.l t_operr
10901
cmpi.b %d1,&QNAN
10902
beq.l dst_qnan
10903
cmpi.b %d1,&SNAN
10904
beq.l dst_snan
10905
bra.l t_operr
10906
10907
########################################################################
10908
10909
#
10910
# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10911
#
10912
global sop_sqnan
10913
sop_sqnan:
10914
mov.b DTAG(%a6),%d1
10915
cmpi.b %d1,&QNAN
10916
beq.b dst_qnan
10917
cmpi.b %d1,&SNAN
10918
beq.b dst_snan
10919
bra.b src_qnan
10920
10921
#
10922
# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10923
#
10924
global sop_ssnan
10925
sop_ssnan:
10926
mov.b DTAG(%a6),%d1
10927
cmpi.b %d1,&QNAN
10928
beq.b dst_qnan_src_snan
10929
cmpi.b %d1,&SNAN
10930
beq.b dst_snan
10931
bra.b src_snan
10932
10933
dst_qnan_src_snan:
10934
ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10935
bra.b dst_qnan
10936
10937
#
10938
# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10939
#
10940
global dst_snan
10941
dst_snan:
10942
fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
10943
fmov.l %fpsr,%d0 # catch resulting status
10944
or.l %d0,USER_FPSR(%a6) # store status
10945
rts
10946
10947
#
10948
# dst_qnan(): Return the dst QNAN.
10949
#
10950
global dst_qnan
10951
dst_qnan:
10952
fmov.x DST(%a1),%fp0 # return the non-signalling nan
10953
tst.b DST_EX(%a1) # set ccodes according to QNAN sign
10954
bmi.b dst_qnan_m
10955
dst_qnan_p:
10956
mov.b &nan_bmask,FPSR_CC(%a6)
10957
rts
10958
dst_qnan_m:
10959
mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10960
rts
10961
10962
#
10963
# src_snan(): Return the src SNAN w/ the SNAN bit set.
10964
#
10965
global src_snan
10966
src_snan:
10967
fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
10968
fmov.l %fpsr,%d0 # catch resulting status
10969
or.l %d0,USER_FPSR(%a6) # store status
10970
rts
10971
10972
#
10973
# src_qnan(): Return the src QNAN.
10974
#
10975
global src_qnan
10976
src_qnan:
10977
fmov.x SRC(%a0),%fp0 # return the non-signalling nan
10978
tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
10979
bmi.b dst_qnan_m
10980
src_qnan_p:
10981
mov.b &nan_bmask,FPSR_CC(%a6)
10982
rts
10983
src_qnan_m:
10984
mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10985
rts
10986
10987
#
10988
# fkern2.s:
10989
# These entry points are used by the exception handler
10990
# routines where an instruction is selected by an index into
10991
# a large jump table corresponding to a given instruction which
10992
# has been decoded. Flow continues here where we now decode
10993
# further according to the source operand type.
10994
#
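#
# Every monadic entry point below follows the same shape: dispatch on the
# source tag (STAG) the handler set up earlier. Using fsinh as the example,
# in C-like form (each entry point substitutes its own special-case results):
#
#	switch (stag) {
#	case NORM:   return ssinh(x);		/* normalized: do the work */
#	case ZERO:   return src_zero(x);	/* +-0 in, +-0 out         */
#	case INF:    return src_inf(x);		/* +-inf in, +-inf out     */
#	case DENORM: return ssinhd(x);		/* denormalized input      */
#	case QNAN:   return src_qnan(x);	/* propagate the quiet NaN */
#	default:     return src_snan(x);	/* signalling NaN          */
#	}
#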
10995
10996
global fsinh
10997
fsinh:
10998
mov.b STAG(%a6),%d1
10999
beq.l ssinh
11000
cmpi.b %d1,&ZERO
11001
beq.l src_zero
11002
cmpi.b %d1,&INF
11003
beq.l src_inf
11004
cmpi.b %d1,&DENORM
11005
beq.l ssinhd
11006
cmpi.b %d1,&QNAN
11007
beq.l src_qnan
11008
bra.l src_snan
11009
11010
global flognp1
11011
flognp1:
11012
mov.b STAG(%a6),%d1
11013
beq.l slognp1
11014
cmpi.b %d1,&ZERO
11015
beq.l src_zero
11016
cmpi.b %d1,&INF
11017
beq.l sopr_inf
11018
cmpi.b %d1,&DENORM
11019
beq.l slognp1d
11020
cmpi.b %d1,&QNAN
11021
beq.l src_qnan
11022
bra.l src_snan
11023
11024
global fetoxm1
11025
fetoxm1:
11026
mov.b STAG(%a6),%d1
11027
beq.l setoxm1
11028
cmpi.b %d1,&ZERO
11029
beq.l src_zero
11030
cmpi.b %d1,&INF
11031
beq.l setoxm1i
11032
cmpi.b %d1,&DENORM
11033
beq.l setoxm1d
11034
cmpi.b %d1,&QNAN
11035
beq.l src_qnan
11036
bra.l src_snan
11037
11038
global ftanh
11039
ftanh:
11040
mov.b STAG(%a6),%d1
11041
beq.l stanh
11042
cmpi.b %d1,&ZERO
11043
beq.l src_zero
11044
cmpi.b %d1,&INF
11045
beq.l src_one
11046
cmpi.b %d1,&DENORM
11047
beq.l stanhd
11048
cmpi.b %d1,&QNAN
11049
beq.l src_qnan
11050
bra.l src_snan
11051
11052
global fatan
11053
fatan:
11054
mov.b STAG(%a6),%d1
11055
beq.l satan
11056
cmpi.b %d1,&ZERO
11057
beq.l src_zero
11058
cmpi.b %d1,&INF
11059
beq.l spi_2
11060
cmpi.b %d1,&DENORM
11061
beq.l satand
11062
cmpi.b %d1,&QNAN
11063
beq.l src_qnan
11064
bra.l src_snan
11065
11066
global fasin
11067
fasin:
11068
mov.b STAG(%a6),%d1
11069
beq.l sasin
11070
cmpi.b %d1,&ZERO
11071
beq.l src_zero
11072
cmpi.b %d1,&INF
11073
beq.l t_operr
11074
cmpi.b %d1,&DENORM
11075
beq.l sasind
11076
cmpi.b %d1,&QNAN
11077
beq.l src_qnan
11078
bra.l src_snan
11079
11080
global fatanh
11081
fatanh:
11082
mov.b STAG(%a6),%d1
11083
beq.l satanh
11084
cmpi.b %d1,&ZERO
11085
beq.l src_zero
11086
cmpi.b %d1,&INF
11087
beq.l t_operr
11088
cmpi.b %d1,&DENORM
11089
beq.l satanhd
11090
cmpi.b %d1,&QNAN
11091
beq.l src_qnan
11092
bra.l src_snan
11093
11094
global fsine
11095
fsine:
11096
mov.b STAG(%a6),%d1
11097
beq.l ssin
11098
cmpi.b %d1,&ZERO
11099
beq.l src_zero
11100
cmpi.b %d1,&INF
11101
beq.l t_operr
11102
cmpi.b %d1,&DENORM
11103
beq.l ssind
11104
cmpi.b %d1,&QNAN
11105
beq.l src_qnan
11106
bra.l src_snan
11107
11108
global ftan
11109
ftan:
11110
mov.b STAG(%a6),%d1
11111
beq.l stan
11112
cmpi.b %d1,&ZERO
11113
beq.l src_zero
11114
cmpi.b %d1,&INF
11115
beq.l t_operr
11116
cmpi.b %d1,&DENORM
11117
beq.l stand
11118
cmpi.b %d1,&QNAN
11119
beq.l src_qnan
11120
bra.l src_snan
11121
11122
global fetox
11123
fetox:
11124
mov.b STAG(%a6),%d1
11125
beq.l setox
11126
cmpi.b %d1,&ZERO
11127
beq.l ld_pone
11128
cmpi.b %d1,&INF
11129
beq.l szr_inf
11130
cmpi.b %d1,&DENORM
11131
beq.l setoxd
11132
cmpi.b %d1,&QNAN
11133
beq.l src_qnan
11134
bra.l src_snan
11135
11136
global ftwotox
11137
ftwotox:
11138
mov.b STAG(%a6),%d1
11139
beq.l stwotox
11140
cmpi.b %d1,&ZERO
11141
beq.l ld_pone
11142
cmpi.b %d1,&INF
11143
beq.l szr_inf
11144
cmpi.b %d1,&DENORM
11145
beq.l stwotoxd
11146
cmpi.b %d1,&QNAN
11147
beq.l src_qnan
11148
bra.l src_snan
11149
11150
global ftentox
11151
ftentox:
11152
mov.b STAG(%a6),%d1
11153
beq.l stentox
11154
cmpi.b %d1,&ZERO
11155
beq.l ld_pone
11156
cmpi.b %d1,&INF
11157
beq.l szr_inf
11158
cmpi.b %d1,&DENORM
11159
beq.l stentoxd
11160
cmpi.b %d1,&QNAN
11161
beq.l src_qnan
11162
bra.l src_snan
11163
11164
global flogn
11165
flogn:
11166
mov.b STAG(%a6),%d1
11167
beq.l slogn
11168
cmpi.b %d1,&ZERO
11169
beq.l t_dz2
11170
cmpi.b %d1,&INF
11171
beq.l sopr_inf
11172
cmpi.b %d1,&DENORM
11173
beq.l slognd
11174
cmpi.b %d1,&QNAN
11175
beq.l src_qnan
11176
bra.l src_snan
11177
11178
global flog10
11179
flog10:
11180
mov.b STAG(%a6),%d1
11181
beq.l slog10
11182
cmpi.b %d1,&ZERO
11183
beq.l t_dz2
11184
cmpi.b %d1,&INF
11185
beq.l sopr_inf
11186
cmpi.b %d1,&DENORM
11187
beq.l slog10d
11188
cmpi.b %d1,&QNAN
11189
beq.l src_qnan
11190
bra.l src_snan
11191
11192
global flog2
11193
flog2:
11194
mov.b STAG(%a6),%d1
11195
beq.l slog2
11196
cmpi.b %d1,&ZERO
11197
beq.l t_dz2
11198
cmpi.b %d1,&INF
11199
beq.l sopr_inf
11200
cmpi.b %d1,&DENORM
11201
beq.l slog2d
11202
cmpi.b %d1,&QNAN
11203
beq.l src_qnan
11204
bra.l src_snan
11205
11206
global fcosh
11207
fcosh:
11208
mov.b STAG(%a6),%d1
11209
beq.l scosh
11210
cmpi.b %d1,&ZERO
11211
beq.l ld_pone
11212
cmpi.b %d1,&INF
11213
beq.l ld_pinf
11214
cmpi.b %d1,&DENORM
11215
beq.l scoshd
11216
cmpi.b %d1,&QNAN
11217
beq.l src_qnan
11218
bra.l src_snan
11219
11220
global facos
11221
facos:
11222
mov.b STAG(%a6),%d1
11223
beq.l sacos
11224
cmpi.b %d1,&ZERO
11225
beq.l ld_ppi2
11226
cmpi.b %d1,&INF
11227
beq.l t_operr
11228
cmpi.b %d1,&DENORM
11229
beq.l sacosd
11230
cmpi.b %d1,&QNAN
11231
beq.l src_qnan
11232
bra.l src_snan
11233
11234
global fcos
11235
fcos:
11236
mov.b STAG(%a6),%d1
11237
beq.l scos
11238
cmpi.b %d1,&ZERO
11239
beq.l ld_pone
11240
cmpi.b %d1,&INF
11241
beq.l t_operr
11242
cmpi.b %d1,&DENORM
11243
beq.l scosd
11244
cmpi.b %d1,&QNAN
11245
beq.l src_qnan
11246
bra.l src_snan
11247
11248
global fgetexp
11249
fgetexp:
11250
mov.b STAG(%a6),%d1
11251
beq.l sgetexp
11252
cmpi.b %d1,&ZERO
11253
beq.l src_zero
11254
cmpi.b %d1,&INF
11255
beq.l t_operr
11256
cmpi.b %d1,&DENORM
11257
beq.l sgetexpd
11258
cmpi.b %d1,&QNAN
11259
beq.l src_qnan
11260
bra.l src_snan
11261
11262
global fgetman
11263
fgetman:
11264
mov.b STAG(%a6),%d1
11265
beq.l sgetman
11266
cmpi.b %d1,&ZERO
11267
beq.l src_zero
11268
cmpi.b %d1,&INF
11269
beq.l t_operr
11270
cmpi.b %d1,&DENORM
11271
beq.l sgetmand
11272
cmpi.b %d1,&QNAN
11273
beq.l src_qnan
11274
bra.l src_snan
11275
11276
global fsincos
11277
fsincos:
11278
mov.b STAG(%a6),%d1
11279
beq.l ssincos
11280
cmpi.b %d1,&ZERO
11281
beq.l ssincosz
11282
cmpi.b %d1,&INF
11283
beq.l ssincosi
11284
cmpi.b %d1,&DENORM
11285
beq.l ssincosd
11286
cmpi.b %d1,&QNAN
11287
beq.l ssincosqnan
11288
bra.l ssincossnan
11289
11290
global fmod
11291
fmod:
11292
mov.b STAG(%a6),%d1
11293
beq.l smod_snorm
11294
cmpi.b %d1,&ZERO
11295
beq.l smod_szero
11296
cmpi.b %d1,&INF
11297
beq.l smod_sinf
11298
cmpi.b %d1,&DENORM
11299
beq.l smod_sdnrm
11300
cmpi.b %d1,&QNAN
11301
beq.l sop_sqnan
11302
bra.l sop_ssnan
11303
11304
global frem
11305
frem:
11306
mov.b STAG(%a6),%d1
11307
beq.l srem_snorm
11308
cmpi.b %d1,&ZERO
11309
beq.l srem_szero
11310
cmpi.b %d1,&INF
11311
beq.l srem_sinf
11312
cmpi.b %d1,&DENORM
11313
beq.l srem_sdnrm
11314
cmpi.b %d1,&QNAN
11315
beq.l sop_sqnan
11316
bra.l sop_ssnan
11317
11318
global fscale
11319
fscale:
11320
mov.b STAG(%a6),%d1
11321
beq.l sscale_snorm
11322
cmpi.b %d1,&ZERO
11323
beq.l sscale_szero
11324
cmpi.b %d1,&INF
11325
beq.l sscale_sinf
11326
cmpi.b %d1,&DENORM
11327
beq.l sscale_sdnrm
11328
cmpi.b %d1,&QNAN
11329
beq.l sop_sqnan
11330
bra.l sop_ssnan
11331
11332
#########################################################################
11333
# XDEF **************************************************************** #
11334
# fgen_except(): catch an exception during transcendental #
11335
# emulation #
11336
# #
11337
# XREF **************************************************************** #
11338
# fmul() - emulate a multiply instruction #
11339
# fadd() - emulate an add instruction #
11340
# fin() - emulate an fmove instruction #
11341
# #
11342
# INPUT *************************************************************** #
11343
# fp0 = destination operand #
11344
# d0 = type of instruction that took exception #
11345
# fsave frame = source operand #
11346
# #
11347
# OUTPUT ************************************************************** #
11348
# fp0 = result #
11349
# fp1 = EXOP #
11350
# #
11351
# ALGORITHM *********************************************************** #
11352
# An exception occurred on the last instruction of the #
11353
# transcendental emulation. hopefully, this won't be happening much #
11354
# because it will be VERY slow. #
11355
# The only exceptions capable of passing through here are #
11356
# Overflow, Underflow, and Unsupported Data Type. #
11357
# #
11358
#########################################################################
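#
# The recovery below amounts to re-running the last instruction through the
# ordinary arithmetic emulation routines, with the fsave frame supplying the
# source operand. Roughly, in C (operand setup omitted):
#
#	if (last_op == FMOV_OP)
#		fin(src, dst);		/* re-emulate the fmove    */
#	else if (last_op == FADD_OP)
#		fadd(src, dst);		/* re-emulate the add      */
#	else
#		fmul(src, dst);		/* re-emulate the multiply */
#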
11359
11360
global fgen_except
11361
fgen_except:
11362
cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
11363
beq.b fge_unsupp # yes
11364
11365
mov.b &NORM,STAG(%a6)
11366
11367
fge_cont:
11368
mov.b &NORM,DTAG(%a6)
11369
11370
# ok, I have a problem with putting the dst op at FP_DST. the emulation
11371
# routines aren't supposed to alter the operands but we've just squashed
11372
# FP_DST here...
11373
11374
# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375
# then a potential bug. to begin with, only the dyadic functions
11376
# frem,fmod, and fscale would get the dst trashed here. But, for
11377
# the 060SP, the FP_DST is never used again anyways.
11378
fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
11379
11380
lea 0x4(%sp),%a0 # pass: ptr to src op
11381
lea FP_DST(%a6),%a1 # pass: ptr to dst op
11382
11383
cmpi.b %d1,&FMOV_OP
11384
beq.b fge_fin # it was an "fmov"
11385
cmpi.b %d1,&FADD_OP
11386
beq.b fge_fadd # it was an "fadd"
11387
fge_fmul:
11388
bsr.l fmul
11389
rts
11390
fge_fadd:
11391
bsr.l fadd
11392
rts
11393
fge_fin:
11394
bsr.l fin
11395
rts
11396
11397
fge_unsupp:
11398
mov.b &DENORM,STAG(%a6)
11399
bra.b fge_cont
11400
11401
#
11402
# This table holds the offsets of the emulation routines for each individual
11403
# math operation relative to the address of this table. Included are
11404
# routines like fadd/fmul/fabs as well as the transcendentals.
11405
# The location within the table is determined by the extension bits of the
11406
# operation longword.
11407
#
11408
11409
swbeg &109
11410
tbl_unsupp:
11411
long fin - tbl_unsupp # 00: fmove
11412
long fint - tbl_unsupp # 01: fint
11413
long fsinh - tbl_unsupp # 02: fsinh
11414
long fintrz - tbl_unsupp # 03: fintrz
11415
long fsqrt - tbl_unsupp # 04: fsqrt
11416
long tbl_unsupp - tbl_unsupp
11417
long flognp1 - tbl_unsupp # 06: flognp1
11418
long tbl_unsupp - tbl_unsupp
11419
long fetoxm1 - tbl_unsupp # 08: fetoxm1
11420
long ftanh - tbl_unsupp # 09: ftanh
11421
long fatan - tbl_unsupp # 0a: fatan
11422
long tbl_unsupp - tbl_unsupp
11423
long fasin - tbl_unsupp # 0c: fasin
11424
long fatanh - tbl_unsupp # 0d: fatanh
11425
long fsine - tbl_unsupp # 0e: fsin
11426
long ftan - tbl_unsupp # 0f: ftan
11427
long fetox - tbl_unsupp # 10: fetox
11428
long ftwotox - tbl_unsupp # 11: ftwotox
11429
long ftentox - tbl_unsupp # 12: ftentox
11430
long tbl_unsupp - tbl_unsupp
11431
long flogn - tbl_unsupp # 14: flogn
11432
long flog10 - tbl_unsupp # 15: flog10
11433
long flog2 - tbl_unsupp # 16: flog2
11434
long tbl_unsupp - tbl_unsupp
11435
long fabs - tbl_unsupp # 18: fabs
11436
long fcosh - tbl_unsupp # 19: fcosh
11437
long fneg - tbl_unsupp # 1a: fneg
11438
long tbl_unsupp - tbl_unsupp
11439
long facos - tbl_unsupp # 1c: facos
11440
long fcos - tbl_unsupp # 1d: fcos
11441
long fgetexp - tbl_unsupp # 1e: fgetexp
11442
long fgetman - tbl_unsupp # 1f: fgetman
11443
long fdiv - tbl_unsupp # 20: fdiv
11444
long fmod - tbl_unsupp # 21: fmod
11445
long fadd - tbl_unsupp # 22: fadd
11446
long fmul - tbl_unsupp # 23: fmul
11447
long fsgldiv - tbl_unsupp # 24: fsgldiv
11448
long frem - tbl_unsupp # 25: frem
11449
long fscale - tbl_unsupp # 26: fscale
11450
long fsglmul - tbl_unsupp # 27: fsglmul
11451
long fsub - tbl_unsupp # 28: fsub
11452
long tbl_unsupp - tbl_unsupp
11453
long tbl_unsupp - tbl_unsupp
11454
long tbl_unsupp - tbl_unsupp
11455
long tbl_unsupp - tbl_unsupp
11456
long tbl_unsupp - tbl_unsupp
11457
long tbl_unsupp - tbl_unsupp
11458
long tbl_unsupp - tbl_unsupp
11459
long fsincos - tbl_unsupp # 30: fsincos
11460
long fsincos - tbl_unsupp # 31: fsincos
11461
long fsincos - tbl_unsupp # 32: fsincos
11462
long fsincos - tbl_unsupp # 33: fsincos
11463
long fsincos - tbl_unsupp # 34: fsincos
11464
long fsincos - tbl_unsupp # 35: fsincos
11465
long fsincos - tbl_unsupp # 36: fsincos
11466
long fsincos - tbl_unsupp # 37: fsincos
11467
long fcmp - tbl_unsupp # 38: fcmp
11468
long tbl_unsupp - tbl_unsupp
11469
long ftst - tbl_unsupp # 3a: ftst
11470
long tbl_unsupp - tbl_unsupp
11471
long tbl_unsupp - tbl_unsupp
11472
long tbl_unsupp - tbl_unsupp
11473
long tbl_unsupp - tbl_unsupp
11474
long tbl_unsupp - tbl_unsupp
11475
long fsin - tbl_unsupp # 40: fsmove
11476
long fssqrt - tbl_unsupp # 41: fssqrt
11477
long tbl_unsupp - tbl_unsupp
11478
long tbl_unsupp - tbl_unsupp
11479
long fdin - tbl_unsupp # 44: fdmove
11480
long fdsqrt - tbl_unsupp # 45: fdsqrt
11481
long tbl_unsupp - tbl_unsupp
11482
long tbl_unsupp - tbl_unsupp
11483
long tbl_unsupp - tbl_unsupp
11484
long tbl_unsupp - tbl_unsupp
11485
long tbl_unsupp - tbl_unsupp
11486
long tbl_unsupp - tbl_unsupp
11487
long tbl_unsupp - tbl_unsupp
11488
long tbl_unsupp - tbl_unsupp
11489
long tbl_unsupp - tbl_unsupp
11490
long tbl_unsupp - tbl_unsupp
11491
long tbl_unsupp - tbl_unsupp
11492
long tbl_unsupp - tbl_unsupp
11493
long tbl_unsupp - tbl_unsupp
11494
long tbl_unsupp - tbl_unsupp
11495
long tbl_unsupp - tbl_unsupp
11496
long tbl_unsupp - tbl_unsupp
11497
long tbl_unsupp - tbl_unsupp
11498
long tbl_unsupp - tbl_unsupp
11499
long fsabs - tbl_unsupp # 58: fsabs
11500
long tbl_unsupp - tbl_unsupp
11501
long fsneg - tbl_unsupp # 5a: fsneg
11502
long tbl_unsupp - tbl_unsupp
11503
long fdabs - tbl_unsupp # 5c: fdabs
11504
long tbl_unsupp - tbl_unsupp
11505
long fdneg - tbl_unsupp # 5e: fdneg
11506
long tbl_unsupp - tbl_unsupp
11507
long fsdiv - tbl_unsupp # 60: fsdiv
11508
long tbl_unsupp - tbl_unsupp
11509
long fsadd - tbl_unsupp # 62: fsadd
11510
long fsmul - tbl_unsupp # 63: fsmul
11511
long fddiv - tbl_unsupp # 64: fddiv
11512
long tbl_unsupp - tbl_unsupp
11513
long fdadd - tbl_unsupp # 66: fdadd
11514
long fdmul - tbl_unsupp # 67: fdmul
11515
long fssub - tbl_unsupp # 68: fssub
11516
long tbl_unsupp - tbl_unsupp
11517
long tbl_unsupp - tbl_unsupp
11518
long tbl_unsupp - tbl_unsupp
11519
long fdsub - tbl_unsupp # 6c: fdsub
11520
11521
#########################################################################
11522
# XDEF **************************************************************** #
11523
# fmul(): emulates the fmul instruction #
11524
# fsmul(): emulates the fsmul instruction #
11525
# fdmul(): emulates the fdmul instruction #
11526
# #
11527
# XREF **************************************************************** #
11528
# scale_to_zero_src() - scale src exponent to zero #
11529
# scale_to_zero_dst() - scale dst exponent to zero #
11530
# unf_res() - return default underflow result #
11531
# ovf_res() - return default overflow result #
11532
# res_qnan() - return QNAN result #
11533
# res_snan() - return SNAN result #
11534
# #
11535
# INPUT *************************************************************** #
11536
# a0 = pointer to extended precision source operand #
11537
# a1 = pointer to extended precision destination operand #
11538
# d0 rnd prec,mode #
11539
# #
11540
# OUTPUT ************************************************************** #
11541
# fp0 = result #
11542
# fp1 = EXOP (if exception occurred) #
11543
# #
11544
# ALGORITHM *********************************************************** #
11545
# Handle NANs, infinities, and zeroes as special cases. Divide #
11546
# norms/denorms into ext/sgl/dbl precision. #
11547
# For norms/denorms, scale the exponents such that a multiply #
11548
# instruction won't cause an exception. Use the regular fmul to #
11549
# compute a result. Check if the regular operands would have taken #
11550
# an exception. If so, return the default overflow/underflow result #
11551
# and return the EXOP if exceptions are enabled. Else, scale the #
11552
# result operand to the proper exponent. #
11553
# #
11554
#########################################################################
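#
# A worked reading of the pre-check below (all exponents are extended-format
# biased values; bias = 0x3fff). Both operands are scaled so their exponents
# sit at the bias, and the true result exponent is later recovered as
# (scaled result exponent - SCALE_FACTOR). For extended precision that gives,
# roughly:
#
#	overflow   if SCALE_FACTOR < 0x3fff - 0x7ffe	(0x7ffe = max finite exp)
#	underflow  if SCALE_FACTOR > 0x3fff + 0x0001
#
# the "may" cases on equality are settled by actually performing the scaled
# multiply and comparing |result| against 2.0.
#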
11555
11556
align 0x10
11557
tbl_fmul_ovfl:
11558
long 0x3fff - 0x7ffe # ext_max
11559
long 0x3fff - 0x407e # sgl_max
11560
long 0x3fff - 0x43fe # dbl_max
11561
tbl_fmul_unfl:
11562
long 0x3fff + 0x0001 # ext_unfl
11563
long 0x3fff - 0x3f80 # sgl_unfl
11564
long 0x3fff - 0x3c00 # dbl_unfl
11565
11566
global fsmul
11567
fsmul:
11568
andi.b &0x30,%d0 # clear rnd prec
11569
ori.b &s_mode*0x10,%d0 # insert sgl prec
11570
bra.b fmul
11571
11572
global fdmul
11573
fdmul:
11574
andi.b &0x30,%d0
11575
ori.b &d_mode*0x10,%d0 # insert dbl prec
11576
11577
global fmul
11578
fmul:
11579
mov.l %d0,L_SCR3(%a6) # store rnd info
11580
11581
clr.w %d1
11582
mov.b DTAG(%a6),%d1
11583
lsl.b &0x3,%d1
11584
or.b STAG(%a6),%d1 # combine src tags
11585
bne.w fmul_not_norm # optimize on non-norm input
11586
11587
fmul_norm:
11588
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11589
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11590
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11591
11592
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11593
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11594
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11595
11596
bsr.l scale_to_zero_src # scale src exponent
11597
mov.l %d0,-(%sp) # save scale factor 1
11598
11599
bsr.l scale_to_zero_dst # scale dst exponent
11600
11601
add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
11602
11603
mov.w 2+L_SCR3(%a6),%d1 # fetch precision
11604
lsr.b &0x6,%d1 # shift to lo bits
11605
mov.l (%sp)+,%d0 # load S.F.
11606
cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607
beq.w fmul_may_ovfl # result may rnd to overflow
11608
blt.w fmul_ovfl # result will overflow
11609
11610
cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611
beq.w fmul_may_unfl # result may rnd to no unfl
11612
bgt.w fmul_unfl # result will underflow
11613
11614
#
11615
# NORMAL:
11616
# - the result of the multiply operation will neither overflow nor underflow.
11617
# - do the multiply to the proper precision and rounding mode.
11618
# - scale the result exponent using the scale factor. if both operands were
11619
# normalized then we really don't need to go through this scaling. but for now,
11620
# this will do.
11621
#
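#
# The unscaling done in fmul_normal_exit below is simply:
#
#	true_exp = (scaled result exponent & 0x7fff) - SCALE_FACTOR
#	result   = {original sign, true_exp, scaled result mantissa}
#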
11622
fmul_normal:
11623
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11624
11625
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11626
fmov.l &0x0,%fpsr # clear FPSR
11627
11628
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11629
11630
fmov.l %fpsr,%d1 # save status
11631
fmov.l &0x0,%fpcr # clear FPCR
11632
11633
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11634
11635
fmul_normal_exit:
11636
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11637
mov.l %d2,-(%sp) # save d2
11638
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
11639
mov.l %d1,%d2 # make a copy
11640
andi.l &0x7fff,%d1 # strip sign
11641
andi.w &0x8000,%d2 # keep old sign
11642
sub.l %d0,%d1 # add scale factor
11643
or.w %d2,%d1 # concat old sign,new exp
11644
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11645
mov.l (%sp)+,%d2 # restore d2
11646
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11647
rts
11648
11649
#
11650
# OVERFLOW:
11651
# - the result of the multiply operation is an overflow.
11652
# - do the multiply to the proper precision and rounding mode in order to
11653
# set the inexact bits.
11654
# - calculate the default result and return it in fp0.
11655
# - if overflow or inexact is enabled, we need a multiply result rounded to
11656
# extended precision. if the original operation was extended, then we have this
11657
# result. if the original operation was single or double, we have to do another
11658
# multiply using extended precision and the correct rounding mode. the result
11659
# of this operation then has its exponent scaled by -0x6000 to create the
11660
# exceptional operand.
11661
#
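#
# Spelled out, the EXOP built in fmul_ovfl_ena_cont below is:
#
#	exop_exp = ((result_exp - SCALE_FACTOR) - 0x6000) & 0x7fff
#	EXOP     = {original sign, exop_exp, result mantissa}
#
# the matching underflow path (fmul_unfl_ena) adds 0x6000 instead of
# subtracting it.
#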
11662
fmul_ovfl:
11663
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11664
11665
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11666
fmov.l &0x0,%fpsr # clear FPSR
11667
11668
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11669
11670
fmov.l %fpsr,%d1 # save status
11671
fmov.l &0x0,%fpcr # clear FPCR
11672
11673
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11674
11675
# save setting this until now because this is where fmul_may_ovfl may jump in
11676
fmul_ovfl_tst:
11677
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11678
11679
mov.b FPCR_ENABLE(%a6),%d1
11680
andi.b &0x13,%d1 # is OVFL or INEX enabled?
11681
bne.b fmul_ovfl_ena # yes
11682
11683
# calculate the default result
11684
fmul_ovfl_dis:
11685
btst &neg_bit,FPSR_CC(%a6) # is result negative?
11686
sne %d1 # set sign param accordingly
11687
mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
11688
bsr.l ovf_res # calculate default result
11689
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11690
fmovm.x (%a0),&0x80 # return default result in fp0
11691
rts
11692
11693
#
11694
# OVFL is enabled; Create EXOP:
11695
# - if precision is extended, then we have the EXOP. simply bias the exponent
11696
# with an extra -0x6000. if the precision is single or double, we need to
11697
# calculate a result rounded to extended precision.
11698
#
11699
fmul_ovfl_ena:
11700
mov.l L_SCR3(%a6),%d1
11701
andi.b &0xc0,%d1 # test the rnd prec
11702
bne.b fmul_ovfl_ena_sd # it's sgl or dbl
11703
11704
fmul_ovfl_ena_cont:
11705
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
11706
11707
mov.l %d2,-(%sp) # save d2
11708
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11709
mov.w %d1,%d2 # make a copy
11710
andi.l &0x7fff,%d1 # strip sign
11711
sub.l %d0,%d1 # add scale factor
11712
subi.l &0x6000,%d1 # subtract bias
11713
andi.w &0x7fff,%d1 # clear sign bit
11714
andi.w &0x8000,%d2 # keep old sign
11715
or.w %d2,%d1 # concat old sign,new exp
11716
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11717
mov.l (%sp)+,%d2 # restore d2
11718
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11719
bra.b fmul_ovfl_dis
11720
11721
fmul_ovfl_ena_sd:
11722
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11723
11724
mov.l L_SCR3(%a6),%d1
11725
andi.b &0x30,%d1 # keep rnd mode only
11726
fmov.l %d1,%fpcr # set FPCR
11727
11728
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11729
11730
fmov.l &0x0,%fpcr # clear FPCR
11731
bra.b fmul_ovfl_ena_cont
11732
11733
#
11734
# may OVERFLOW:
11735
# - the result of the multiply operation MAY overflow.
11736
# - do the multiply to the proper precision and rounding mode in order to
11737
# set the inexact bits.
11738
# - calculate the default result and return it in fp0.
11739
#
11740
fmul_may_ovfl:
11741
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11742
11743
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11744
fmov.l &0x0,%fpsr # clear FPSR
11745
11746
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11747
11748
fmov.l %fpsr,%d1 # save status
11749
fmov.l &0x0,%fpcr # clear FPCR
11750
11751
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11752
11753
fabs.x %fp0,%fp1 # make a copy of result
11754
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
11755
fbge.w fmul_ovfl_tst # yes; overflow has occurred
11756
11757
# no, it didn't overflow; we have correct result
11758
bra.w fmul_normal_exit
11759
11760
#
11761
# UNDERFLOW:
11762
# - the result of the multiply operation is an underflow.
11763
# - do the multiply to the proper precision and rounding mode in order to
11764
# set the inexact bits.
11765
# - calculate the default result and return it in fp0.
11766
# - if overflow or inexact is enabled, we need a multiply result rounded to
11767
# extended precision. if the original operation was extended, then we have this
11768
# result. if the original operation was single or double, we have to do another
11769
# multiply using extended precision and the correct rounding mode. the result
11770
# of this operation then has its exponent scaled by -0x6000 to create the
11771
# exceptional operand.
11772
#
11773
fmul_unfl:
11774
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11775
11776
# for fun, let's use only extended precision, round to zero. then, let
11777
# the unf_res() routine figure out all the rest.
11778
# will we get the correct answer?
11779
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11780
11781
fmov.l &rz_mode*0x10,%fpcr # set FPCR
11782
fmov.l &0x0,%fpsr # clear FPSR
11783
11784
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11785
11786
fmov.l %fpsr,%d1 # save status
11787
fmov.l &0x0,%fpcr # clear FPCR
11788
11789
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11790
11791
mov.b FPCR_ENABLE(%a6),%d1
11792
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11793
bne.b fmul_unfl_ena # yes
11794
11795
fmul_unfl_dis:
11796
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11797
11798
lea FP_SCR0(%a6),%a0 # pass: result addr
11799
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11800
bsr.l unf_res # calculate default result
11801
or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
11802
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11803
rts
11804
11805
#
11806
# UNFL is enabled.
11807
#
11808
fmul_unfl_ena:
11809
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11810
11811
mov.l L_SCR3(%a6),%d1
11812
andi.b &0xc0,%d1 # is precision extended?
11813
bne.b fmul_unfl_ena_sd # no, sgl or dbl
11814
11815
# if the rnd mode is anything but RZ, then we have to re-do the above
11816
# multiplication because we used RZ for all.
11817
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11818
11819
fmul_unfl_ena_cont:
11820
fmov.l &0x0,%fpsr # clear FPSR
11821
11822
fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11823
11824
fmov.l &0x0,%fpcr # clear FPCR
11825
11826
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11827
mov.l %d2,-(%sp) # save d2
11828
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11829
mov.l %d1,%d2 # make a copy
11830
andi.l &0x7fff,%d1 # strip sign
11831
andi.w &0x8000,%d2 # keep old sign
11832
sub.l %d0,%d1 # add scale factor
11833
addi.l &0x6000,%d1 # add bias
11834
andi.w &0x7fff,%d1
11835
or.w %d2,%d1 # concat old sign,new exp
11836
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11837
mov.l (%sp)+,%d2 # restore d2
11838
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11839
bra.w fmul_unfl_dis
11840
11841
fmul_unfl_ena_sd:
11842
mov.l L_SCR3(%a6),%d1
11843
andi.b &0x30,%d1 # use only rnd mode
11844
fmov.l %d1,%fpcr # set FPCR
11845
11846
bra.b fmul_unfl_ena_cont
11847
11848
# MAY UNDERFLOW:
11849
# -use the correct rounding mode and precision. this code favors operations
11850
# that do not underflow.
11851
fmul_may_unfl:
11852
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11853
11854
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11855
fmov.l &0x0,%fpsr # clear FPSR
11856
11857
fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11858
11859
fmov.l %fpsr,%d1 # save status
11860
fmov.l &0x0,%fpcr # clear FPCR
11861
11862
or.l %d1,USER_FPSR(%a6) # save INEX2,N
11863
11864
fabs.x %fp0,%fp1 # make a copy of result
11865
fcmp.b %fp1,&0x2 # is |result| > 2.b?
11866
fbgt.w fmul_normal_exit # no; no underflow occurred
11867
fblt.w fmul_unfl # yes; underflow occurred
11868
11869
#
11870
# we still don't know if underflow occurred. result is ~ equal to 2. but,
11871
# we don't know if the result was an underflow that rounded up to a 2 or
11872
# a normalized number that rounded down to a 2. so, redo the entire operation
11873
# using RZ as the rounding mode to see what the pre-rounded result is.
11874
# this case should be relatively rare.
11875
#
11876
fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
11877
11878
mov.l L_SCR3(%a6),%d1
11879
andi.b &0xc0,%d1 # keep rnd prec
11880
ori.b &rz_mode*0x10,%d1 # insert RZ
11881
11882
fmov.l %d1,%fpcr # set FPCR
11883
fmov.l &0x0,%fpsr # clear FPSR
11884
11885
fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11886
11887
fmov.l &0x0,%fpcr # clear FPCR
11888
fabs.x %fp1 # make absolute value
11889
fcmp.b %fp1,&0x2 # is |result| < 2.b?
11890
fbge.w fmul_normal_exit # no; no underflow occurred
11891
bra.w fmul_unfl # yes, underflow occurred
11892
11893
################################################################################
11894
11895
#
11896
# Multiply: inputs are not both normalized; what are they?
11897
#
11898
fmul_not_norm:
11899
mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
11900
jmp (tbl_fmul_op.b,%pc,%d1.w)
11901
11902
swbeg &48
11903
tbl_fmul_op:
11904
short fmul_norm - tbl_fmul_op # NORM x NORM
11905
short fmul_zero - tbl_fmul_op # NORM x ZERO
11906
short fmul_inf_src - tbl_fmul_op # NORM x INF
11907
short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
11908
short fmul_norm - tbl_fmul_op # NORM x DENORM
11909
short fmul_res_snan - tbl_fmul_op # NORM x SNAN
11910
short tbl_fmul_op - tbl_fmul_op #
11911
short tbl_fmul_op - tbl_fmul_op #
11912
11913
short fmul_zero - tbl_fmul_op # ZERO x NORM
11914
short fmul_zero - tbl_fmul_op # ZERO x ZERO
11915
short fmul_res_operr - tbl_fmul_op # ZERO x INF
11916
short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
11917
short fmul_zero - tbl_fmul_op # ZERO x DENORM
11918
short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
11919
short tbl_fmul_op - tbl_fmul_op #
11920
short tbl_fmul_op - tbl_fmul_op #
11921
11922
short fmul_inf_dst - tbl_fmul_op # INF x NORM
11923
short fmul_res_operr - tbl_fmul_op # INF x ZERO
11924
short fmul_inf_dst - tbl_fmul_op # INF x INF
11925
short fmul_res_qnan - tbl_fmul_op # INF x QNAN
11926
short fmul_inf_dst - tbl_fmul_op # INF x DENORM
11927
short fmul_res_snan - tbl_fmul_op # INF x SNAN
11928
short tbl_fmul_op - tbl_fmul_op #
11929
short tbl_fmul_op - tbl_fmul_op #
11930
11931
short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
11932
short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
11933
short fmul_res_qnan - tbl_fmul_op # QNAN x INF
11934
short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
11935
short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
11936
short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
11937
short tbl_fmul_op - tbl_fmul_op #
11938
short tbl_fmul_op - tbl_fmul_op #
11939
11940
short fmul_norm - tbl_fmul_op # DENORM x NORM
11941
short fmul_zero - tbl_fmul_op # DENORM x ZERO
11942
short fmul_inf_src - tbl_fmul_op # DENORM x INF
11943
short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
11944
short fmul_norm - tbl_fmul_op # DENORM x DENORM
11945
short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
11946
short tbl_fmul_op - tbl_fmul_op #
11947
short tbl_fmul_op - tbl_fmul_op #
11948
11949
short fmul_res_snan - tbl_fmul_op # SNAN x NORM
11950
short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
11951
short fmul_res_snan - tbl_fmul_op # SNAN x INF
11952
short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
11953
short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
11954
short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
11955
short tbl_fmul_op - tbl_fmul_op #
11956
short tbl_fmul_op - tbl_fmul_op #
11957
11958
fmul_res_operr:
11959
bra.l res_operr
11960
fmul_res_snan:
11961
bra.l res_snan
11962
fmul_res_qnan:
11963
bra.l res_qnan
11964
11965
#
11966
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11967
#
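#
# The sign of a zero result is just the exclusive-OR of the two operand
# signs; a minimal C sketch of the test done below (variable names are
# illustrative):
#
#	int neg = (src_ex ^ dst_ex) & 0x80;	/* sign bits of the EX bytes */
#	return neg ? -0.0 : +0.0;
#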
11968
global fmul_zero # global for fsglmul
11969
fmul_zero:
11970
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11971
mov.b DST_EX(%a1),%d1
11972
eor.b %d0,%d1
11973
bpl.b fmul_zero_p # result ZERO is pos.
11974
fmul_zero_n:
11975
fmov.s &0x80000000,%fp0 # load -ZERO
11976
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11977
rts
11978
fmul_zero_p:
11979
fmov.s &0x00000000,%fp0 # load +ZERO
11980
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11981
rts
11982
11983
#
11984
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11985
#
11986
# Note: The j-bit for an infinity is a don't-care. However, to be
11987
# strictly compatible w/ the 68881/882, we make sure to return an
11988
# INF w/ the j-bit set if the input INF j-bit was set. Destination
11989
# INFs take priority.
11990
#
11991
global fmul_inf_dst # global for fsglmul
11992
fmul_inf_dst:
11993
fmovm.x DST(%a1),&0x80 # return INF result in fp0
11994
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11995
mov.b DST_EX(%a1),%d1
11996
eor.b %d0,%d1
11997
bpl.b fmul_inf_dst_p # result INF is pos.
11998
fmul_inf_dst_n:
11999
fabs.x %fp0 # clear result sign
12000
fneg.x %fp0 # set result sign
12001
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12002
rts
12003
fmul_inf_dst_p:
12004
fabs.x %fp0 # clear result sign
12005
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12006
rts
12007
12008
global fmul_inf_src # global for fsglmul
12009
fmul_inf_src:
12010
fmovm.x SRC(%a0),&0x80 # return INF result in fp0
12011
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12012
mov.b DST_EX(%a1),%d1
12013
eor.b %d0,%d1
12014
bpl.b fmul_inf_dst_p # result INF is pos.
12015
bra.b fmul_inf_dst_n
12016
12017
#########################################################################
12018
# XDEF **************************************************************** #
12019
# fin(): emulates the fmove instruction #
12020
# fsin(): emulates the fsmove instruction #
12021
# fdin(): emulates the fdmove instruction #
12022
# #
12023
# XREF **************************************************************** #
12024
# norm() - normalize mantissa for EXOP on denorm #
12025
# scale_to_zero_src() - scale src exponent to zero #
12026
# ovf_res() - return default overflow result #
12027
# unf_res() - return default underflow result #
12028
# res_qnan_1op() - return QNAN result #
12029
# res_snan_1op() - return SNAN result #
12030
# #
12031
# INPUT *************************************************************** #
12032
# a0 = pointer to extended precision source operand #
12033
# d0 = round prec/mode #
12034
# #
12035
# OUTPUT ************************************************************** #
12036
# fp0 = result #
12037
# fp1 = EXOP (if exception occurred) #
12038
# #
12039
# ALGORITHM *********************************************************** #
12040
# Handle NANs, infinities, and zeroes as special cases. Divide #
12041
# norms into extended, single, and double precision. #
12042
# Norms can be emulated w/ a regular fmove instruction. For #
12043
# sgl/dbl, must scale exponent and perform an "fmove". Check to see #
12044
# if the result would have overflowed/underflowed. If so, use unf_res() #
12045
# or ovf_res() to return the default result. Also return EXOP if #
12046
# exception is enabled. If no exception, return the default result. #
12047
# Unnorms don't pass through here. #
12048
# #
12049
#########################################################################
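#
# For reference, the single-precision limits used below, written out as
# extended-format biased exponents (bias = 0x3fff), assuming a normalized
# input so that scale_to_zero_src() returns d0 = 0x3fff - (input exponent):
#
#	largest finite sgl exponent :  +127  ->  0x407e
#	smallest normal sgl exponent:  -126  ->  0x3f81
#
#	d0 >= 0x3fff - 0x3f80  =>  input exponent <= 0x3f80  =>  will underflow
#	d0 <  0x3fff - 0x407e  =>  input exponent  > 0x407e  =>  will overflow
#	d0 == 0x3fff - 0x407e  =>  on the boundary; may overflow after rounding
#
# the double-precision path (fin_dbl) is analogous with 0x43fe and 0x3c00.
#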
12050
12051
global fsin
12052
fsin:
12053
andi.b &0x30,%d0 # clear rnd prec
12054
ori.b &s_mode*0x10,%d0 # insert sgl precision
12055
bra.b fin
12056
12057
global fdin
12058
fdin:
12059
andi.b &0x30,%d0 # clear rnd prec
12060
ori.b &d_mode*0x10,%d0 # insert dbl precision
12061
12062
global fin
12063
fin:
12064
mov.l %d0,L_SCR3(%a6) # store rnd info
12065
12066
mov.b STAG(%a6),%d1 # fetch src optype tag
12067
bne.w fin_not_norm # optimize on non-norm input
12068
12069
#
12070
# FP MOVE IN: NORMs and DENORMs ONLY!
12071
#
12072
fin_norm:
12073
andi.b &0xc0,%d0 # is precision extended?
12074
bne.w fin_not_ext # no, so go handle dbl or sgl
12075
12076
#
12077
# precision selected is extended. so...we cannot get an underflow
12078
# or overflow because of rounding to the correct precision. so...
12079
# skip the scaling and unscaling...
12080
#
12081
tst.b SRC_EX(%a0) # is the operand negative?
12082
bpl.b fin_norm_done # no
12083
bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12084
fin_norm_done:
12085
fmovm.x SRC(%a0),&0x80 # return result in fp0
12086
rts
12087
12088
#
12089
# for an extended precision DENORM, the UNFL exception bit is set
12090
# the accrued bit is NOT set in this instance(no inexactness!)
12091
#
12092
fin_denorm:
12093
andi.b &0xc0,%d0 # is precision extended?
12094
bne.w fin_not_ext # no, so go handle dbl or sgl
12095
12096
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097
tst.b SRC_EX(%a0) # is the operand negative?
12098
bpl.b fin_denorm_done # no
12099
bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12100
fin_denorm_done:
12101
fmovm.x SRC(%a0),&0x80 # return result in fp0
12102
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103
bne.b fin_denorm_unfl_ena # yes
12104
rts
12105
12106
#
12107
# the input is an extended DENORM and underflow is enabled in the FPCR.
12108
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109
# exponent and insert back into the operand.
12110
#
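#
# In short, the EXOP produced below is:
#
#	exop_exp = (0x6000 - (shift count returned by norm())) & 0x7fff
#	EXOP     = {original sign, exop_exp, normalized mantissa}
#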
12111
fin_denorm_unfl_ena:
12112
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12113
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12114
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12115
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12116
bsr.l norm # normalize result
12117
neg.w %d0 # new exponent = -(shft val)
12118
addi.w &0x6000,%d0 # add new bias to exponent
12119
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12120
andi.w &0x8000,%d1 # keep old sign
12121
andi.w &0x7fff,%d0 # clear sign position
12122
or.w %d1,%d0 # concat new exp,old sign
12123
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12124
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12125
rts
12126
12127
#
12128
# operand is to be rounded to single or double precision
12129
#
12130
fin_not_ext:
12131
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12132
bne.b fin_dbl
12133
12134
#
12135
# operand is to be rounded to single precision
12136
#
12137
fin_sgl:
12138
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12139
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12140
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12141
bsr.l scale_to_zero_src # calculate scale factor
12142
12143
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12144
bge.w fin_sd_unfl # yes; go handle underflow
12145
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12146
beq.w fin_sd_may_ovfl # maybe; go check
12147
blt.w fin_sd_ovfl # yes; go handle overflow
12148
12149
#
12150
# operand will NOT overflow or underflow when moved into the fp reg file
12151
#
12152
fin_sd_normal:
12153
fmov.l &0x0,%fpsr # clear FPSR
12154
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12155
12156
fmov.x FP_SCR0(%a6),%fp0 # perform move
12157
12158
fmov.l %fpsr,%d1 # save FPSR
12159
fmov.l &0x0,%fpcr # clear FPCR
12160
12161
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12162
12163
fin_sd_normal_exit:
12164
mov.l %d2,-(%sp) # save d2
12165
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12166
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12167
mov.w %d1,%d2 # make a copy
12168
andi.l &0x7fff,%d1 # strip sign
12169
sub.l %d0,%d1 # add scale factor
12170
andi.w &0x8000,%d2 # keep old sign
12171
or.w %d1,%d2 # concat old sign,new exponent
12172
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12173
mov.l (%sp)+,%d2 # restore d2
12174
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12175
rts
12176
12177
#
12178
# operand is to be rounded to double precision
12179
#
12180
fin_dbl:
12181
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12182
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12183
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12184
bsr.l scale_to_zero_src # calculate scale factor
12185
12186
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12187
bge.w fin_sd_unfl # yes; go handle underflow
12188
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12189
beq.w fin_sd_may_ovfl # maybe; go check
12190
blt.w fin_sd_ovfl # yes; go handle overflow
12191
bra.w fin_sd_normal # no; go handle normalized op
12192
12193
#
12194
# operand WILL underflow when moved in to the fp register file
12195
#
12196
fin_sd_unfl:
12197
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12198
12199
tst.b FP_SCR0_EX(%a6) # is operand negative?
12200
bpl.b fin_sd_unfl_tst
12201
bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12202
12203
# if underflow or inexact is enabled, then go calculate the EXOP first.
12204
fin_sd_unfl_tst:
12205
mov.b FPCR_ENABLE(%a6),%d1
12206
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12207
bne.b fin_sd_unfl_ena # yes
12208
12209
fin_sd_unfl_dis:
12210
lea FP_SCR0(%a6),%a0 # pass: result addr
12211
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12212
bsr.l unf_res # calculate default result
12213
or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12214
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12215
rts
12216
12217
#
12218
# operand will underflow AND underflow or inexact is enabled.
12219
# Therefore, we must return the result rounded to extended precision.
12220
#
12221
fin_sd_unfl_ena:
12222
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12223
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12224
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12225
12226
mov.l %d2,-(%sp) # save d2
12227
mov.w %d1,%d2 # make a copy
12228
andi.l &0x7fff,%d1 # strip sign
12229
sub.l %d0,%d1 # subtract scale factor
12230
andi.w &0x8000,%d2 # extract old sign
12231
addi.l &0x6000,%d1 # add new bias
12232
andi.w &0x7fff,%d1
12233
or.w %d1,%d2 # concat old sign,new exp
12234
mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
12235
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12236
mov.l (%sp)+,%d2 # restore d2
12237
bra.b fin_sd_unfl_dis
12238
12239
#
12240
# operand WILL overflow.
12241
#
12242
fin_sd_ovfl:
12243
fmov.l &0x0,%fpsr # clear FPSR
12244
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12245
12246
fmov.x FP_SCR0(%a6),%fp0 # perform move
12247
12248
fmov.l &0x0,%fpcr # clear FPCR
12249
fmov.l %fpsr,%d1 # save FPSR
12250
12251
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12252
12253
fin_sd_ovfl_tst:
12254
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12255
12256
mov.b FPCR_ENABLE(%a6),%d1
12257
andi.b &0x13,%d1 # is OVFL or INEX enabled?
12258
bne.b fin_sd_ovfl_ena # yes
12259
12260
#
12261
# OVFL is not enabled; therefore, we must create the default result by
12262
# calling ovf_res().
12263
#
12264
fin_sd_ovfl_dis:
12265
btst &neg_bit,FPSR_CC(%a6) # is result negative?
12266
sne %d1 # set sign param accordingly
12267
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12268
bsr.l ovf_res # calculate default result
12269
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12270
fmovm.x (%a0),&0x80 # return default result in fp0
12271
rts
12272
12273
#
12274
# OVFL is enabled.
12275
# the INEX2 bit has already been updated by the round to the correct precision.
12276
# now, round to extended(and don't alter the FPSR).
12277
#
12278
fin_sd_ovfl_ena:
12279
mov.l %d2,-(%sp) # save d2
12280
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12281
mov.l %d1,%d2 # make a copy
12282
andi.l &0x7fff,%d1 # strip sign
12283
andi.w &0x8000,%d2 # keep old sign
12284
sub.l %d0,%d1 # add scale factor
12285
sub.l &0x6000,%d1 # subtract bias
12286
andi.w &0x7fff,%d1
12287
or.w %d2,%d1
12288
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12289
mov.l (%sp)+,%d2 # restore d2
12290
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12291
bra.b fin_sd_ovfl_dis
12292
12293
#
12294
# the move in MAY overflow. so...
12295
#
12296
fin_sd_may_ovfl:
12297
fmov.l &0x0,%fpsr # clear FPSR
12298
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12299
12300
fmov.x FP_SCR0(%a6),%fp0 # perform the move
12301
12302
fmov.l %fpsr,%d1 # save status
12303
fmov.l &0x0,%fpcr # clear FPCR
12304
12305
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12306
12307
fabs.x %fp0,%fp1 # make a copy of result
12308
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
12309
fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
12310
12311
# no, it didn't overflow; we have correct result
12312
bra.w fin_sd_normal_exit
12313
12314
##########################################################################
12315
12316
#
12317
# operand is not a NORM: check its optype and branch accordingly
12318
#
12319
fin_not_norm:
12320
cmpi.b %d1,&DENORM # weed out DENORM
12321
beq.w fin_denorm
12322
cmpi.b %d1,&SNAN # weed out SNANs
12323
beq.l res_snan_1op
12324
cmpi.b %d1,&QNAN # weed out QNANs
12325
beq.l res_qnan_1op
12326
12327
#
12328
# do the fmove in; at this point, only possible ops are ZERO and INF.
12329
# use fmov to determine ccodes.
12330
# prec:mode should be zero at this point but it won't affect answer anyway.
12331
#
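#
# (equivalently, in C terms; illustrative only:
#	ccodes = (fpsr >> 24) & 0xff;
# the N/Z/I/NAN condition codes live in FPSR bits 27-24, i.e. the most
# significant byte, which the rol.l below rotates into the low byte)
#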
12332
fmov.x SRC(%a0),%fp0 # do fmove in
12333
fmov.l %fpsr,%d0 # no exceptions possible
12334
rol.l &0x8,%d0 # put ccodes in lo byte
12335
mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
12336
rts
12337
12338
#########################################################################
12339
# XDEF **************************************************************** #
12340
# fdiv(): emulates the fdiv instruction #
12341
# fsdiv(): emulates the fsdiv instruction #
12342
# fddiv(): emulates the fddiv instruction #
12343
# #
12344
# XREF **************************************************************** #
12345
# scale_to_zero_src() - scale src exponent to zero #
12346
# scale_to_zero_dst() - scale dst exponent to zero #
12347
# unf_res() - return default underflow result #
12348
# ovf_res() - return default overflow result #
12349
# res_qnan() - return QNAN result #
12350
# res_snan() - return SNAN result #
12351
# #
12352
# INPUT *************************************************************** #
12353
# a0 = pointer to extended precision source operand #
12354
# a1 = pointer to extended precision destination operand #
12355
# d0 rnd prec,mode #
12356
# #
12357
# OUTPUT ************************************************************** #
12358
# fp0 = result #
12359
# fp1 = EXOP (if exception occurred) #
12360
# #
12361
# ALGORITHM *********************************************************** #
12362
# Handle NANs, infinities, and zeroes as special cases. Divide #
12363
# norms/denorms into ext/sgl/dbl precision. #
12364
# For norms/denorms, scale the exponents such that a divide #
12365
# instruction won't cause an exception. Use the regular fdiv to #
12366
# compute a result. Check if the regular operands would have taken #
12367
# an exception. If so, return the default overflow/underflow result #
12368
# and return the EXOP if exceptions are enabled. Else, scale the #
12369
# result operand to the proper exponent. #
12370
# #
12371
#########################################################################
12372
12373
align 0x10
12374
tbl_fdiv_unfl:
12375
long 0x3fff - 0x0000 # ext_unfl
12376
long 0x3fff - 0x3f81 # sgl_unfl
12377
long 0x3fff - 0x3c01 # dbl_unfl
12378
12379
tbl_fdiv_ovfl:
12380
long 0x3fff - 0x7ffe # ext overflow exponent
12381
long 0x3fff - 0x407e # sgl overflow exponent
12382
long 0x3fff - 0x43fe # dbl overflow exponent
12383
12384
global fsdiv
12385
fsdiv:
12386
andi.b &0x30,%d0 # clear rnd prec
12387
ori.b &s_mode*0x10,%d0 # insert sgl prec
12388
bra.b fdiv
12389
12390
global fddiv
12391
fddiv:
12392
andi.b &0x30,%d0 # clear rnd prec
12393
ori.b &d_mode*0x10,%d0 # insert dbl prec
12394
12395
global fdiv
12396
fdiv:
12397
mov.l %d0,L_SCR3(%a6) # store rnd info
12398
12399
clr.w %d1
12400
mov.b DTAG(%a6),%d1
12401
lsl.b &0x3,%d1
12402
or.b STAG(%a6),%d1 # combine src tags
12403
12404
bne.w fdiv_not_norm # optimize on non-norm input
12405
12406
#
12407
# DIVIDE: NORMs and DENORMs ONLY!
12408
#
12409
fdiv_norm:
12410
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
12411
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
12412
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
12413
12414
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12415
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12416
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12417
12418
bsr.l scale_to_zero_src # scale src exponent
12419
mov.l %d0,-(%sp) # save scale factor 1
12420
12421
bsr.l scale_to_zero_dst # scale dst exponent
12422
12423
neg.l (%sp) # SCALE FACTOR = scale1 - scale2
12424
add.l %d0,(%sp)
12425
12426
mov.w 2+L_SCR3(%a6),%d1 # fetch precision
12427
lsr.b &0x6,%d1 # shift to lo bits
12428
mov.l (%sp)+,%d0 # load S.F.
12429
cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430
ble.w fdiv_may_ovfl # result will overflow
12431
12432
cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433
beq.w fdiv_may_unfl # maybe
12434
bgt.w fdiv_unfl # yes; go handle underflow
12435
12436
fdiv_normal:
12437
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12438
12439
fmov.l L_SCR3(%a6),%fpcr # save FPCR
12440
fmov.l &0x0,%fpsr # clear FPSR
12441
12442
fdiv.x FP_SCR0(%a6),%fp0 # perform divide
12443
12444
fmov.l %fpsr,%d1 # save FPSR
12445
fmov.l &0x0,%fpcr # clear FPCR
12446
12447
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12448
12449
fdiv_normal_exit:
12450
fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
12451
mov.l %d2,-(%sp) # store d2
12452
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12453
mov.l %d1,%d2 # make a copy
12454
andi.l &0x7fff,%d1 # strip sign
12455
andi.w &0x8000,%d2 # keep old sign
12456
sub.l %d0,%d1 # add scale factor
12457
or.w %d2,%d1 # concat old sign,new exp
12458
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12459
mov.l (%sp)+,%d2 # restore d2
12460
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12461
rts
12462
12463
tbl_fdiv_ovfl2:
12464
long 0x7fff
12465
long 0x407f
12466
long 0x43ff
12467
12468
fdiv_no_ovfl:
12469
mov.l (%sp)+,%d0 # restore scale factor
12470
bra.b fdiv_normal_exit
12471
12472
fdiv_may_ovfl:
12473
mov.l %d0,-(%sp) # save scale factor
12474
12475
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12476
12477
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12478
fmov.l &0x0,%fpsr # set FPSR
12479
12480
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12481
12482
fmov.l %fpsr,%d0
12483
fmov.l &0x0,%fpcr
12484
12485
or.l %d0,USER_FPSR(%a6) # save INEX,N
12486
12487
fmovm.x &0x01,-(%sp) # save result to stack
12488
mov.w (%sp),%d0 # fetch new exponent
12489
add.l &0xc,%sp # clear result from stack
12490
andi.l &0x7fff,%d0 # strip sign
12491
sub.l (%sp),%d0 # add scale factor
12492
cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
12493
blt.b fdiv_no_ovfl
12494
mov.l (%sp)+,%d0
12495
12496
fdiv_ovfl_tst:
12497
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12498
12499
mov.b FPCR_ENABLE(%a6),%d1
12500
andi.b &0x13,%d1 # is OVFL or INEX enabled?
12501
bne.b fdiv_ovfl_ena # yes
12502
12503
fdiv_ovfl_dis:
12504
btst &neg_bit,FPSR_CC(%a6) # is result negative?
12505
sne %d1 # set sign param accordingly
12506
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
12507
bsr.l ovf_res # calculate default result
12508
or.b %d0,FPSR_CC(%a6) # set INF if applicable
12509
fmovm.x (%a0),&0x80 # return default result in fp0
12510
rts
12511
12512
fdiv_ovfl_ena:
12513
mov.l L_SCR3(%a6),%d1
12514
andi.b &0xc0,%d1 # is precision extended?
12515
bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
12516
12517
fdiv_ovfl_ena_cont:
12518
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
12519
12520
mov.l %d2,-(%sp) # save d2
12521
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12522
mov.w %d1,%d2 # make a copy
12523
andi.l &0x7fff,%d1 # strip sign
12524
sub.l %d0,%d1 # add scale factor
12525
subi.l &0x6000,%d1 # subtract bias
12526
andi.w &0x7fff,%d1 # clear sign bit
12527
andi.w &0x8000,%d2 # keep old sign
12528
or.w %d2,%d1 # concat old sign,new exp
12529
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12530
mov.l (%sp)+,%d2 # restore d2
12531
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12532
bra.b fdiv_ovfl_dis
12533
12534
fdiv_ovfl_ena_sd:
12535
fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
12536
12537
mov.l L_SCR3(%a6),%d1
12538
andi.b &0x30,%d1 # keep rnd mode
12539
fmov.l %d1,%fpcr # set FPCR
12540
12541
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12542
12543
fmov.l &0x0,%fpcr # clear FPCR
12544
bra.b fdiv_ovfl_ena_cont
12545
12546
fdiv_unfl:
12547
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12548
12549
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12550
12551
fmov.l &rz_mode*0x10,%fpcr # set FPCR
12552
fmov.l &0x0,%fpsr # clear FPSR
12553
12554
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12555
12556
fmov.l %fpsr,%d1 # save status
12557
fmov.l &0x0,%fpcr # clear FPCR
12558
12559
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12560
12561
mov.b FPCR_ENABLE(%a6),%d1
12562
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12563
bne.b fdiv_unfl_ena # yes
12564
12565
fdiv_unfl_dis:
12566
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12567
12568
lea FP_SCR0(%a6),%a0 # pass: result addr
12569
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12570
bsr.l unf_res # calculate default result
12571
or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
12572
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12573
rts
12574
12575
#
12576
# UNFL is enabled.
12577
#
12578
fdiv_unfl_ena:
12579
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
12580
12581
mov.l L_SCR3(%a6),%d1
12582
andi.b &0xc0,%d1 # is precision extended?
12583
bne.b fdiv_unfl_ena_sd # no, sgl or dbl
12584
12585
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12586
12587
fdiv_unfl_ena_cont:
12588
fmov.l &0x0,%fpsr # clear FPSR
12589
12590
fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12591
12592
fmov.l &0x0,%fpcr # clear FPCR
12593
12594
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
12595
mov.l %d2,-(%sp) # save d2
12596
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12597
mov.l %d1,%d2 # make a copy
12598
andi.l &0x7fff,%d1 # strip sign
12599
andi.w &0x8000,%d2 # keep old sign
12600
sub.l %d0,%d1 # add scale factor
12601
addi.l &0x6000,%d1 # add bias
12602
andi.w &0x7fff,%d1
12603
or.w %d2,%d1 # concat old sign,new exp
12604
mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
12605
mov.l (%sp)+,%d2 # restore d2
12606
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12607
bra.w fdiv_unfl_dis
12608
12609
fdiv_unfl_ena_sd:
12610
mov.l L_SCR3(%a6),%d1
12611
andi.b &0x30,%d1 # use only rnd mode
12612
fmov.l %d1,%fpcr # set FPCR
12613
12614
bra.b fdiv_unfl_ena_cont
12615
12616
#
12617
# the divide operation MAY underflow:
12618
#
12619
fdiv_may_unfl:
12620
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12621
12622
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12623
fmov.l &0x0,%fpsr # clear FPSR
12624
12625
fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12626
12627
fmov.l %fpsr,%d1 # save status
12628
fmov.l &0x0,%fpcr # clear FPCR
12629
12630
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12631
12632
fabs.x %fp0,%fp1 # make a copy of result
12633
fcmp.b %fp1,&0x1 # is |result| > 1.b?
12634
fbgt.w fdiv_normal_exit # no; no underflow occurred
12635
fblt.w fdiv_unfl # yes; underflow occurred
12636
12637
#
12638
# we still don't know if underflow occurred. result is ~ equal to 1. but,
12639
# we don't know if the result was an underflow that rounded up to a 1
12640
# or a normalized number that rounded down to a 1. so, redo the entire
12641
# operation using RZ as the rounding mode to see what the pre-rounded
12642
# result is. this case should be relatively rare.
12643
#
12644
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12645
12646
mov.l L_SCR3(%a6),%d1
12647
andi.b &0xc0,%d1 # keep rnd prec
12648
ori.b &rz_mode*0x10,%d1 # insert RZ
12649
12650
fmov.l %d1,%fpcr # set FPCR
12651
fmov.l &0x0,%fpsr # clear FPSR
12652
12653
fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12654
12655
fmov.l &0x0,%fpcr # clear FPCR
12656
fabs.x %fp1 # make absolute value
12657
fcmp.b %fp1,&0x1 # is |result| < 1.b?
12658
fbge.w fdiv_normal_exit # no; no underflow occurred
12659
bra.w fdiv_unfl # yes; underflow occurred
12660
12661
############################################################################
12662
12663
#
12664
# Divide: inputs are not both normalized; what are they?
12665
#
12666
fdiv_not_norm:
12667
mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12668
jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
12669
12670
swbeg &48
12671
tbl_fdiv_op:
12672
short fdiv_norm - tbl_fdiv_op # NORM / NORM
12673
short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
12674
short fdiv_zero_load - tbl_fdiv_op # NORM / INF
12675
short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
12676
short fdiv_norm - tbl_fdiv_op # NORM / DENORM
12677
short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
12678
short tbl_fdiv_op - tbl_fdiv_op #
12679
short tbl_fdiv_op - tbl_fdiv_op #
12680
12681
short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
12682
short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
12683
short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
12684
short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
12685
short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
12686
short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
12687
short tbl_fdiv_op - tbl_fdiv_op #
12688
short tbl_fdiv_op - tbl_fdiv_op #
12689
12690
short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
12691
short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
12692
short fdiv_res_operr - tbl_fdiv_op # INF / INF
12693
short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
12694
short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
12695
short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
12696
short tbl_fdiv_op - tbl_fdiv_op #
12697
short tbl_fdiv_op - tbl_fdiv_op #
12698
12699
short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
12700
short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
12701
short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
12702
short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
12703
short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
12704
short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
12705
short tbl_fdiv_op - tbl_fdiv_op #
12706
short tbl_fdiv_op - tbl_fdiv_op #
12707
12708
short fdiv_norm - tbl_fdiv_op # DENORM / NORM
12709
short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
12710
short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
12711
short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
12712
short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
12713
short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
12714
short tbl_fdiv_op - tbl_fdiv_op #
12715
short tbl_fdiv_op - tbl_fdiv_op #
12716
12717
short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
12718
short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
12719
short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
12720
short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
12721
short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
12722
short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
12723
short tbl_fdiv_op - tbl_fdiv_op #
12724
short tbl_fdiv_op - tbl_fdiv_op #
12725
12726
fdiv_res_qnan:
12727
bra.l res_qnan
12728
fdiv_res_snan:
12729
bra.l res_snan
12730
fdiv_res_operr:
12731
bra.l res_operr
12732
12733
global fdiv_zero_load # global for fsgldiv
12734
fdiv_zero_load:
12735
mov.b SRC_EX(%a0),%d0 # result sign is exclusive
12736
mov.b DST_EX(%a1),%d1 # or of input signs.
12737
eor.b %d0,%d1
12738
bpl.b fdiv_zero_load_p # result is positive
12739
fmov.s &0x80000000,%fp0 # load a -ZERO
12740
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12741
rts
12742
fdiv_zero_load_p:
12743
fmov.s &0x00000000,%fp0 # load a +ZERO
12744
mov.b &z_bmask,FPSR_CC(%a6) # set Z
12745
rts
12746
12747
#
12748
# The destination was In Range and the source was a ZERO. The result,
12749
# Therefore, is an INF w/ the proper sign.
12750
# So, determine the sign and return a new INF (w/ the j-bit cleared).
12751
#
12752
global fdiv_inf_load # global for fsgldiv
12753
fdiv_inf_load:
12754
ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
12755
mov.b SRC_EX(%a0),%d0 # load both signs
12756
mov.b DST_EX(%a1),%d1
12757
eor.b %d0,%d1
12758
bpl.b fdiv_inf_load_p # result is positive
12759
fmov.s &0xff800000,%fp0 # make result -INF
12760
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12761
rts
12762
fdiv_inf_load_p:
12763
fmov.s &0x7f800000,%fp0 # make result +INF
12764
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12765
rts
12766
12767
#
12768
# The destination was an INF w/ an In Range or ZERO source, the result is
12769
# an INF w/ the proper sign.
12770
# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12771
# dst INF is set, then the j-bit of the result INF is also set).
12772
#
12773
global fdiv_inf_dst # global for fsgldiv
12774
fdiv_inf_dst:
12775
mov.b DST_EX(%a1),%d0 # load both signs
12776
mov.b SRC_EX(%a0),%d1
12777
eor.b %d0,%d1
12778
bpl.b fdiv_inf_dst_p # result is positive
12779
12780
fmovm.x DST(%a1),&0x80 # return result in fp0
12781
fabs.x %fp0 # clear sign bit
12782
fneg.x %fp0 # set sign bit
12783
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12784
rts
12785
12786
fdiv_inf_dst_p:
12787
fmovm.x DST(%a1),&0x80 # return result in fp0
12788
fabs.x %fp0 # return positive INF
12789
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12790
rts
12791
12792
#########################################################################
12793
# XDEF **************************************************************** #
12794
# fneg(): emulates the fneg instruction #
12795
# fsneg(): emulates the fsneg instruction #
12796
# fdneg(): emulates the fdneg instruction #
12797
# #
12798
# XREF **************************************************************** #
12799
# norm() - normalize a denorm to provide EXOP #
12800
# scale_to_zero_src() - scale sgl/dbl source exponent #
12801
# ovf_res() - return default overflow result #
12802
# unf_res() - return default underflow result #
12803
# res_qnan_1op() - return QNAN result #
12804
# res_snan_1op() - return SNAN result #
12805
# #
12806
# INPUT *************************************************************** #
12807
# a0 = pointer to extended precision source operand #
12808
# d0 = rnd prec,mode #
12809
# #
12810
# OUTPUT ************************************************************** #
12811
# fp0 = result #
12812
# fp1 = EXOP (if exception occurred) #
12813
# #
12814
# ALGORITHM *********************************************************** #
12815
# Handle NANs, zeroes, and infinities as special cases. Separate #
12816
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
12817
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
12818
# and an actual fneg performed to see if overflow/underflow would have #
12819
# occurred. If so, return default underflow/overflow result. Else, #
12820
# scale the result exponent and return result. FPSR gets set based on #
12821
# the result value. #
12822
# #
12823
#########################################################################
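#
# For the extended-precision case the whole operation reduces to flipping one
# bit of the {sign,exponent} word; a minimal C sketch (se is assumed to be
# that 16-bit word):
#
#	se ^= 0x8000;		/* flip the sign; exponent and mantissa untouched */
#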
12824
12825
global fsneg
12826
fsneg:
12827
andi.b &0x30,%d0 # clear rnd prec
12828
ori.b &s_mode*0x10,%d0 # insert sgl precision
12829
bra.b fneg
12830
12831
global fdneg
12832
fdneg:
12833
andi.b &0x30,%d0 # clear rnd prec
12834
ori.b &d_mode*0x10,%d0 # insert dbl prec
12835
12836
global fneg
12837
fneg:
12838
mov.l %d0,L_SCR3(%a6) # store rnd info
12839
mov.b STAG(%a6),%d1
12840
bne.w fneg_not_norm # optimize on non-norm input
12841
12842
#
12843
# NEGATE SIGN : norms and denorms ONLY!
12844
#
12845
fneg_norm:
12846
andi.b &0xc0,%d0 # is precision extended?
12847
bne.w fneg_not_ext # no; go handle sgl or dbl
12848
12849
#
12850
# precision selected is extended. so...we can not get an underflow
12851
# or overflow because of rounding to the correct precision. so...
12852
# skip the scaling and unscaling...
12853
#
12854
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12855
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12856
mov.w SRC_EX(%a0),%d0
12857
eori.w &0x8000,%d0 # negate sign
12858
bpl.b fneg_norm_load # sign is positive
12859
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12860
fneg_norm_load:
12861
mov.w %d0,FP_SCR0_EX(%a6)
12862
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12863
rts
12864
12865
#
12866
# for an extended precision DENORM, the UNFL exception bit is set
12867
# the accrued bit is NOT set in this instance(no inexactness!)
12868
#
12869
fneg_denorm:
12870
andi.b &0xc0,%d0 # is precision extended?
12871
bne.b fneg_not_ext # no; go handle sgl or dbl
12872
12873
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12874
12875
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12876
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12877
mov.w SRC_EX(%a0),%d0
12878
eori.w &0x8000,%d0 # negate sign
12879
bpl.b fneg_denorm_done # no
12880
mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
12881
fneg_denorm_done:
12882
mov.w %d0,FP_SCR0_EX(%a6)
12883
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12884
12885
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886
bne.b fneg_ext_unfl_ena # yes
12887
rts
12888
12889
#
12890
# the input is an extended DENORM and underflow is enabled in the FPCR.
12891
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892
# exponent and insert back into the operand.
12893
#
12894
fneg_ext_unfl_ena:
12895
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12896
bsr.l norm # normalize result
12897
neg.w %d0 # new exponent = -(shft val)
12898
addi.w &0x6000,%d0 # add new bias to exponent
12899
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12900
andi.w &0x8000,%d1 # keep old sign
12901
andi.w &0x7fff,%d0 # clear sign position
12902
or.w %d1,%d0 # concat old sign, new exponent
12903
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12904
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12905
rts
12906
12907
#
12908
# operand is either single or double
12909
#
12910
fneg_not_ext:
12911
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12912
bne.b fneg_dbl
12913
12914
#
12915
# operand is to be rounded to single precision
12916
#
12917
fneg_sgl:
12918
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12919
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12920
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12921
bsr.l scale_to_zero_src # calculate scale factor
12922
12923
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12924
bge.w fneg_sd_unfl # yes; go handle underflow
12925
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12926
beq.w fneg_sd_may_ovfl # maybe; go check
12927
blt.w fneg_sd_ovfl # yes; go handle overflow
12928
12929
#
12930
# operand will NOT overflow or underflow when moved in to the fp reg file
12931
#
12932
fneg_sd_normal:
12933
fmov.l &0x0,%fpsr # clear FPSR
12934
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12935
12936
fneg.x FP_SCR0(%a6),%fp0 # perform negation
12937
12938
fmov.l %fpsr,%d1 # save FPSR
12939
fmov.l &0x0,%fpcr # clear FPCR
12940
12941
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12942
12943
fneg_sd_normal_exit:
12944
mov.l %d2,-(%sp) # save d2
12945
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12946
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12947
mov.w %d1,%d2 # make a copy
12948
andi.l &0x7fff,%d1 # strip sign
12949
sub.l %d0,%d1 # add scale factor
12950
andi.w &0x8000,%d2 # keep old sign
12951
or.w %d1,%d2 # concat old sign,new exp
12952
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12953
mov.l (%sp)+,%d2 # restore d2
12954
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12955
rts
12956
12957
#
12958
# operand is to be rounded to double precision
12959
#
12960
fneg_dbl:
12961
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12962
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12963
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12964
bsr.l scale_to_zero_src # calculate scale factor
12965
12966
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12967
bge.b fneg_sd_unfl # yes; go handle underflow
12968
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12969
beq.w fneg_sd_may_ovfl # maybe; go check
12970
blt.w fneg_sd_ovfl # yes; go handle overflow
12971
bra.w fneg_sd_normal # no; go handle normalized op
12972
12973
#
12974
# operand WILL underflow when moved in to the fp register file
12975
#
12976
fneg_sd_unfl:
12977
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12978
12979
eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
12980
bpl.b fneg_sd_unfl_tst
12981
bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12982
12983
# if underflow or inexact is enabled, go calculate EXOP first.
12984
fneg_sd_unfl_tst:
12985
mov.b FPCR_ENABLE(%a6),%d1
12986
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12987
bne.b fneg_sd_unfl_ena # yes
12988
12989
fneg_sd_unfl_dis:
12990
lea FP_SCR0(%a6),%a0 # pass: result addr
12991
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12992
bsr.l unf_res # calculate default result
12993
or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12994
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12995
rts
12996
12997
#
12998
# operand will underflow AND underflow or inexact is enabled.
12999
# Therefore, we must return the result rounded to extended precision.
13000
#
13001
fneg_sd_unfl_ena:
13002
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13005
13006
mov.l %d2,-(%sp) # save d2
13007
mov.l %d1,%d2 # make a copy
13008
andi.l &0x7fff,%d1 # strip sign
13009
andi.w &0x8000,%d2 # keep old sign
13010
sub.l %d0,%d1 # subtract scale factor
13011
addi.l &0x6000,%d1 # add new bias
13012
andi.w &0x7fff,%d1
13013
or.w %d2,%d1 # concat new sign,new exp
13014
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13015
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13016
mov.l (%sp)+,%d2 # restore d2
13017
bra.b fneg_sd_unfl_dis
13018
13019
#
13020
# operand WILL overflow.
13021
#
13022
fneg_sd_ovfl:
13023
fmov.l &0x0,%fpsr # clear FPSR
13024
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13025
13026
fneg.x FP_SCR0(%a6),%fp0 # perform negation
13027
13028
fmov.l &0x0,%fpcr # clear FPCR
13029
fmov.l %fpsr,%d1 # save FPSR
13030
13031
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13032
13033
fneg_sd_ovfl_tst:
13034
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13035
13036
mov.b FPCR_ENABLE(%a6),%d1
13037
andi.b &0x13,%d1 # is OVFL or INEX enabled?
13038
bne.b fneg_sd_ovfl_ena # yes
13039
13040
#
13041
# OVFL is not enabled; therefore, we must create the default result by
13042
# calling ovf_res().
13043
#
13044
fneg_sd_ovfl_dis:
13045
btst &neg_bit,FPSR_CC(%a6) # is result negative?
13046
sne %d1 # set sign param accordingly
13047
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13048
bsr.l ovf_res # calculate default result
13049
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13050
fmovm.x (%a0),&0x80 # return default result in fp0
13051
rts
13052
13053
#
13054
# OVFL is enabled.
13055
# the INEX2 bit has already been updated by the round to the correct precision.
13056
# now, round to extended (and don't alter the FPSR).
13057
#
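#
# this is the overflow counterpart of the underflow EXOP above: the scaling is
# undone and the exponent is re-biased downward by 0x6000 (wrapped into 15 bits
# by the andi.w) before the operand is returned in fp1.
#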
13058
fneg_sd_ovfl_ena:
13059
mov.l %d2,-(%sp) # save d2
13060
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13061
mov.l %d1,%d2 # make a copy
13062
andi.l &0x7fff,%d1 # strip sign
13063
andi.w &0x8000,%d2 # keep old sign
13064
sub.l %d0,%d1 # add scale factor
13065
subi.l &0x6000,%d1 # subtract bias
13066
andi.w &0x7fff,%d1
13067
or.w %d2,%d1 # concat sign,exp
13068
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13069
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13070
mov.l (%sp)+,%d2 # restore d2
13071
bra.b fneg_sd_ovfl_dis
13072
13073
#
13074
# the move in MAY overflow. so...
13075
#
13076
fneg_sd_may_ovfl:
13077
fmov.l &0x0,%fpsr # clear FPSR
13078
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13079
13080
fneg.x FP_SCR0(%a6),%fp0 # perform negation
13081
13082
fmov.l %fpsr,%d1 # save status
13083
fmov.l &0x0,%fpcr # clear FPCR
13084
13085
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13086
13087
fabs.x %fp0,%fp1 # make a copy of result
13088
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13089
fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
13090
13091
# no, it didn't overflow; we have correct result
13092
bra.w fneg_sd_normal_exit
13093
13094
##########################################################################
13095
13096
#
13097
# input is not normalized; what is it?
13098
#
13099
fneg_not_norm:
13100
cmpi.b %d1,&DENORM # weed out DENORM
13101
beq.w fneg_denorm
13102
cmpi.b %d1,&SNAN # weed out SNAN
13103
beq.l res_snan_1op
13104
cmpi.b %d1,&QNAN # weed out QNAN
13105
beq.l res_qnan_1op
13106
13107
#
13108
# do the fneg; at this point, only possible ops are ZERO and INF.
13109
# use fneg to determine ccodes.
13110
# prec:mode should be zero at this point but it won't affect answer anyways.
13111
#
13112
fneg.x SRC_EX(%a0),%fp0 # do fneg
13113
fmov.l %fpsr,%d0
13114
rol.l &0x8,%d0 # put ccodes in lo byte
13115
mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
13116
rts
13117
13118
#########################################################################
13119
# XDEF **************************************************************** #
13120
# ftst(): emulates the ftst instruction #
13121
# #
13122
# XREF **************************************************************** #
13123
# res{s,q}nan_1op() - set NAN result for monadic instruction #
13124
# #
13125
# INPUT *************************************************************** #
13126
# a0 = pointer to extended precision source operand #
13127
# #
13128
# OUTPUT ************************************************************** #
13129
# none #
13130
# #
13131
# ALGORITHM *********************************************************** #
13132
# Check the source operand tag (STAG) and set the FPCR according #
13133
# to the operand type and sign. #
13134
# #
13135
#########################################################################
13136
13137
global ftst
13138
ftst:
13139
mov.b STAG(%a6),%d1
13140
bne.b ftst_not_norm # optimize on non-norm input
13141
13142
#
13143
# Norm:
13144
#
13145
ftst_norm:
13146
tst.b SRC_EX(%a0) # is operand negative?
13147
bmi.b ftst_norm_m # yes
13148
rts
13149
ftst_norm_m:
13150
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13151
rts
13152
13153
#
13154
# input is not normalized; what is it?
13155
#
13156
ftst_not_norm:
13157
cmpi.b %d1,&ZERO # weed out ZERO
13158
beq.b ftst_zero
13159
cmpi.b %d1,&INF # weed out INF
13160
beq.b ftst_inf
13161
cmpi.b %d1,&SNAN # weed out SNAN
13162
beq.l res_snan_1op
13163
cmpi.b %d1,&QNAN # weed out QNAN
13164
beq.l res_qnan_1op
13165
13166
#
13167
# Denorm:
13168
#
13169
ftst_denorm:
13170
tst.b SRC_EX(%a0) # is operand negative?
13171
bmi.b ftst_denorm_m # yes
13172
rts
13173
ftst_denorm_m:
13174
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13175
rts
13176
13177
#
13178
# Infinity:
13179
#
13180
ftst_inf:
13181
tst.b SRC_EX(%a0) # is operand negative?
13182
bmi.b ftst_inf_m # yes
13183
ftst_inf_p:
13184
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13185
rts
13186
ftst_inf_m:
13187
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13188
rts
13189
13190
#
13191
# Zero:
13192
#
13193
ftst_zero:
13194
tst.b SRC_EX(%a0) # is operand negative?
13195
bmi.b ftst_zero_m # yes
13196
ftst_zero_p:
13197
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13198
rts
13199
ftst_zero_m:
13200
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13201
rts
13202
13203
#########################################################################
13204
# XDEF **************************************************************** #
13205
# fint(): emulates the fint instruction #
13206
# #
13207
# XREF **************************************************************** #
13208
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13209
# #
13210
# INPUT *************************************************************** #
13211
# a0 = pointer to extended precision source operand #
13212
# d0 = round precision/mode #
13213
# #
13214
# OUTPUT ************************************************************** #
13215
# fp0 = result #
13216
# #
13217
# ALGORITHM *********************************************************** #
13218
# Separate according to operand type. Unnorms don't pass through #
13219
# here. For norms, load the rounding mode/prec, execute a "fint", then #
13220
# store the resulting FPSR bits. #
13221
# For denorms, force the j-bit to a one and do the same as for #
13222
# norms. Denorms are so low that the answer will either be a zero or a #
13223
# one. #
13224
# For zeroes/infs/NANs, return the same while setting the FPSR #
13225
# as appropriate. #
13226
# #
13227
#########################################################################
13228
13229
global fint
13230
fint:
13231
mov.b STAG(%a6),%d1
13232
bne.b fint_not_norm # optimize on non-norm input
13233
13234
#
13235
# Norm:
13236
#
13237
fint_norm:
13238
andi.b &0x30,%d0 # set prec = ext
13239
13240
fmov.l %d0,%fpcr # set FPCR
13241
fmov.l &0x0,%fpsr # clear FPSR
13242
13243
fint.x SRC(%a0),%fp0 # execute fint
13244
13245
fmov.l &0x0,%fpcr # clear FPCR
13246
fmov.l %fpsr,%d0 # save FPSR
13247
or.l %d0,USER_FPSR(%a6) # set exception bits
13248
13249
rts
13250
13251
#
13252
# input is not normalized; what is it?
13253
#
13254
fint_not_norm:
13255
cmpi.b %d1,&ZERO # weed out ZERO
13256
beq.b fint_zero
13257
cmpi.b %d1,&INF # weed out INF
13258
beq.b fint_inf
13259
cmpi.b %d1,&DENORM # weed out DENORM
13260
beq.b fint_denorm
13261
cmpi.b %d1,&SNAN # weed out SNAN
13262
beq.l res_snan_1op
13263
bra.l res_qnan_1op # weed out QNAN
13264
13265
#
13266
# Denorm:
13267
#
13268
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13269
# also, the INEX2 and AINEX exception bits will be set.
13270
# so, we could either set these manually or force the DENORM
13271
# to a very small NORM and ship it to the NORM routine.
13272
# I do the latter.
13273
#
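#
# what the forced operand produces (illustrative only): it is an extremely
# small non-zero value with the original sign, so "fint" returns +1.0 only in
# round-to-plus-infinity mode for a positive operand, -1.0 only in
# round-to-minus-infinity mode for a negative one, and a signed zero in every
# other rounding mode -- which is the "zero or one" behavior described above.
#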
13274
fint_denorm:
13275
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13276
mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13277
lea FP_SCR0(%a6),%a0
13278
bra.b fint_norm
13279
13280
#
13281
# Zero:
13282
#
13283
fint_zero:
13284
tst.b SRC_EX(%a0) # is ZERO negative?
13285
bmi.b fint_zero_m # yes
13286
fint_zero_p:
13287
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13288
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13289
rts
13290
fint_zero_m:
13291
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13292
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13293
rts
13294
13295
#
13296
# Infinity:
13297
#
13298
fint_inf:
13299
fmovm.x SRC(%a0),&0x80 # return result in fp0
13300
tst.b SRC_EX(%a0) # is INF negative?
13301
bmi.b fint_inf_m # yes
13302
fint_inf_p:
13303
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13304
rts
13305
fint_inf_m:
13306
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13307
rts
13308
13309
#########################################################################
13310
# XDEF **************************************************************** #
13311
# fintrz(): emulates the fintrz instruction #
13312
# #
13313
# XREF **************************************************************** #
13314
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13315
# #
13316
# INPUT *************************************************************** #
13317
# a0 = pointer to extended precision source operand #
13318
# d0 = round precision/mode #
13319
# #
13320
# OUTPUT ************************************************************** #
13321
# fp0 = result #
13322
# #
13323
# ALGORITHM *********************************************************** #
13324
# Separate according to operand type. Unnorms don't pass through #
13325
# here. For norms, load the rounding mode/prec, execute a "fintrz", #
13326
# then store the resulting FPSR bits. #
13327
# For denorms, force the j-bit to a one and do the same as for #
13328
# norms. Denorms are so low that the answer will either be a zero or a #
13329
# one. #
13330
# For zeroes/infs/NANs, return the same while setting the FPSR #
13331
# as appropriate. #
13332
# #
13333
#########################################################################
13334
13335
global fintrz
13336
fintrz:
13337
mov.b STAG(%a6),%d1
13338
bne.b fintrz_not_norm # optimize on non-norm input
13339
13340
#
13341
# Norm:
13342
#
13343
fintrz_norm:
13344
fmov.l &0x0,%fpsr # clear FPSR
13345
13346
fintrz.x SRC(%a0),%fp0 # execute fintrz
13347
13348
fmov.l %fpsr,%d0 # save FPSR
13349
or.l %d0,USER_FPSR(%a6) # set exception bits
13350
13351
rts
13352
13353
#
13354
# input is not normalized; what is it?
13355
#
13356
fintrz_not_norm:
13357
cmpi.b %d1,&ZERO # weed out ZERO
13358
beq.b fintrz_zero
13359
cmpi.b %d1,&INF # weed out INF
13360
beq.b fintrz_inf
13361
cmpi.b %d1,&DENORM # weed out DENORM
13362
beq.b fintrz_denorm
13363
cmpi.b %d1,&SNAN # weed out SNAN
13364
beq.l res_snan_1op
13365
bra.l res_qnan_1op # weed out QNAN
13366
13367
#
13368
# Denorm:
13369
#
13370
# for DENORMs, the result will be (+/-)ZERO.
13371
# also, the INEX2 and AINEX exception bits will be set.
13372
# so, we could either set these manually or force the DENORM
13373
# to a very small NORM and ship it to the NORM routine.
13374
# I do the latter.
13375
#
13376
fintrz_denorm:
13377
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13378
mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13379
lea FP_SCR0(%a6),%a0
13380
bra.b fintrz_norm
13381
13382
#
13383
# Zero:
13384
#
13385
fintrz_zero:
13386
tst.b SRC_EX(%a0) # is ZERO negative?
13387
bmi.b fintrz_zero_m # yes
13388
fintrz_zero_p:
13389
fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13390
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13391
rts
13392
fintrz_zero_m:
13393
fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13394
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13395
rts
13396
13397
#
13398
# Infinity:
13399
#
13400
fintrz_inf:
13401
fmovm.x SRC(%a0),&0x80 # return result in fp0
13402
tst.b SRC_EX(%a0) # is INF negative?
13403
bmi.b fintrz_inf_m # yes
13404
fintrz_inf_p:
13405
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13406
rts
13407
fintrz_inf_m:
13408
mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13409
rts
13410
13411
#########################################################################
13412
# XDEF **************************************************************** #
13413
# fabs(): emulates the fabs instruction #
13414
# fsabs(): emulates the fsabs instruction #
13415
# fdabs(): emulates the fdabs instruction #
13416
# #
13417
# XREF **************************************************************** #
13418
# norm() - normalize denorm mantissa to provide EXOP #
13419
# scale_to_zero_src() - make exponent = 0; get scale factor #
13420
# unf_res() - calculate underflow result #
13421
# ovf_res() - calculate overflow result #
13422
# res_{s,q}nan_1op() - set NAN result for monadic operation #
13423
# #
13424
# INPUT *************************************************************** #
13425
# a0 = pointer to extended precision source operand #
13426
# d0 = rnd precision/mode #
13427
# #
13428
# OUTPUT ************************************************************** #
13429
# fp0 = result #
13430
# fp1 = EXOP (if exception occurred) #
13431
# #
13432
# ALGORITHM *********************************************************** #
13433
# Handle NANs, infinities, and zeroes as special cases. Divide #
13434
# norms into extended, single, and double precision. #
13435
# Simply clear sign for extended precision norm. Ext prec denorm #
13436
# gets an EXOP created for it since it's an underflow. #
13437
# Double and single precision can overflow and underflow. First, #
13438
# scale the operand such that the exponent is zero. Perform an "fabs" #
13439
# using the correct rnd mode/prec. Check to see if the original #
13440
# exponent would take an exception. If so, use unf_res() or ovf_res() #
13441
# to calculate the default result. Also, create the EXOP for the #
13442
# exceptional case. If no exception should occur, insert the correct #
13443
# result exponent and return. #
13444
# Unnorms don't pass through here. #
13445
# #
13446
#########################################################################
13447
13448
global fsabs
13449
fsabs:
13450
andi.b &0x30,%d0 # clear rnd prec
13451
ori.b &s_mode*0x10,%d0 # insert sgl precision
13452
bra.b fabs
13453
13454
global fdabs
13455
fdabs:
13456
andi.b &0x30,%d0 # clear rnd prec
13457
ori.b &d_mode*0x10,%d0 # insert dbl precision
13458
13459
global fabs
13460
fabs:
13461
mov.l %d0,L_SCR3(%a6) # store rnd info
13462
mov.b STAG(%a6),%d1
13463
bne.w fabs_not_norm # optimize on non-norm input
13464
13465
#
13466
# ABSOLUTE VALUE: norms and denorms ONLY!
13467
#
13468
fabs_norm:
13469
andi.b &0xc0,%d0 # is precision extended?
13470
bne.b fabs_not_ext # no; go handle sgl or dbl
13471
13472
#
13473
# precision selected is extended. so...we cannot get an underflow
13474
# or overflow because of rounding to the correct precision. so...
13475
# skip the scaling and unscaling...
13476
#
13477
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13478
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13479
mov.w SRC_EX(%a0),%d1
13480
bclr &15,%d1 # force absolute value
13481
mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
13482
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13483
rts
13484
13485
#
13486
# for an extended precision DENORM, the UNFL exception bit is set
13487
# the accrued bit is NOT set in this instance (no inexactness!)
13488
#
13489
fabs_denorm:
13490
andi.b &0xc0,%d0 # is precision extended?
13491
bne.b fabs_not_ext # no
13492
13493
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13494
13495
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13496
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13497
mov.w SRC_EX(%a0),%d0
13498
bclr &15,%d0 # clear sign
13499
mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
13500
13501
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13502
13503
btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504
bne.b fabs_ext_unfl_ena
13505
rts
13506
13507
#
13508
# the input is an extended DENORM and underflow is enabled in the FPCR.
13509
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510
# exponent and insert back into the operand.
13511
#
13512
fabs_ext_unfl_ena:
13513
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
13514
bsr.l norm # normalize result
13515
neg.w %d0 # new exponent = -(shft val)
13516
addi.w &0x6000,%d0 # add new bias to exponent
13517
mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
13518
andi.w &0x8000,%d1 # keep old sign
13519
andi.w &0x7fff,%d0 # clear sign position
13520
or.w %d1,%d0 # concat old sign, new exponent
13521
mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
13522
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13523
rts
13524
13525
#
13526
# operand is either single or double
13527
#
13528
fabs_not_ext:
13529
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
13530
bne.b fabs_dbl
13531
13532
#
13533
# operand is to be rounded to single precision
13534
#
13535
fabs_sgl:
13536
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13537
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13538
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13539
bsr.l scale_to_zero_src # calculate scale factor
13540
13541
cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
13542
bge.w fabs_sd_unfl # yes; go handle underflow
13543
cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
13544
beq.w fabs_sd_may_ovfl # maybe; go check
13545
blt.w fabs_sd_ovfl # yes; go handle overflow
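#
# same scheme as the double precision compares in the fneg code earlier: the
# single precision limits are a biased exponent of 0x3f81 for the smallest
# normalized value and 0x407e for the largest, giving the 0x3fff-0x3f80 and
# 0x3fff-0x407e test values used above.
#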
13546
13547
#
13548
# operand will NOT overflow or underflow when moved into the fp reg file
13549
#
13550
fabs_sd_normal:
13551
fmov.l &0x0,%fpsr # clear FPSR
13552
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13553
13554
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13555
13556
fmov.l %fpsr,%d1 # save FPSR
13557
fmov.l &0x0,%fpcr # clear FPCR
13558
13559
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13560
13561
fabs_sd_normal_exit:
13562
mov.l %d2,-(%sp) # save d2
13563
fmovm.x &0x80,FP_SCR0(%a6) # store out result
13564
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
13565
mov.l %d1,%d2 # make a copy
13566
andi.l &0x7fff,%d1 # strip sign
13567
sub.l %d0,%d1 # add scale factor
13568
andi.w &0x8000,%d2 # keep old sign
13569
or.w %d1,%d2 # concat old sign,new exp
13570
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
13571
mov.l (%sp)+,%d2 # restore d2
13572
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13573
rts
13574
13575
#
13576
# operand is to be rounded to double precision
13577
#
13578
fabs_dbl:
13579
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13580
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13581
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13582
bsr.l scale_to_zero_src # calculate scale factor
13583
13584
cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
13585
bge.b fabs_sd_unfl # yes; go handle underflow
13586
cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
13587
beq.w fabs_sd_may_ovfl # maybe; go check
13588
blt.w fabs_sd_ovfl # yes; go handle overflow
13589
bra.w fabs_sd_normal # no; go handle normalized op
13590
13591
#
13592
# operand WILL underflow when moved into the fp register file
13593
#
13594
fabs_sd_unfl:
13595
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13596
13597
bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
13598
13599
# if underflow or inexact is enabled, go calculate EXOP first.
13600
mov.b FPCR_ENABLE(%a6),%d1
13601
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
13602
bne.b fabs_sd_unfl_ena # yes
13603
13604
fabs_sd_unfl_dis:
13605
lea FP_SCR0(%a6),%a0 # pass: result addr
13606
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13607
bsr.l unf_res # calculate default result
13608
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
13609
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13610
rts
13611
13612
#
13613
# operand will underflow AND underflow or inexact is enabled.
13614
# Therefore, we must return the result rounded to extended precision.
13615
#
13616
fabs_sd_unfl_ena:
13617
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13618
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13619
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13620
13621
mov.l %d2,-(%sp) # save d2
13622
mov.l %d1,%d2 # make a copy
13623
andi.l &0x7fff,%d1 # strip sign
13624
andi.w &0x8000,%d2 # keep old sign
13625
sub.l %d0,%d1 # subtract scale factor
13626
addi.l &0x6000,%d1 # add new bias
13627
andi.w &0x7fff,%d1
13628
or.w %d2,%d1 # concat new sign,new exp
13629
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13630
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13631
mov.l (%sp)+,%d2 # restore d2
13632
bra.b fabs_sd_unfl_dis
13633
13634
#
13635
# operand WILL overflow.
13636
#
13637
fabs_sd_ovfl:
13638
fmov.l &0x0,%fpsr # clear FPSR
13639
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13640
13641
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13642
13643
fmov.l &0x0,%fpcr # clear FPCR
13644
fmov.l %fpsr,%d1 # save FPSR
13645
13646
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13647
13648
fabs_sd_ovfl_tst:
13649
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13650
13651
mov.b FPCR_ENABLE(%a6),%d1
13652
andi.b &0x13,%d1 # is OVFL or INEX enabled?
13653
bne.b fabs_sd_ovfl_ena # yes
13654
13655
#
13656
# OVFL is not enabled; therefore, we must create the default result by
13657
# calling ovf_res().
13658
#
13659
fabs_sd_ovfl_dis:
13660
btst &neg_bit,FPSR_CC(%a6) # is result negative?
13661
sne %d1 # set sign param accordingly
13662
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13663
bsr.l ovf_res # calculate default result
13664
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13665
fmovm.x (%a0),&0x80 # return default result in fp0
13666
rts
13667
13668
#
13669
# OVFL is enabled.
13670
# the INEX2 bit has already been updated by the round to the correct precision.
13671
# now, round to extended (and don't alter the FPSR).
13672
#
13673
fabs_sd_ovfl_ena:
13674
mov.l %d2,-(%sp) # save d2
13675
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13676
mov.l %d1,%d2 # make a copy
13677
andi.l &0x7fff,%d1 # strip sign
13678
andi.w &0x8000,%d2 # keep old sign
13679
sub.l %d0,%d1 # add scale factor
13680
subi.l &0x6000,%d1 # subtract bias
13681
andi.w &0x7fff,%d1
13682
or.w %d2,%d1 # concat sign,exp
13683
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13684
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13685
mov.l (%sp)+,%d2 # restore d2
13686
bra.b fabs_sd_ovfl_dis
13687
13688
#
13689
# the move in MAY overflow. so...
13690
#
13691
fabs_sd_may_ovfl:
13692
fmov.l &0x0,%fpsr # clear FPSR
13693
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13694
13695
fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13696
13697
fmov.l %fpsr,%d1 # save status
13698
fmov.l &0x0,%fpcr # clear FPCR
13699
13700
or.l %d1,USER_FPSR(%a6) # save INEX2,N
13701
13702
fabs.x %fp0,%fp1 # make a copy of result
13703
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13704
fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
13705
13706
# no, it didn't overflow; we have correct result
13707
bra.w fabs_sd_normal_exit
13708
13709
##########################################################################
13710
13711
#
13712
# input is not normalized; what is it?
13713
#
13714
fabs_not_norm:
13715
cmpi.b %d1,&DENORM # weed out DENORM
13716
beq.w fabs_denorm
13717
cmpi.b %d1,&SNAN # weed out SNAN
13718
beq.l res_snan_1op
13719
cmpi.b %d1,&QNAN # weed out QNAN
13720
beq.l res_qnan_1op
13721
13722
fabs.x SRC(%a0),%fp0 # force absolute value
13723
13724
cmpi.b %d1,&INF # weed out INF
13725
beq.b fabs_inf
13726
fabs_zero:
13727
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13728
rts
13729
fabs_inf:
13730
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13731
rts
13732
13733
#########################################################################
13734
# XDEF **************************************************************** #
13735
# fcmp(): fp compare op routine #
13736
# #
13737
# XREF **************************************************************** #
13738
# res_qnan() - return QNAN result #
13739
# res_snan() - return SNAN result #
13740
# #
13741
# INPUT *************************************************************** #
13742
# a0 = pointer to extended precision source operand #
13743
# a1 = pointer to extended precision destination operand #
13744
# d0 = round prec/mode #
13745
# #
13746
# OUTPUT ************************************************************** #
13747
# None #
13748
# #
13749
# ALGORITHM *********************************************************** #
13750
# Handle NANs and denorms as special cases. For everything else, #
13751
# just use the actual fcmp instruction to produce the correct condition #
13752
# codes. #
13753
# #
13754
#########################################################################
13755
13756
global fcmp
13757
fcmp:
13758
clr.w %d1
13759
mov.b DTAG(%a6),%d1
13760
lsl.b &0x3,%d1
13761
or.b STAG(%a6),%d1
13762
bne.b fcmp_not_norm # optimize on non-norm input
13763
13764
#
13765
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
13766
#
13767
fcmp_norm:
13768
fmovm.x DST(%a1),&0x80 # load dst op
13769
13770
fcmp.x %fp0,SRC(%a0) # do compare
13771
13772
fmov.l %fpsr,%d0 # save FPSR
13773
rol.l &0x8,%d0 # extract ccode bits
13774
mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
13775
13776
rts
13777
13778
#
13779
# fcmp: inputs are not both normalized; what are they?
13780
#
13781
fcmp_not_norm:
13782
mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13783
jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
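#
# dispatch note (tag encodings follow from the row/column order of the table
# below): d1 holds (DTAG << 3) | STAG. the mov.w fetches a 16-bit offset from
# the word table (hence the *2 index scaling) and the jmp adds that offset back
# to the table base. e.g. a DENORM dst with a NORM src gives index 4*8+0 = 32,
# whose entry is fcmp_dnrm_nrm.
#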
13784
13785
swbeg &48
13786
tbl_fcmp_op:
13787
short fcmp_norm - tbl_fcmp_op # NORM - NORM
13788
short fcmp_norm - tbl_fcmp_op # NORM - ZERO
13789
short fcmp_norm - tbl_fcmp_op # NORM - INF
13790
short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
13791
short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
13792
short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
13793
short tbl_fcmp_op - tbl_fcmp_op #
13794
short tbl_fcmp_op - tbl_fcmp_op #
13795
13796
short fcmp_norm - tbl_fcmp_op # ZERO - NORM
13797
short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
13798
short fcmp_norm - tbl_fcmp_op # ZERO - INF
13799
short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
13800
short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
13801
short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
13802
short tbl_fcmp_op - tbl_fcmp_op #
13803
short tbl_fcmp_op - tbl_fcmp_op #
13804
13805
short fcmp_norm - tbl_fcmp_op # INF - NORM
13806
short fcmp_norm - tbl_fcmp_op # INF - ZERO
13807
short fcmp_norm - tbl_fcmp_op # INF - INF
13808
short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
13809
short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
13810
short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
13811
short tbl_fcmp_op - tbl_fcmp_op #
13812
short tbl_fcmp_op - tbl_fcmp_op #
13813
13814
short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
13815
short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
13816
short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
13817
short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
13818
short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
13819
short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
13820
short tbl_fcmp_op - tbl_fcmp_op #
13821
short tbl_fcmp_op - tbl_fcmp_op #
13822
13823
short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
13824
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
13825
short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
13826
short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
13827
short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
13828
short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
13829
short tbl_fcmp_op - tbl_fcmp_op #
13830
short tbl_fcmp_op - tbl_fcmp_op #
13831
13832
short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
13833
short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
13834
short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
13835
short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
13836
short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
13837
short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
13838
short tbl_fcmp_op - tbl_fcmp_op #
13839
short tbl_fcmp_op - tbl_fcmp_op #
13840
13841
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13842
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
13843
fcmp_res_qnan:
13844
bsr.l res_qnan
13845
andi.b &0xf7,FPSR_CC(%a6)
13846
rts
13847
fcmp_res_snan:
13848
bsr.l res_snan
13849
andi.b &0xf7,FPSR_CC(%a6)
13850
rts
13851
13852
#
13853
# DENORMs are a little more difficult.
13854
# If you have 2 DENORMs, then you can just force the j-bit to a one
13855
# and use the fcmp_norm routine.
13856
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857
# and use the fcmp_norm routine.
13858
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860
# (1) signs are (+) and the DENORM is the dst or
13861
# (2) signs are (-) and the DENORM is the src
13862
#
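#
# a quick check of rule (1) (illustrative): with both operands positive and the
# DENORM as the dst, dst < src, so dst - src is negative and the 'N' bit must
# be set; rule (2) is the mirror image with both signs negative and the DENORM
# as the src.
#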
13863
13864
fcmp_dnrm_s:
13865
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13866
mov.l SRC_HI(%a0),%d0
13867
bset &31,%d0 # DENORM src; make into small norm
13868
mov.l %d0,FP_SCR0_HI(%a6)
13869
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13870
lea FP_SCR0(%a6),%a0
13871
bra.w fcmp_norm
13872
13873
fcmp_dnrm_d:
13874
mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
13875
mov.l DST_HI(%a1),%d0
13876
bset &31,%d0 # DENORM dst; make into small norm
13877
mov.l %d0,FP_SCR0_HI(%a6)
13878
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
13879
lea FP_SCR0(%a6),%a1
13880
bra.w fcmp_norm
13881
13882
fcmp_dnrm_sd:
13883
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13884
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13885
mov.l DST_HI(%a1),%d0
13886
bset &31,%d0 # DENORM dst; make into small norm
13887
mov.l %d0,FP_SCR1_HI(%a6)
13888
mov.l SRC_HI(%a0),%d0
13889
bset &31,%d0 # DENORM src; make into small norm
13890
mov.l %d0,FP_SCR0_HI(%a6)
13891
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13892
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13893
lea FP_SCR1(%a6),%a1
13894
lea FP_SCR0(%a6),%a0
13895
bra.w fcmp_norm
13896
13897
fcmp_nrm_dnrm:
13898
mov.b SRC_EX(%a0),%d0 # determine if like signs
13899
mov.b DST_EX(%a1),%d1
13900
eor.b %d0,%d1
13901
bmi.w fcmp_dnrm_s
13902
13903
# signs are the same, so must determine the answer ourselves.
13904
tst.b %d0 # is src op negative?
13905
bmi.b fcmp_nrm_dnrm_m # yes
13906
rts
13907
fcmp_nrm_dnrm_m:
13908
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13909
rts
13910
13911
fcmp_dnrm_nrm:
13912
mov.b SRC_EX(%a0),%d0 # determine if like signs
13913
mov.b DST_EX(%a1),%d1
13914
eor.b %d0,%d1
13915
bmi.w fcmp_dnrm_d
13916
13917
# signs are the same, so must determine the answer ourselves.
13918
tst.b %d0 # is src op negative?
13919
bpl.b fcmp_dnrm_nrm_m # no
13920
rts
13921
fcmp_dnrm_nrm_m:
13922
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13923
rts
13924
13925
#########################################################################
13926
# XDEF **************************************************************** #
13927
# fsglmul(): emulates the fsglmul instruction #
13928
# #
13929
# XREF **************************************************************** #
13930
# scale_to_zero_src() - scale src exponent to zero #
13931
# scale_to_zero_dst() - scale dst exponent to zero #
13932
# unf_res4() - return default underflow result for sglop #
13933
# ovf_res() - return default overflow result #
13934
# res_qnan() - return QNAN result #
13935
# res_snan() - return SNAN result #
13936
# #
13937
# INPUT *************************************************************** #
13938
# a0 = pointer to extended precision source operand #
13939
# a1 = pointer to extended precision destination operand #
13940
# d0 = rnd prec,mode #
13941
# #
13942
# OUTPUT ************************************************************** #
13943
# fp0 = result #
13944
# fp1 = EXOP (if exception occurred) #
13945
# #
13946
# ALGORITHM *********************************************************** #
13947
# Handle NANs, infinities, and zeroes as special cases. Divide #
13948
# norms/denorms into ext/sgl/dbl precision. #
13949
# For norms/denorms, scale the exponents such that a multiply #
13950
# instruction won't cause an exception. Use the regular fsglmul to #
13951
# compute a result. Check if the original operands would have taken #
13952
# an exception. If so, return the default overflow/underflow result #
13953
# and return the EXOP if exceptions are enabled. Else, scale the #
13954
# result operand to the proper exponent. #
13955
# #
13956
#########################################################################
13957
13958
global fsglmul
13959
fsglmul:
13960
mov.l %d0,L_SCR3(%a6) # store rnd info
13961
13962
clr.w %d1
13963
mov.b DTAG(%a6),%d1
13964
lsl.b &0x3,%d1
13965
or.b STAG(%a6),%d1
13966
13967
bne.w fsglmul_not_norm # optimize on non-norm input
13968
13969
fsglmul_norm:
13970
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13971
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
13972
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13973
13974
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13975
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13976
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13977
13978
bsr.l scale_to_zero_src # scale exponent
13979
mov.l %d0,-(%sp) # save scale factor 1
13980
13981
bsr.l scale_to_zero_dst # scale dst exponent
13982
13983
add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
13984
13985
cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
13986
beq.w fsglmul_may_ovfl # result may rnd to overflow
13987
blt.w fsglmul_ovfl # result will overflow
13988
13989
cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
13990
beq.w fsglmul_may_unfl # result may rnd to no unfl
13991
bgt.w fsglmul_unfl # result will underflow
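#
# why these particular compares (illustrative reading): after scaling, both
# mantissas lie in [1.0,2.0), so the product's biased exponent is 0x3fff or
# 0x4000 before the scale factor in d0 is backed out. a scale factor at or
# below 0x3fff-0x7ffe pushes the true exponent up to the top of the extended
# range (overflow territory) and one at or above 0x3fff+0x0001 pushes it down
# to the bottom (underflow territory); the equality cases take the "may" paths
# because whether the product reaches 2.0, which adds one to the exponent,
# decides which side of the limit the result lands on.
#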
13992
13993
fsglmul_normal:
13994
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
13995
13996
fmov.l L_SCR3(%a6),%fpcr # set FPCR
13997
fmov.l &0x0,%fpsr # clear FPSR
13998
13999
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14000
14001
fmov.l %fpsr,%d1 # save status
14002
fmov.l &0x0,%fpcr # clear FPCR
14003
14004
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14005
14006
fsglmul_normal_exit:
14007
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14008
mov.l %d2,-(%sp) # save d2
14009
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14010
mov.l %d1,%d2 # make a copy
14011
andi.l &0x7fff,%d1 # strip sign
14012
andi.w &0x8000,%d2 # keep old sign
14013
sub.l %d0,%d1 # add scale factor
14014
or.w %d2,%d1 # concat old sign,new exp
14015
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14016
mov.l (%sp)+,%d2 # restore d2
14017
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14018
rts
14019
14020
fsglmul_ovfl:
14021
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14022
14023
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14024
fmov.l &0x0,%fpsr # clear FPSR
14025
14026
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14027
14028
fmov.l %fpsr,%d1 # save status
14029
fmov.l &0x0,%fpcr # clear FPCR
14030
14031
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14032
14033
fsglmul_ovfl_tst:
14034
14035
# save setting this until now because this is where fsglmul_may_ovfl may jump in
14036
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14037
14038
mov.b FPCR_ENABLE(%a6),%d1
14039
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14040
bne.b fsglmul_ovfl_ena # yes
14041
14042
fsglmul_ovfl_dis:
14043
btst &neg_bit,FPSR_CC(%a6) # is result negative?
14044
sne %d1 # set sign param accordingly
14045
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14046
andi.b &0x30,%d0 # force prec = ext
14047
bsr.l ovf_res # calculate default result
14048
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14049
fmovm.x (%a0),&0x80 # return default result in fp0
14050
rts
14051
14052
fsglmul_ovfl_ena:
14053
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14054
14055
mov.l %d2,-(%sp) # save d2
14056
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14057
mov.l %d1,%d2 # make a copy
14058
andi.l &0x7fff,%d1 # strip sign
14059
sub.l %d0,%d1 # add scale factor
14060
subi.l &0x6000,%d1 # subtract bias
14061
andi.w &0x7fff,%d1
14062
andi.w &0x8000,%d2 # keep old sign
14063
or.w %d2,%d1 # concat old sign,new exp
14064
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14065
mov.l (%sp)+,%d2 # restore d2
14066
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14067
bra.b fsglmul_ovfl_dis
14068
14069
fsglmul_may_ovfl:
14070
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14071
14072
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14073
fmov.l &0x0,%fpsr # clear FPSR
14074
14075
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14076
14077
fmov.l %fpsr,%d1 # save status
14078
fmov.l &0x0,%fpcr # clear FPCR
14079
14080
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14081
14082
fabs.x %fp0,%fp1 # make a copy of result
14083
fcmp.b %fp1,&0x2 # is |result| >= 2.b?
14084
fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
14085
14086
# no, it didn't overflow; we have correct result
14087
bra.w fsglmul_normal_exit
14088
14089
fsglmul_unfl:
14090
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14091
14092
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14093
14094
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14095
fmov.l &0x0,%fpsr # clear FPSR
14096
14097
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14098
14099
fmov.l %fpsr,%d1 # save status
14100
fmov.l &0x0,%fpcr # clear FPCR
14101
14102
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14103
14104
mov.b FPCR_ENABLE(%a6),%d1
14105
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14106
bne.b fsglmul_unfl_ena # yes
14107
14108
fsglmul_unfl_dis:
14109
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14110
14111
lea FP_SCR0(%a6),%a0 # pass: result addr
14112
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14113
bsr.l unf_res4 # calculate default result
14114
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14115
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14116
rts
14117
14118
#
14119
# UNFL is enabled.
14120
#
14121
fsglmul_unfl_ena:
14122
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14123
14124
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14125
fmov.l &0x0,%fpsr # clear FPSR
14126
14127
fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14128
14129
fmov.l &0x0,%fpcr # clear FPCR
14130
14131
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14132
mov.l %d2,-(%sp) # save d2
14133
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14134
mov.l %d1,%d2 # make a copy
14135
andi.l &0x7fff,%d1 # strip sign
14136
andi.w &0x8000,%d2 # keep old sign
14137
sub.l %d0,%d1 # add scale factor
14138
addi.l &0x6000,%d1 # add bias
14139
andi.w &0x7fff,%d1
14140
or.w %d2,%d1 # concat old sign,new exp
14141
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14142
mov.l (%sp)+,%d2 # restore d2
14143
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14144
bra.w fsglmul_unfl_dis
14145
14146
fsglmul_may_unfl:
14147
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14148
14149
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14150
fmov.l &0x0,%fpsr # clear FPSR
14151
14152
fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14153
14154
fmov.l %fpsr,%d1 # save status
14155
fmov.l &0x0,%fpcr # clear FPCR
14156
14157
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14158
14159
fabs.x %fp0,%fp1 # make a copy of result
14160
fcmp.b %fp1,&0x2 # is |result| > 2.b?
14161
fbgt.w fsglmul_normal_exit # no; no underflow occurred
14162
fblt.w fsglmul_unfl # yes; underflow occurred
14163
14164
#
14165
# we still don't know if underflow occurred. result is ~ equal to 2. but,
14166
# we don't know if the result was an underflow that rounded up to a 2 or
14167
# a normalized number that rounded down to a 2. so, redo the entire operation
14168
# using RZ as the rounding mode to see what the pre-rounded result is.
14169
# this case should be relatively rare.
14170
#
14171
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14172
14173
mov.l L_SCR3(%a6),%d1
14174
andi.b &0xc0,%d1 # keep rnd prec
14175
ori.b &rz_mode*0x10,%d1 # insert RZ
14176
14177
fmov.l %d1,%fpcr # set FPCR
14178
fmov.l &0x0,%fpsr # clear FPSR
14179
14180
fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14181
14182
fmov.l &0x0,%fpcr # clear FPCR
14183
fabs.x %fp1 # make absolute value
14184
fcmp.b %fp1,&0x2 # is |result| < 2.b?
14185
fbge.w fsglmul_normal_exit # no; no underflow occurred
14186
bra.w fsglmul_unfl # yes, underflow occurred
14187
14188
##############################################################################
14189
14190
#
14191
# Single Precision Multiply: inputs are not both normalized; what are they?
14192
#
14193
fsglmul_not_norm:
14194
mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14195
jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
14196
14197
swbeg &48
14198
tbl_fsglmul_op:
14199
short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
14200
short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
14201
short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
14202
short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
14203
short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
14204
short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
14205
short tbl_fsglmul_op - tbl_fsglmul_op #
14206
short tbl_fsglmul_op - tbl_fsglmul_op #
14207
14208
short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
14209
short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
14210
short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
14211
short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
14212
short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
14213
short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
14214
short tbl_fsglmul_op - tbl_fsglmul_op #
14215
short tbl_fsglmul_op - tbl_fsglmul_op #
14216
14217
short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
14218
short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
14219
short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
14220
short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
14221
short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
14222
short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
14223
short tbl_fsglmul_op - tbl_fsglmul_op #
14224
short tbl_fsglmul_op - tbl_fsglmul_op #
14225
14226
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
14227
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
14228
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
14229
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
14230
short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
14231
short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
14232
short tbl_fsglmul_op - tbl_fsglmul_op #
14233
short tbl_fsglmul_op - tbl_fsglmul_op #
14234
14235
short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
14236
short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
14237
short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
14238
short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
14239
short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
14240
short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
14241
short tbl_fsglmul_op - tbl_fsglmul_op #
14242
short tbl_fsglmul_op - tbl_fsglmul_op #
14243
14244
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
14245
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
14246
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
14247
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
14248
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
14249
short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
14250
short tbl_fsglmul_op - tbl_fsglmul_op #
14251
short tbl_fsglmul_op - tbl_fsglmul_op #
14252
14253
fsglmul_res_operr:
14254
bra.l res_operr
14255
fsglmul_res_snan:
14256
bra.l res_snan
14257
fsglmul_res_qnan:
14258
bra.l res_qnan
14259
fsglmul_zero:
14260
bra.l fmul_zero
14261
fsglmul_inf_src:
14262
bra.l fmul_inf_src
14263
fsglmul_inf_dst:
14264
bra.l fmul_inf_dst
14265
14266
#########################################################################
14267
# XDEF **************************************************************** #
14268
# fsgldiv(): emulates the fsgldiv instruction #
14269
# #
14270
# XREF **************************************************************** #
14271
# scale_to_zero_src() - scale src exponent to zero #
14272
# scale_to_zero_dst() - scale dst exponent to zero #
14273
# unf_res4() - return default underflow result for sglop #
14274
# ovf_res() - return default overflow result #
14275
# res_qnan() - return QNAN result #
14276
# res_snan() - return SNAN result #
14277
# #
14278
# INPUT *************************************************************** #
14279
# a0 = pointer to extended precision source operand #
14280
# a1 = pointer to extended precision destination operand #
14281
# d0 = rnd prec,mode #
14282
# #
14283
# OUTPUT ************************************************************** #
14284
# fp0 = result #
14285
# fp1 = EXOP (if exception occurred) #
14286
# #
14287
# ALGORITHM *********************************************************** #
14288
# Handle NANs, infinities, and zeroes as special cases. Divide #
14289
# norms/denorms into ext/sgl/dbl precision. #
14290
# For norms/denorms, scale the exponents such that a divide #
14291
# instruction won't cause an exception. Use the regular fsgldiv to #
14292
# compute a result. Check if the original operands would have taken #
14293
# an exception. If so, return the default overflow/underflow result #
14294
# and return the EXOP if exceptions are enabled. Else, scale the #
14295
# result operand to the proper exponent. #
14296
# #
14297
#########################################################################
14298
14299
global fsgldiv
14300
fsgldiv:
14301
mov.l %d0,L_SCR3(%a6) # store rnd info
14302
14303
clr.w %d1
14304
mov.b DTAG(%a6),%d1
14305
lsl.b &0x3,%d1
14306
or.b STAG(%a6),%d1 # combine src tags
14307
14308
bne.w fsgldiv_not_norm # optimize on non-norm input
14309
14310
#
14311
# DIVIDE: NORMs and DENORMs ONLY!
14312
#
14313
fsgldiv_norm:
14314
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
14315
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
14316
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
14317
14318
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14319
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
14320
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
14321
14322
bsr.l scale_to_zero_src # calculate scale factor 1
14323
mov.l %d0,-(%sp) # save scale factor 1
14324
14325
bsr.l scale_to_zero_dst # calculate scale factor 2
14326
14327
neg.l (%sp) # S.F. = scale1 - scale2
14328
add.l %d0,(%sp)
14329
14330
mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
14331
lsr.b &0x6,%d1
14332
mov.l (%sp)+,%d0
14333
cmpi.l %d0,&0x3fff-0x7ffe
14334
ble.w fsgldiv_may_ovfl
14335
14336
cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
14337
beq.w fsgldiv_may_unfl # maybe
14338
bgt.w fsgldiv_unfl # yes; go handle underflow
14339
14340
fsgldiv_normal:
14341
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14342
14343
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14344
fmov.l &0x0,%fpsr # clear FPSR
14345
14346
fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
14347
14348
fmov.l %fpsr,%d1 # save FPSR
14349
fmov.l &0x0,%fpcr # clear FPCR
14350
14351
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14352
14353
fsgldiv_normal_exit:
14354
fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
14355
mov.l %d2,-(%sp) # save d2
14356
mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14357
mov.l %d1,%d2 # make a copy
14358
andi.l &0x7fff,%d1 # strip sign
14359
andi.w &0x8000,%d2 # keep old sign
14360
sub.l %d0,%d1 # add scale factor
14361
or.w %d2,%d1 # concat old sign,new exp
14362
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14363
mov.l (%sp)+,%d2 # restore d2
14364
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14365
rts
14366
14367
fsgldiv_may_ovfl:
14368
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14369
14370
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14371
fmov.l &0x0,%fpsr # clear FPSR
14372
14373
fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
14374
14375
fmov.l %fpsr,%d1
14376
fmov.l &0x0,%fpcr
14377
14378
or.l %d1,USER_FPSR(%a6) # save INEX,N
14379
14380
fmovm.x &0x01,-(%sp) # save result to stack
14381
mov.w (%sp),%d1 # fetch new exponent
14382
add.l &0xc,%sp # clear result
14383
andi.l &0x7fff,%d1 # strip sign
14384
sub.l %d0,%d1 # add scale factor
14385
cmp.l %d1,&0x7fff # did divide overflow?
14386
blt.b fsgldiv_normal_exit
14387
14388
fsgldiv_ovfl_tst:
14389
or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14390
14391
mov.b FPCR_ENABLE(%a6),%d1
14392
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14393
bne.b fsgldiv_ovfl_ena # yes
14394
14395
fsgldiv_ovfl_dis:
14396
btst &neg_bit,FPSR_CC(%a6) # is result negative
14397
sne %d1 # set sign param accordingly
14398
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14399
andi.b &0x30,%d0 # kill precision
14400
bsr.l ovf_res # calculate default result
14401
or.b %d0,FPSR_CC(%a6) # set INF if applicable
14402
fmovm.x (%a0),&0x80 # return default result in fp0
14403
rts
14404
14405
fsgldiv_ovfl_ena:
14406
fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14407
14408
mov.l %d2,-(%sp) # save d2
14409
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14410
mov.l %d1,%d2 # make a copy
14411
andi.l &0x7fff,%d1 # strip sign
14412
andi.w &0x8000,%d2 # keep old sign
14413
sub.l %d0,%d1 # add scale factor
14414
subi.l &0x6000,%d1 # subtract new bias
14415
andi.w &0x7fff,%d1 # clear ms bit
14416
or.w %d2,%d1 # concat old sign,new exp
14417
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14418
mov.l (%sp)+,%d2 # restore d2
14419
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14420
bra.b fsgldiv_ovfl_dis
14421
14422
fsgldiv_unfl:
14423
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14424
14425
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14426
14427
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14428
fmov.l &0x0,%fpsr # clear FPSR
14429
14430
fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14431
14432
fmov.l %fpsr,%d1 # save status
14433
fmov.l &0x0,%fpcr # clear FPCR
14434
14435
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14436
14437
mov.b FPCR_ENABLE(%a6),%d1
14438
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14439
bne.b fsgldiv_unfl_ena # yes
14440
14441
fsgldiv_unfl_dis:
14442
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14443
14444
lea FP_SCR0(%a6),%a0 # pass: result addr
14445
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14446
bsr.l unf_res4 # calculate default result
14447
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14448
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14449
rts
14450
14451
#
14452
# UNFL is enabled.
14453
#
14454
fsgldiv_unfl_ena:
14455
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14456
14457
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14458
fmov.l &0x0,%fpsr # clear FPSR
14459
14460
fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14461
14462
fmov.l &0x0,%fpcr # clear FPCR
14463
14464
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14465
mov.l %d2,-(%sp) # save d2
14466
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14467
mov.l %d1,%d2 # make a copy
14468
andi.l &0x7fff,%d1 # strip sign
14469
andi.w &0x8000,%d2 # keep old sign
14470
sub.l %d0,%d1 # add scale factor
14471
addi.l &0x6000,%d1 # add bias
14472
andi.w &0x7fff,%d1 # clear top bit
14473
or.w %d2,%d1 # concat old sign, new exp
14474
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14475
mov.l (%sp)+,%d2 # restore d2
14476
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14477
bra.b fsgldiv_unfl_dis
14478
14479
#
14480
# the divide operation MAY underflow:
14481
#
14482
fsgldiv_may_unfl:
14483
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14484
14485
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14486
fmov.l &0x0,%fpsr # clear FPSR
14487
14488
fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14489
14490
fmov.l %fpsr,%d1 # save status
14491
fmov.l &0x0,%fpcr # clear FPCR
14492
14493
or.l %d1,USER_FPSR(%a6) # save INEX2,N
14494
14495
fabs.x %fp0,%fp1 # make a copy of result
14496
fcmp.b %fp1,&0x1 # is |result| > 1.b?
14497
fbgt.w fsgldiv_normal_exit # no; no underflow occurred
14498
fblt.w fsgldiv_unfl # yes; underflow occurred
14499
14500
#
14501
# we still don't know if underflow occurred. result is ~ equal to 1. but,
14502
# we don't know if the result was an underflow that rounded up to a 1
14503
# or a normalized number that rounded down to a 1. so, redo the entire
14504
# operation using RZ as the rounding mode to see what the pre-rounded
14505
# result is. this case should be relatively rare.
14506
#
14507
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
14508
14509
clr.l %d1 # clear scratch register
14510
ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
14511
14512
fmov.l %d1,%fpcr # set FPCR
14513
fmov.l &0x0,%fpsr # clear FPSR
14514
14515
fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14516
14517
fmov.l &0x0,%fpcr # clear FPCR
14518
fabs.x %fp1 # make absolute value
14519
fcmp.b %fp1,&0x1 # is |result| < 1.b?
14520
fbge.w fsgldiv_normal_exit # no; no underflow occurred
14521
bra.w fsgldiv_unfl # yes; underflow occurred
14522
14523
############################################################################
14524
14525
#
14526
# Divide: inputs are not both normalized; what are they?
14527
#
14528
fsgldiv_not_norm:
14529
mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14530
jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14531
14532
swbeg &48
14533
tbl_fsgldiv_op:
14534
short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
14535
short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
14536
short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
14537
short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
14538
short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
14539
short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
14540
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14541
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14542
14543
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
14544
short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
14545
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
14546
short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
14547
short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
14548
short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
14549
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14550
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14551
14552
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
14553
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
14554
short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
14555
short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
14556
short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
14557
short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
14558
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14559
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14560
14561
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
14562
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
14563
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
14564
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
14565
short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
14566
short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
14567
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14568
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14569
14570
short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
14571
short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
14572
short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
14573
short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
14574
short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
14575
short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
14576
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14577
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14578
14579
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
14580
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
14581
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
14582
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
14583
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
14584
short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
14585
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14586
short tbl_fsgldiv_op - tbl_fsgldiv_op #
14587
14588
fsgldiv_res_qnan:
14589
bra.l res_qnan
14590
fsgldiv_res_snan:
14591
bra.l res_snan
14592
fsgldiv_res_operr:
14593
bra.l res_operr
14594
fsgldiv_inf_load:
14595
bra.l fdiv_inf_load
14596
fsgldiv_zero_load:
14597
bra.l fdiv_zero_load
14598
fsgldiv_inf_dst:
14599
bra.l fdiv_inf_dst
14600
14601
#########################################################################
14602
# XDEF **************************************************************** #
14603
# fadd(): emulates the fadd instruction #
14604
# fsadd(): emulates the fsadd instruction #
14605
# fdadd(): emulates the fdadd instruction #
14606
# #
14607
# XREF **************************************************************** #
14608
# addsub_scaler2() - scale the operands so they won't take exc #
14609
# ovf_res() - return default overflow result #
14610
# unf_res() - return default underflow result #
14611
# res_qnan() - set QNAN result #
14612
# res_snan() - set SNAN result #
14613
# res_operr() - set OPERR result #
14614
# scale_to_zero_src() - set src operand exponent equal to zero #
14615
# scale_to_zero_dst() - set dst operand exponent equal to zero #
14616
# #
14617
# INPUT *************************************************************** #
14618
# a0 = pointer to extended precision source operand #
14619
# a1 = pointer to extended precision destination operand #
14620
# #
14621
# OUTPUT ************************************************************** #
14622
# fp0 = result #
14623
# fp1 = EXOP (if exception occurred) #
14624
# #
14625
# ALGORITHM *********************************************************** #
14626
# Handle NANs, infinities, and zeroes as special cases. Divide #
14627
# norms into extended, single, and double precision. #
14628
# Do addition after scaling exponents such that exception won't #
14629
# occur. Then, check result exponent to see if exception would have #
14630
# occurred. If so, return default result and maybe EXOP. Else, insert #
14631
# the correct result exponent and return. Set FPSR bits as appropriate. #
14632
# #
14633
#########################################################################
14634
14635
global fsadd
14636
fsadd:
14637
andi.b &0x30,%d0 # clear rnd prec
14638
ori.b &s_mode*0x10,%d0 # insert sgl prec
14639
bra.b fadd
14640
14641
global fdadd
14642
fdadd:
14643
andi.b &0x30,%d0 # clear rnd prec
14644
ori.b &d_mode*0x10,%d0 # insert dbl prec
14645
14646
global fadd
14647
fadd:
14648
mov.l %d0,L_SCR3(%a6) # store rnd info
14649
14650
clr.w %d1
14651
mov.b DTAG(%a6),%d1
14652
lsl.b &0x3,%d1
14653
or.b STAG(%a6),%d1 # combine src tags
14654
14655
bne.w fadd_not_norm # optimize on non-norm input
14656
14657
#
14658
# ADD: norms and denorms
14659
#
14660
fadd_norm:
14661
bsr.l addsub_scaler2 # scale exponents
14662
14663
fadd_zero_entry:
14664
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14665
14666
fmov.l &0x0,%fpsr # clear FPSR
14667
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14668
14669
fadd.x FP_SCR0(%a6),%fp0 # execute add
14670
14671
fmov.l &0x0,%fpcr # clear FPCR
14672
fmov.l %fpsr,%d1 # fetch INEX2,N,Z
14673
14674
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
14675
14676
fbeq.w fadd_zero_exit # if result is zero, end now
14677
14678
mov.l %d2,-(%sp) # save d2
14679
14680
fmovm.x &0x01,-(%sp) # save result to stack
14681
14682
mov.w 2+L_SCR3(%a6),%d1
14683
lsr.b &0x6,%d1
14684
14685
mov.w (%sp),%d2 # fetch new sign, exp
14686
andi.l &0x7fff,%d2 # strip sign
14687
sub.l %d0,%d2 # add scale factor
14688
14689
cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690
bge.b fadd_ovfl # yes
14691
14692
cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693
blt.w fadd_unfl # yes
14694
beq.w fadd_may_unfl # maybe; go find out
14695
14696
fadd_normal:
14697
mov.w (%sp),%d1
14698
andi.w &0x8000,%d1 # keep sign
14699
or.w %d2,%d1 # concat sign,new exp
14700
mov.w %d1,(%sp) # insert new exponent
14701
14702
fmovm.x (%sp)+,&0x80 # return result in fp0
14703
14704
mov.l (%sp)+,%d2 # restore d2
14705
rts
14706
14707
fadd_zero_exit:
14708
# fmov.s &0x00000000,%fp0 # return zero in fp0
14709
rts
14710
14711
tbl_fadd_ovfl:
14712
long 0x7fff # ext ovfl
14713
long 0x407f # sgl ovfl
14714
long 0x43ff # dbl ovfl
14715
14716
tbl_fadd_unfl:
14717
long 0x0000 # ext unfl
14718
long 0x3f81 # sgl unfl
14719
long 0x3c01 # dbl unfl
14720
14721
fadd_ovfl:
14722
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14723
14724
mov.b FPCR_ENABLE(%a6),%d1
14725
andi.b &0x13,%d1 # is OVFL or INEX enabled?
14726
bne.b fadd_ovfl_ena # yes
14727
14728
add.l &0xc,%sp
14729
fadd_ovfl_dis:
14730
btst &neg_bit,FPSR_CC(%a6) # is result negative?
14731
sne %d1 # set sign param accordingly
14732
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14733
bsr.l ovf_res # calculate default result
14734
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14735
fmovm.x (%a0),&0x80 # return default result in fp0
14736
mov.l (%sp)+,%d2 # restore d2
14737
rts
14738
14739
fadd_ovfl_ena:
14740
mov.b L_SCR3(%a6),%d1
14741
andi.b &0xc0,%d1 # is precision extended?
14742
bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
14743
14744
fadd_ovfl_ena_cont:
14745
mov.w (%sp),%d1
14746
andi.w &0x8000,%d1 # keep sign
14747
subi.l &0x6000,%d2 # subtract bias
14748
andi.w &0x7fff,%d2
14749
or.w %d2,%d1 # concat sign,new exp
14750
mov.w %d1,(%sp) # insert new exponent
14751
14752
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
14753
bra.b fadd_ovfl_dis
14754
14755
fadd_ovfl_ena_sd:
14756
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14757
14758
mov.l L_SCR3(%a6),%d1
14759
andi.b &0x30,%d1 # keep rnd mode
14760
fmov.l %d1,%fpcr # set FPCR
14761
14762
fadd.x FP_SCR0(%a6),%fp0 # execute add
14763
14764
fmov.l &0x0,%fpcr # clear FPCR
14765
14766
add.l &0xc,%sp
14767
fmovm.x &0x01,-(%sp)
14768
bra.b fadd_ovfl_ena_cont
14769
14770
fadd_unfl:
14771
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14772
14773
add.l &0xc,%sp
14774
14775
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14776
14777
fmov.l &rz_mode*0x10,%fpcr # set FPCR
14778
fmov.l &0x0,%fpsr # clear FPSR
14779
14780
fadd.x FP_SCR0(%a6),%fp0 # execute add
14781
14782
fmov.l &0x0,%fpcr # clear FPCR
14783
fmov.l %fpsr,%d1 # save status
14784
14785
or.l %d1,USER_FPSR(%a6) # save INEX,N
14786
14787
mov.b FPCR_ENABLE(%a6),%d1
14788
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14789
bne.b fadd_unfl_ena # yes
14790
14791
fadd_unfl_dis:
14792
fmovm.x &0x80,FP_SCR0(%a6) # store out result
14793
14794
lea FP_SCR0(%a6),%a0 # pass: result addr
14795
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14796
bsr.l unf_res # calculate default result
14797
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14798
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14799
mov.l (%sp)+,%d2 # restore d2
14800
rts
14801
14802
fadd_unfl_ena:
14803
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14804
14805
mov.l L_SCR3(%a6),%d1
14806
andi.b &0xc0,%d1 # is precision extended?
14807
bne.b fadd_unfl_ena_sd # no; sgl or dbl
14808
14809
fmov.l L_SCR3(%a6),%fpcr # set FPCR
14810
14811
fadd_unfl_ena_cont:
14812
fmov.l &0x0,%fpsr # clear FPSR
14813
14814
fadd.x FP_SCR0(%a6),%fp1 # execute add
14815
14816
fmov.l &0x0,%fpcr # clear FPCR
14817
14818
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14819
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14820
mov.l %d1,%d2 # make a copy
14821
andi.l &0x7fff,%d1 # strip sign
14822
andi.w &0x8000,%d2 # keep old sign
14823
sub.l %d0,%d1 # add scale factor
14824
addi.l &0x6000,%d1 # add new bias
14825
andi.w &0x7fff,%d1 # clear top bit
14826
or.w %d2,%d1 # concat sign,new exp
14827
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14828
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14829
bra.w fadd_unfl_dis
14830
14831
fadd_unfl_ena_sd:
14832
mov.l L_SCR3(%a6),%d1
14833
andi.b &0x30,%d1 # use only rnd mode
14834
fmov.l %d1,%fpcr # set FPCR
14835
14836
bra.b fadd_unfl_ena_cont
14837
14838
#
14839
# result is equal to the smallest normalized number in the selected precision
14840
# if the precision is extended, this result could not have come from an
14841
# underflow that rounded up.
14842
#
14843
fadd_may_unfl:
14844
mov.l L_SCR3(%a6),%d1
14845
andi.b &0xc0,%d1
14846
beq.w fadd_normal # yes; no underflow occurred
14847
14848
mov.l 0x4(%sp),%d1 # extract hi(man)
14849
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14850
bne.w fadd_normal # no; no underflow occurred
14851
14852
tst.l 0x8(%sp) # is lo(man) = 0x0?
14853
bne.w fadd_normal # no; no underflow occurred
14854
14855
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856
beq.w fadd_normal # no; no underflow occurred
14857
14858
#
14859
# ok, so now the result has an exponent equal to the smallest normalized
14860
# exponent for the selected precision. also, the mantissa is equal to
14861
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862
# g,r,s.
14863
# now, we must determine whether the pre-rounded result was an underflow
14864
# rounded "up" or a normalized number rounded "down".
14865
# so, we do this by re-executing the add using RZ as the rounding mode and
14866
# seeing if the new result is smaller or equal to the current result.
14867
#
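#
# in C-like terms the test below is roughly (illustrative sketch only; the
# names res_rn/res_rz are not used by this package):
#
#	/* res_rn: the add rounded in the user's mode (already in fp0)
#	 * res_rz: the same add redone in round-to-zero (computed in fp1)
#	 * if truncating gives a strictly smaller magnitude, the pre-rounded
#	 * result was below the smallest norm, i.e. a true underflow.
#	 */
#	if (fabsl(res_rz) < fabsl(res_rn))
#		goto underflow;
#	goto normal;
#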
14868
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14869
14870
mov.l L_SCR3(%a6),%d1
14871
andi.b &0xc0,%d1 # keep rnd prec
14872
ori.b &rz_mode*0x10,%d1 # insert rnd mode
14873
fmov.l %d1,%fpcr # set FPCR
14874
fmov.l &0x0,%fpsr # clear FPSR
14875
14876
fadd.x FP_SCR0(%a6),%fp1 # execute add
14877
14878
fmov.l &0x0,%fpcr # clear FPCR
14879
14880
fabs.x %fp0 # compare absolute values
14881
fabs.x %fp1
14882
fcmp.x %fp0,%fp1 # is first result > second?
14883
14884
fbgt.w fadd_unfl # yes; it's an underflow
14885
bra.w fadd_normal # no; it's not an underflow
14886
14887
##########################################################################
14888
14889
#
14890
# Add: inputs are not both normalized; what are they?
14891
#
14892
fadd_not_norm:
14893
mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14894
jmp (tbl_fadd_op.b,%pc,%d1.w*1)
14895
14896
swbeg &48
14897
tbl_fadd_op:
14898
short fadd_norm - tbl_fadd_op # NORM + NORM
14899
short fadd_zero_src - tbl_fadd_op # NORM + ZERO
14900
short fadd_inf_src - tbl_fadd_op # NORM + INF
14901
short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
14902
short fadd_norm - tbl_fadd_op # NORM + DENORM
14903
short fadd_res_snan - tbl_fadd_op # NORM + SNAN
14904
short tbl_fadd_op - tbl_fadd_op #
14905
short tbl_fadd_op - tbl_fadd_op #
14906
14907
short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
14908
short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
14909
short fadd_inf_src - tbl_fadd_op # ZERO + INF
14910
short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
14911
short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
14912
short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
14913
short tbl_fadd_op - tbl_fadd_op #
14914
short tbl_fadd_op - tbl_fadd_op #
14915
14916
short fadd_inf_dst - tbl_fadd_op # INF + NORM
14917
short fadd_inf_dst - tbl_fadd_op # INF + ZERO
14918
short fadd_inf_2 - tbl_fadd_op # INF + INF
14919
short fadd_res_qnan - tbl_fadd_op # INF + QNAN
14920
short fadd_inf_dst - tbl_fadd_op # INF + DENORM
14921
short fadd_res_snan - tbl_fadd_op # INF + SNAN
14922
short tbl_fadd_op - tbl_fadd_op #
14923
short tbl_fadd_op - tbl_fadd_op #
14924
14925
short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
14926
short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
14927
short fadd_res_qnan - tbl_fadd_op # QNAN + INF
14928
short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
14929
short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
14930
short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
14931
short tbl_fadd_op - tbl_fadd_op #
14932
short tbl_fadd_op - tbl_fadd_op #
14933
14934
short fadd_norm - tbl_fadd_op # DENORM + NORM
14935
short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
14936
short fadd_inf_src - tbl_fadd_op # DENORM + INF
14937
short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
14938
short fadd_norm - tbl_fadd_op # DENORM + DENORM
14939
short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
14940
short tbl_fadd_op - tbl_fadd_op #
14941
short tbl_fadd_op - tbl_fadd_op #
14942
14943
short fadd_res_snan - tbl_fadd_op # SNAN + NORM
14944
short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
14945
short fadd_res_snan - tbl_fadd_op # SNAN + INF
14946
short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
14947
short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
14948
short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
14949
short tbl_fadd_op - tbl_fadd_op #
14950
short tbl_fadd_op - tbl_fadd_op #
14951
14952
fadd_res_qnan:
14953
bra.l res_qnan
14954
fadd_res_snan:
14955
bra.l res_snan
14956
14957
#
14958
# both operands are ZEROes
14959
#
14960
fadd_zero_2:
14961
mov.b SRC_EX(%a0),%d0 # are the signs opposite
14962
mov.b DST_EX(%a1),%d1
14963
eor.b %d0,%d1
14964
bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
14965
14966
# the signs are the same. so determine whether they are positive or negative
14967
# and return the appropriately signed zero.
14968
tst.b %d0 # are ZEROes positive or negative?
14969
bmi.b fadd_zero_rm # negative
14970
fmov.s &0x00000000,%fp0 # return +ZERO
14971
mov.b &z_bmask,FPSR_CC(%a6) # set Z
14972
rts
14973
14974
#
14975
# the ZEROes have opposite signs:
14976
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977
# - -ZERO is returned in the case of RM.
14978
#
14979
fadd_zero_2_chk_rm:
14980
mov.b 3+L_SCR3(%a6),%d1
14981
andi.b &0x30,%d1 # extract rnd mode
14982
cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
14983
beq.b fadd_zero_rm # yes
14984
fmov.s &0x00000000,%fp0 # return +ZERO
14985
mov.b &z_bmask,FPSR_CC(%a6) # set Z
14986
rts
14987
14988
fadd_zero_rm:
14989
fmov.s &0x80000000,%fp0 # return -ZERO
14990
mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991
rts
14992
14993
#
14994
# one operand is a ZERO and the other is a DENORM or NORM. scale
14995
# the DENORM or NORM and jump to the regular fadd routine.
14996
#
14997
fadd_zero_dst:
14998
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14999
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15000
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15001
bsr.l scale_to_zero_src # scale the operand
15002
clr.w FP_SCR1_EX(%a6)
15003
clr.l FP_SCR1_HI(%a6)
15004
clr.l FP_SCR1_LO(%a6)
15005
bra.w fadd_zero_entry # go execute fadd
15006
15007
fadd_zero_src:
15008
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15009
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15010
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15011
bsr.l scale_to_zero_dst # scale the operand
15012
clr.w FP_SCR0_EX(%a6)
15013
clr.l FP_SCR0_HI(%a6)
15014
clr.l FP_SCR0_LO(%a6)
15015
bra.w fadd_zero_entry # go execute fadd
15016
15017
#
15018
# both operands are INFs. an OPERR will result if the INFs have
15019
# different signs. else, an INF of the same sign is returned
15020
#
15021
fadd_inf_2:
15022
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15023
mov.b DST_EX(%a1),%d1
15024
eor.b %d1,%d0
15025
bmi.l res_operr # weed out (-INF)+(+INF)
15026
15027
# ok, so it's not an OPERR. but, we do have to remember to return the
15028
# src INF since that's where the 881/882 gets the j-bit from...
15029
15030
#
15031
# operands are INF and one of {ZERO, INF, DENORM, NORM}
15032
#
15033
fadd_inf_src:
15034
fmovm.x SRC(%a0),&0x80 # return src INF
15035
tst.b SRC_EX(%a0) # is INF positive?
15036
bpl.b fadd_inf_done # yes; we're done
15037
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038
rts
15039
15040
#
15041
# operands are INF and one of {ZERO, INF, DENORM, NORM}
15042
#
15043
fadd_inf_dst:
15044
fmovm.x DST(%a1),&0x80 # return dst INF
15045
tst.b DST_EX(%a1) # is INF positive?
15046
bpl.b fadd_inf_done # yes; we're done
15047
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048
rts
15049
15050
fadd_inf_done:
15051
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15052
rts
15053
15054
#########################################################################
15055
# XDEF **************************************************************** #
15056
# fsub(): emulates the fsub instruction #
15057
# fssub(): emulates the fssub instruction #
15058
# fdsub(): emulates the fdsub instruction #
15059
# #
15060
# XREF **************************************************************** #
15061
# addsub_scaler2() - scale the operands so they won't take exc #
15062
# ovf_res() - return default overflow result #
15063
# unf_res() - return default underflow result #
15064
# res_qnan() - set QNAN result #
15065
# res_snan() - set SNAN result #
15066
# res_operr() - set OPERR result #
15067
# scale_to_zero_src() - set src operand exponent equal to zero #
15068
# scale_to_zero_dst() - set dst operand exponent equal to zero #
15069
# #
15070
# INPUT *************************************************************** #
15071
# a0 = pointer to extended precision source operand #
15072
# a1 = pointer to extended precision destination operand #
15073
# #
15074
# OUTPUT ************************************************************** #
15075
# fp0 = result #
15076
# fp1 = EXOP (if exception occurred) #
15077
# #
15078
# ALGORITHM *********************************************************** #
15079
# Handle NANs, infinities, and zeroes as special cases. Divide #
15080
# norms into extended, single, and double precision. #
15081
# Do subtraction after scaling exponents such that exception won't	#
15082
# occur. Then, check result exponent to see if exception would have #
15083
# occurred. If so, return default result and maybe EXOP. Else, insert #
15084
# the correct result exponent and return. Set FPSR bits as appropriate. #
15085
# #
15086
#########################################################################
15087
15088
global fssub
15089
fssub:
15090
andi.b &0x30,%d0 # clear rnd prec
15091
ori.b &s_mode*0x10,%d0 # insert sgl prec
15092
bra.b fsub
15093
15094
global fdsub
15095
fdsub:
15096
andi.b &0x30,%d0 # clear rnd prec
15097
ori.b &d_mode*0x10,%d0 # insert dbl prec
15098
15099
global fsub
15100
fsub:
15101
mov.l %d0,L_SCR3(%a6) # store rnd info
15102
15103
clr.w %d1
15104
mov.b DTAG(%a6),%d1
15105
lsl.b &0x3,%d1
15106
or.b STAG(%a6),%d1 # combine src tags
15107
15108
bne.w fsub_not_norm # optimize on non-norm input
15109
15110
#
15111
# SUB: norms and denorms
15112
#
15113
fsub_norm:
15114
bsr.l addsub_scaler2 # scale exponents
15115
15116
fsub_zero_entry:
15117
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15118
15119
fmov.l &0x0,%fpsr # clear FPSR
15120
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15121
15122
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15123
15124
fmov.l &0x0,%fpcr # clear FPCR
15125
fmov.l %fpsr,%d1 # fetch INEX2, N, Z
15126
15127
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
15128
15129
fbeq.w fsub_zero_exit # if result zero, end now
15130
15131
mov.l %d2,-(%sp) # save d2
15132
15133
fmovm.x &0x01,-(%sp) # save result to stack
15134
15135
mov.w 2+L_SCR3(%a6),%d1
15136
lsr.b &0x6,%d1
15137
15138
mov.w (%sp),%d2 # fetch new exponent
15139
andi.l &0x7fff,%d2 # strip sign
15140
sub.l %d0,%d2 # add scale factor
15141
15142
cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15143
bge.b fsub_ovfl # yes
15144
15145
cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15146
blt.w fsub_unfl # yes
15147
beq.w fsub_may_unfl # maybe; go find out
15148
15149
fsub_normal:
15150
mov.w (%sp),%d1
15151
andi.w &0x8000,%d1 # keep sign
15152
or.w %d2,%d1 # insert new exponent
15153
mov.w %d1,(%sp) # insert new exponent
15154
15155
fmovm.x (%sp)+,&0x80 # return result in fp0
15156
15157
mov.l (%sp)+,%d2 # restore d2
15158
rts
15159
15160
fsub_zero_exit:
15161
# fmov.s &0x00000000,%fp0 # return zero in fp0
15162
rts
15163
15164
tbl_fsub_ovfl:
15165
long 0x7fff # ext ovfl
15166
long 0x407f # sgl ovfl
15167
long 0x43ff # dbl ovfl
15168
15169
tbl_fsub_unfl:
15170
long 0x0000 # ext unfl
15171
long 0x3f81 # sgl unfl
15172
long 0x3c01 # dbl unfl
15173
15174
fsub_ovfl:
15175
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15176
15177
mov.b FPCR_ENABLE(%a6),%d1
15178
andi.b &0x13,%d1 # is OVFL or INEX enabled?
15179
bne.b fsub_ovfl_ena # yes
15180
15181
add.l &0xc,%sp
15182
fsub_ovfl_dis:
15183
btst &neg_bit,FPSR_CC(%a6) # is result negative?
15184
sne %d1 # set sign param accordingly
15185
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
15186
bsr.l ovf_res # calculate default result
15187
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15188
fmovm.x (%a0),&0x80 # return default result in fp0
15189
mov.l (%sp)+,%d2 # restore d2
15190
rts
15191
15192
fsub_ovfl_ena:
15193
mov.b L_SCR3(%a6),%d1
15194
andi.b &0xc0,%d1 # is precision extended?
15195
bne.b fsub_ovfl_ena_sd # no
15196
15197
fsub_ovfl_ena_cont:
15198
mov.w (%sp),%d1 # fetch {sgn,exp}
15199
andi.w &0x8000,%d1 # keep sign
15200
subi.l &0x6000,%d2 # subtract new bias
15201
andi.w &0x7fff,%d2 # clear top bit
15202
or.w %d2,%d1 # concat sign,exp
15203
mov.w %d1,(%sp) # insert new exponent
15204
15205
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
15206
bra.b fsub_ovfl_dis
15207
15208
fsub_ovfl_ena_sd:
15209
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15210
15211
mov.l L_SCR3(%a6),%d1
15212
andi.b &0x30,%d1 # clear rnd prec
15213
fmov.l %d1,%fpcr # set FPCR
15214
15215
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15216
15217
fmov.l &0x0,%fpcr # clear FPCR
15218
15219
add.l &0xc,%sp
15220
fmovm.x &0x01,-(%sp)
15221
bra.b fsub_ovfl_ena_cont
15222
15223
fsub_unfl:
15224
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15225
15226
add.l &0xc,%sp
15227
15228
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15229
15230
fmov.l &rz_mode*0x10,%fpcr # set FPCR
15231
fmov.l &0x0,%fpsr # clear FPSR
15232
15233
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15234
15235
fmov.l &0x0,%fpcr # clear FPCR
15236
fmov.l %fpsr,%d1 # save status
15237
15238
or.l %d1,USER_FPSR(%a6)
15239
15240
mov.b FPCR_ENABLE(%a6),%d1
15241
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15242
bne.b fsub_unfl_ena # yes
15243
15244
fsub_unfl_dis:
15245
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15246
15247
lea FP_SCR0(%a6),%a0 # pass: result addr
15248
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15249
bsr.l unf_res # calculate default result
15250
or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
15251
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15252
mov.l (%sp)+,%d2 # restore d2
15253
rts
15254
15255
fsub_unfl_ena:
15256
fmovm.x FP_SCR1(%a6),&0x40
15257
15258
mov.l L_SCR3(%a6),%d1
15259
andi.b &0xc0,%d1 # is precision extended?
15260
bne.b fsub_unfl_ena_sd # no
15261
15262
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15263
15264
fsub_unfl_ena_cont:
15265
fmov.l &0x0,%fpsr # clear FPSR
15266
15267
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15268
15269
fmov.l &0x0,%fpcr # clear FPCR
15270
15271
fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
15272
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15273
mov.l %d1,%d2 # make a copy
15274
andi.l &0x7fff,%d1 # strip sign
15275
andi.w &0x8000,%d2 # keep old sign
15276
sub.l %d0,%d1 # add scale factor
15277
addi.l &0x6000,%d1 # add new bias
15278
andi.w &0x7fff,%d1 # clear top bit
15279
or.w %d2,%d1 # concat sgn,exp
15280
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15281
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15282
bra.w fsub_unfl_dis
15283
15284
fsub_unfl_ena_sd:
15285
mov.l L_SCR3(%a6),%d1
15286
andi.b &0x30,%d1 # clear rnd prec
15287
fmov.l %d1,%fpcr # set FPCR
15288
15289
bra.b fsub_unfl_ena_cont
15290
15291
#
15292
# result is equal to the smallest normalized number in the selected precision
15293
# if the precision is extended, this result could not have come from an
15294
# underflow that rounded up.
15295
#
15296
fsub_may_unfl:
15297
mov.l L_SCR3(%a6),%d1
15298
andi.b &0xc0,%d1 # fetch rnd prec
15299
beq.w fsub_normal # yes; no underflow occurred
15300
15301
mov.l 0x4(%sp),%d1
15302
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
15303
bne.w fsub_normal # no; no underflow occurred
15304
15305
tst.l 0x8(%sp) # is lo(man) = 0x0?
15306
bne.w fsub_normal # no; no underflow occurred
15307
15308
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309
beq.w fsub_normal # no; no underflow occurred
15310
15311
#
15312
# ok, so now the result has an exponent equal to the smallest normalized
15313
# exponent for the selected precision. also, the mantissa is equal to
15314
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
15315
# g,r,s.
15316
# now, we must determine whether the pre-rounded result was an underflow
15317
# rounded "up" or a normalized number rounded "down".
15318
# so, we do this by re-executing the subtract using RZ as the rounding mode and
15319
# seeing if the new result is smaller or equal to the current result.
15320
#
15321
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15322
15323
mov.l L_SCR3(%a6),%d1
15324
andi.b &0xc0,%d1 # keep rnd prec
15325
ori.b &rz_mode*0x10,%d1 # insert rnd mode
15326
fmov.l %d1,%fpcr # set FPCR
15327
fmov.l &0x0,%fpsr # clear FPSR
15328
15329
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15330
15331
fmov.l &0x0,%fpcr # clear FPCR
15332
15333
fabs.x %fp0 # compare absolute values
15334
fabs.x %fp1
15335
fcmp.x %fp0,%fp1 # is first result > second?
15336
15337
fbgt.w fsub_unfl # yes; it's an underflow
15338
bra.w fsub_normal # no; it's not an underflow
15339
15340
##########################################################################
15341
15342
#
15343
# Sub: inputs are not both normalized; what are they?
15344
#
15345
fsub_not_norm:
15346
mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
15347
jmp (tbl_fsub_op.b,%pc,%d1.w*1)
15348
15349
swbeg &48
15350
tbl_fsub_op:
15351
short fsub_norm - tbl_fsub_op # NORM - NORM
15352
short fsub_zero_src - tbl_fsub_op # NORM - ZERO
15353
short fsub_inf_src - tbl_fsub_op # NORM - INF
15354
short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
15355
short fsub_norm - tbl_fsub_op # NORM - DENORM
15356
short fsub_res_snan - tbl_fsub_op # NORM - SNAN
15357
short tbl_fsub_op - tbl_fsub_op #
15358
short tbl_fsub_op - tbl_fsub_op #
15359
15360
short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
15361
short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
15362
short fsub_inf_src - tbl_fsub_op # ZERO - INF
15363
short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
15364
short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
15365
short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
15366
short tbl_fsub_op - tbl_fsub_op #
15367
short tbl_fsub_op - tbl_fsub_op #
15368
15369
short fsub_inf_dst - tbl_fsub_op # INF - NORM
15370
short fsub_inf_dst - tbl_fsub_op # INF - ZERO
15371
short fsub_inf_2 - tbl_fsub_op # INF - INF
15372
short fsub_res_qnan - tbl_fsub_op # INF - QNAN
15373
short fsub_inf_dst - tbl_fsub_op # INF - DENORM
15374
short fsub_res_snan - tbl_fsub_op # INF - SNAN
15375
short tbl_fsub_op - tbl_fsub_op #
15376
short tbl_fsub_op - tbl_fsub_op #
15377
15378
short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
15379
short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
15380
short fsub_res_qnan - tbl_fsub_op # QNAN - INF
15381
short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
15382
short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
15383
short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
15384
short tbl_fsub_op - tbl_fsub_op #
15385
short tbl_fsub_op - tbl_fsub_op #
15386
15387
short fsub_norm - tbl_fsub_op # DENORM - NORM
15388
short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
15389
short fsub_inf_src - tbl_fsub_op # DENORM - INF
15390
short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
15391
short fsub_norm - tbl_fsub_op # DENORM - DENORM
15392
short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
15393
short tbl_fsub_op - tbl_fsub_op #
15394
short tbl_fsub_op - tbl_fsub_op #
15395
15396
short fsub_res_snan - tbl_fsub_op # SNAN - NORM
15397
short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
15398
short fsub_res_snan - tbl_fsub_op # SNAN - INF
15399
short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
15400
short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
15401
short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
15402
short tbl_fsub_op - tbl_fsub_op #
15403
short tbl_fsub_op - tbl_fsub_op #
15404
15405
fsub_res_qnan:
15406
bra.l res_qnan
15407
fsub_res_snan:
15408
bra.l res_snan
15409
15410
#
15411
# both operands are ZEROes
15412
#
15413
fsub_zero_2:
15414
mov.b SRC_EX(%a0),%d0
15415
mov.b DST_EX(%a1),%d1
15416
eor.b %d1,%d0
15417
bpl.b fsub_zero_2_chk_rm
15418
15419
# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
15420
tst.b %d0 # is dst negative?
15421
bmi.b fsub_zero_2_rm # yes
15422
fmov.s &0x00000000,%fp0 # no; return +ZERO
15423
mov.b &z_bmask,FPSR_CC(%a6) # set Z
15424
rts
15425
15426
#
15427
# the ZEROes have the same signs:
15428
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429
# - -ZERO is returned in the case of RM.
15430
#
15431
fsub_zero_2_chk_rm:
15432
mov.b 3+L_SCR3(%a6),%d1
15433
andi.b &0x30,%d1 # extract rnd mode
15434
cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
15435
beq.b fsub_zero_2_rm # yes
15436
fmov.s &0x00000000,%fp0 # no; return +ZERO
15437
mov.b &z_bmask,FPSR_CC(%a6) # set Z
15438
rts
15439
15440
fsub_zero_2_rm:
15441
fmov.s &0x80000000,%fp0 # return -ZERO
15442
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
15443
rts
15444
15445
#
15446
# one operand is a ZERO and the other is a DENORM or a NORM.
15447
# scale the DENORM or NORM and jump to the regular fsub routine.
15448
#
15449
fsub_zero_dst:
15450
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15451
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15452
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15453
bsr.l scale_to_zero_src # scale the operand
15454
clr.w FP_SCR1_EX(%a6)
15455
clr.l FP_SCR1_HI(%a6)
15456
clr.l FP_SCR1_LO(%a6)
15457
bra.w fsub_zero_entry # go execute fsub
15458
15459
fsub_zero_src:
15460
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15461
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15462
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15463
bsr.l scale_to_zero_dst # scale the operand
15464
clr.w FP_SCR0_EX(%a6)
15465
clr.l FP_SCR0_HI(%a6)
15466
clr.l FP_SCR0_LO(%a6)
15467
bra.w fsub_zero_entry # go execute fsub
15468
15469
#
15470
# both operands are INFs. an OPERR will result if the INFs have the
15471
# same signs. else, the src INF (with its sign negated) is returned.
15472
#
15473
fsub_inf_2:
15474
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15475
mov.b DST_EX(%a1),%d1
15476
eor.b %d1,%d0
15477
bpl.l res_operr # weed out (+INF)-(+INF), (-INF)-(-INF)
15478
15479
# ok, so it's not an OPERR. but we do have to remember to return
15480
# the src INF since that's where the 881/882 gets the j-bit.
15481
15482
fsub_inf_src:
15483
fmovm.x SRC(%a0),&0x80 # return src INF
15484
fneg.x %fp0 # invert sign
15485
fbge.w fsub_inf_done # sign is now positive
15486
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15487
rts
15488
15489
fsub_inf_dst:
15490
fmovm.x DST(%a1),&0x80 # return dst INF
15491
tst.b DST_EX(%a1) # is INF negative?
15492
bpl.b fsub_inf_done # no
15493
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15494
rts
15495
15496
fsub_inf_done:
15497
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15498
rts
15499
15500
#########################################################################
15501
# XDEF **************************************************************** #
15502
# fsqrt(): emulates the fsqrt instruction #
15503
# fssqrt(): emulates the fssqrt instruction #
15504
# fdsqrt(): emulates the fdsqrt instruction #
15505
# #
15506
# XREF **************************************************************** #
15507
# scale_sqrt() - scale the source operand #
15508
# unf_res() - return default underflow result #
15509
# ovf_res() - return default overflow result #
15510
# res_qnan_1op() - return QNAN result #
15511
# res_snan_1op() - return SNAN result #
15512
# #
15513
# INPUT *************************************************************** #
15514
# a0 = pointer to extended precision source operand #
15515
# d0 = rnd prec,mode						#
15516
# #
15517
# OUTPUT ************************************************************** #
15518
# fp0 = result #
15519
# fp1 = EXOP (if exception occurred) #
15520
# #
15521
# ALGORITHM *********************************************************** #
15522
# Handle NANs, infinities, and zeroes as special cases. Divide #
15523
# norms/denorms into ext/sgl/dbl precision. #
15524
# For norms/denorms, scale the exponents such that a sqrt #
15525
# instruction won't cause an exception. Use the regular fsqrt to #
15526
# compute a result. Check if the regular operands would have taken #
15527
# an exception. If so, return the default overflow/underflow result #
15528
# and return the EXOP if exceptions are enabled. Else, scale the #
15529
# result operand to the proper exponent. #
15530
# #
15531
#########################################################################
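#
# A rough C view of the scaling idea above, for illustration only; sqrtl()
# stands in for the hardware fsqrt and the helper names are not part of this
# package:
#
#	/* sqrt(m * 2^e) = sqrt(m * 2^e') * 2^((e - e')/2), where scale_sqrt()
#	 * forces e' to 0x3ffe/0x3fff and returns scale = (e' - e)/2 in d0
#	 */
#	res = sqrtl(scaled_src);		/* cannot overflow or underflow */
#	res_exp = biased_exp(res) - scale;	/* undo the scaling */
#	/* if res_exp falls outside the ovfl/unfl range for the rounding
#	 * precision, return the default result (and the EXOP if enabled)
#	 */
#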
15532
15533
global fssqrt
15534
fssqrt:
15535
andi.b &0x30,%d0 # clear rnd prec
15536
ori.b &s_mode*0x10,%d0 # insert sgl precision
15537
bra.b fsqrt
15538
15539
global fdsqrt
15540
fdsqrt:
15541
andi.b &0x30,%d0 # clear rnd prec
15542
ori.b &d_mode*0x10,%d0 # insert dbl precision
15543
15544
global fsqrt
15545
fsqrt:
15546
mov.l %d0,L_SCR3(%a6) # store rnd info
15547
clr.w %d1
15548
mov.b STAG(%a6),%d1
15549
bne.w fsqrt_not_norm # optimize on non-norm input
15550
15551
#
15552
# SQUARE ROOT: norms and denorms ONLY!
15553
#
15554
fsqrt_norm:
15555
tst.b SRC_EX(%a0) # is operand negative?
15556
bmi.l res_operr # yes
15557
15558
andi.b &0xc0,%d0 # is precision extended?
15559
bne.b fsqrt_not_ext # no; go handle sgl or dbl
15560
15561
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15562
fmov.l &0x0,%fpsr # clear FPSR
15563
15564
fsqrt.x (%a0),%fp0 # execute square root
15565
15566
fmov.l %fpsr,%d1
15567
or.l %d1,USER_FPSR(%a6) # set N,INEX
15568
15569
rts
15570
15571
fsqrt_denorm:
15572
tst.b SRC_EX(%a0) # is operand negative?
15573
bmi.l res_operr # yes
15574
15575
andi.b &0xc0,%d0 # is precision extended?
15576
bne.b fsqrt_not_ext # no; go handle sgl or dbl
15577
15578
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15579
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15580
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15581
15582
bsr.l scale_sqrt # calculate scale factor
15583
15584
bra.w fsqrt_sd_normal
15585
15586
#
15587
# operand is either single or double
15588
#
15589
fsqrt_not_ext:
15590
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
15591
bne.w fsqrt_dbl
15592
15593
#
15594
# operand is to be rounded to single precision
15595
#
15596
fsqrt_sgl:
15597
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15598
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15599
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15600
15601
bsr.l scale_sqrt # calculate scale factor
15602
15603
cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
15604
beq.w fsqrt_sd_may_unfl
15605
bgt.w fsqrt_sd_unfl # yes; go handle underflow
15606
cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
15607
beq.w fsqrt_sd_may_ovfl # maybe; go check
15608
blt.w fsqrt_sd_ovfl # yes; go handle overflow
15609
15610
#
15611
# operand will NOT overflow or underflow when moved in to the fp reg file
15612
#
15613
fsqrt_sd_normal:
15614
fmov.l &0x0,%fpsr # clear FPSR
15615
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15616
15617
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15618
15619
fmov.l %fpsr,%d1 # save FPSR
15620
fmov.l &0x0,%fpcr # clear FPCR
15621
15622
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15623
15624
fsqrt_sd_normal_exit:
15625
mov.l %d2,-(%sp) # save d2
15626
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15627
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
15628
mov.l %d1,%d2 # make a copy
15629
andi.l &0x7fff,%d1 # strip sign
15630
sub.l %d0,%d1 # add scale factor
15631
andi.w &0x8000,%d2 # keep old sign
15632
or.w %d1,%d2 # concat old sign,new exp
15633
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
15634
mov.l (%sp)+,%d2 # restore d2
15635
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
15636
rts
15637
15638
#
15639
# operand is to be rounded to double precision
15640
#
15641
fsqrt_dbl:
15642
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15643
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15644
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15645
15646
bsr.l scale_sqrt # calculate scale factor
15647
15648
cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
15649
beq.w fsqrt_sd_may_unfl
15650
bgt.b fsqrt_sd_unfl # yes; go handle underflow
15651
cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
15652
beq.w fsqrt_sd_may_ovfl # maybe; go check
15653
blt.w fsqrt_sd_ovfl # yes; go handle overflow
15654
bra.w fsqrt_sd_normal # no; go handle normalized op
15655
15656
# we're on the line here and the distinguishing characteristic is whether
15657
# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
15658
# otherwise fall through to underflow.
15659
fsqrt_sd_may_unfl:
15660
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15661
bne.w fsqrt_sd_normal # yes, so no underflow
15662
15663
#
15664
# operand WILL underflow when moved in to the fp register file
15665
#
15666
fsqrt_sd_unfl:
15667
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15668
15669
fmov.l &rz_mode*0x10,%fpcr # set FPCR
15670
fmov.l &0x0,%fpsr # clear FPSR
15671
15672
fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
15673
15674
fmov.l %fpsr,%d1 # save status
15675
fmov.l &0x0,%fpcr # clear FPCR
15676
15677
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15678
15679
# if underflow or inexact is enabled, go calculate EXOP first.
15680
mov.b FPCR_ENABLE(%a6),%d1
15681
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15682
bne.b fsqrt_sd_unfl_ena # yes
15683
15684
fsqrt_sd_unfl_dis:
15685
fmovm.x &0x80,FP_SCR0(%a6) # store out result
15686
15687
lea FP_SCR0(%a6),%a0 # pass: result addr
15688
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15689
bsr.l unf_res # calculate default result
15690
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
15691
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15692
rts
15693
15694
#
15695
# operand will underflow AND underflow is enabled.
15696
# Therefore, we must return the result rounded to extended precision.
15697
#
15698
fsqrt_sd_unfl_ena:
15699
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15700
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15701
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
15702
15703
mov.l %d2,-(%sp) # save d2
15704
mov.l %d1,%d2 # make a copy
15705
andi.l &0x7fff,%d1 # strip sign
15706
andi.w &0x8000,%d2 # keep old sign
15707
sub.l %d0,%d1 # subtract scale factor
15708
addi.l &0x6000,%d1 # add new bias
15709
andi.w &0x7fff,%d1
15710
or.w %d2,%d1 # concat new sign,new exp
15711
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
15712
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
15713
mov.l (%sp)+,%d2 # restore d2
15714
bra.b fsqrt_sd_unfl_dis
15715
15716
#
15717
# operand WILL overflow.
15718
#
15719
fsqrt_sd_ovfl:
15720
fmov.l &0x0,%fpsr # clear FPSR
15721
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15722
15723
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15724
15725
fmov.l &0x0,%fpcr # clear FPCR
15726
fmov.l %fpsr,%d1 # save FPSR
15727
15728
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15729
15730
fsqrt_sd_ovfl_tst:
15731
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15732
15733
mov.b FPCR_ENABLE(%a6),%d1
15734
andi.b &0x13,%d1 # is OVFL or INEX enabled?
15735
bne.b fsqrt_sd_ovfl_ena # yes
15736
15737
#
15738
# OVFL is not enabled; therefore, we must create the default result by
15739
# calling ovf_res().
15740
#
15741
fsqrt_sd_ovfl_dis:
15742
btst &neg_bit,FPSR_CC(%a6) # is result negative?
15743
sne %d1 # set sign param accordingly
15744
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
15745
bsr.l ovf_res # calculate default result
15746
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15747
fmovm.x (%a0),&0x80 # return default result in fp0
15748
rts
15749
15750
#
15751
# OVFL is enabled.
15752
# the INEX2 bit has already been updated by the round to the correct precision.
15753
# now, round to extended (and don't alter the FPSR).
15754
#
15755
fsqrt_sd_ovfl_ena:
15756
mov.l %d2,-(%sp) # save d2
15757
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15758
mov.l %d1,%d2 # make a copy
15759
andi.l &0x7fff,%d1 # strip sign
15760
andi.w &0x8000,%d2 # keep old sign
15761
sub.l %d0,%d1 # add scale factor
15762
subi.l &0x6000,%d1 # subtract bias
15763
andi.w &0x7fff,%d1
15764
or.w %d2,%d1 # concat sign,exp
15765
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15766
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15767
mov.l (%sp)+,%d2 # restore d2
15768
bra.b fsqrt_sd_ovfl_dis
15769
15770
#
15771
# the move in MAY overflow. so...
15772
#
15773
fsqrt_sd_may_ovfl:
15774
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15775
bne.w fsqrt_sd_ovfl # yes, so overflow
15776
15777
fmov.l &0x0,%fpsr # clear FPSR
15778
fmov.l L_SCR3(%a6),%fpcr # set FPCR
15779
15780
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15781
15782
fmov.l %fpsr,%d1 # save status
15783
fmov.l &0x0,%fpcr # clear FPCR
15784
15785
or.l %d1,USER_FPSR(%a6) # save INEX2,N
15786
15787
fmov.x %fp0,%fp1 # make a copy of result
15788
fcmp.b %fp1,&0x1 # is |result| >= 1.b?
15789
fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
15790
15791
# no, it didn't overflow; we have correct result
15792
bra.w fsqrt_sd_normal_exit
15793
15794
##########################################################################
15795
15796
#
15797
# input is not normalized; what is it?
15798
#
15799
fsqrt_not_norm:
15800
cmpi.b %d1,&DENORM # weed out DENORM
15801
beq.w fsqrt_denorm
15802
cmpi.b %d1,&ZERO # weed out ZERO
15803
beq.b fsqrt_zero
15804
cmpi.b %d1,&INF # weed out INF
15805
beq.b fsqrt_inf
15806
cmpi.b %d1,&SNAN # weed out SNAN
15807
beq.l res_snan_1op
15808
bra.l res_qnan_1op
15809
15810
#
15811
# fsqrt(+0) = +0
15812
# fsqrt(-0) = -0
15813
# fsqrt(+INF) = +INF
15814
# fsqrt(-INF) = OPERR
15815
#
15816
fsqrt_zero:
15817
tst.b SRC_EX(%a0) # is ZERO positive or negative?
15818
bmi.b fsqrt_zero_m # negative
15819
fsqrt_zero_p:
15820
fmov.s &0x00000000,%fp0 # return +ZERO
15821
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
15822
rts
15823
fsqrt_zero_m:
15824
fmov.s &0x80000000,%fp0 # return -ZERO
15825
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
15826
rts
15827
15828
fsqrt_inf:
15829
tst.b SRC_EX(%a0) # is INF positive or negative?
15830
bmi.l res_operr # negative
15831
fsqrt_inf_p:
15832
fmovm.x SRC(%a0),&0x80 # return +INF in fp0
15833
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
15834
rts
15835
15836
##########################################################################
15837
15838
#########################################################################
15839
# XDEF **************************************************************** #
15840
# addsub_scaler2(): scale inputs to fadd/fsub such that no #
15841
# OVFL/UNFL exceptions will result #
15842
# #
15843
# XREF **************************************************************** #
15844
# norm() - normalize mantissa after adjusting exponent #
15845
# #
15846
# INPUT *************************************************************** #
15847
# FP_SRC(a6) = fp op1(src) #
15848
# FP_DST(a6) = fp op2(dst) #
15849
# #
15850
# OUTPUT ************************************************************** #
15851
# FP_SRC(a6) = fp op1 scaled(src) #
15852
# FP_DST(a6) = fp op2 scaled(dst) #
15853
# d0 = scale amount #
15854
# #
15855
# ALGORITHM *********************************************************** #
15856
# If the DST exponent is > the SRC exponent, set the DST exponent #
15857
# equal to 0x3fff and scale the SRC exponent by the value that the #
15858
# DST exponent was scaled by. If the SRC exponent is greater or equal, #
15859
# do the opposite. Return this scale factor in d0. #
15860
# If the two exponents differ by > the number of mantissa bits #
15861
# plus two, then set the smallest exponent to a very small value as a #
15862
# quick shortcut. #
15863
# #
15864
#########################################################################
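#
# Rough C sketch of the scaling above (illustrative only; these names are not
# part of the package, which works on the raw images in FP_SCR0/FP_SCR1):
#
#	scale = 0x3fff - larger_exp;	/* force the larger exp to 0x3fff */
#	larger_exp  += scale;		/* now 0x3fff */
#	smaller_exp += scale;		/* shifted by the same amount */
#	/* operands too far apart to interact beyond a sticky bit?
#	 * then just pin the smaller exponent to 1 as a shortcut.
#	 */
#	if (larger_exp - smaller_exp >= MANTISSA_LEN + 2)
#		smaller_exp = 1;
#	return scale;			/* handed back in d0 */
#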
15865
15866
global addsub_scaler2
15867
addsub_scaler2:
15868
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15869
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15870
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15871
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15872
mov.w SRC_EX(%a0),%d0
15873
mov.w DST_EX(%a1),%d1
15874
mov.w %d0,FP_SCR0_EX(%a6)
15875
mov.w %d1,FP_SCR1_EX(%a6)
15876
15877
andi.w &0x7fff,%d0
15878
andi.w &0x7fff,%d1
15879
mov.w %d0,L_SCR1(%a6) # store src exponent
15880
mov.w %d1,2+L_SCR1(%a6) # store dst exponent
15881
15882
cmp.w %d0, %d1 # is src exp >= dst exp?
15883
bge.l src_exp_ge2
15884
15885
# dst exp is > src exp; scale dst to exp = 0x3fff
15886
dst_exp_gt2:
15887
bsr.l scale_to_zero_dst
15888
mov.l %d0,-(%sp) # save scale factor
15889
15890
cmpi.b STAG(%a6),&DENORM # is src denormalized?
15891
bne.b cmpexp12
15892
15893
lea FP_SCR0(%a6),%a0
15894
bsr.l norm # normalize the denorm; result is new exp
15895
neg.w %d0 # new exp = -(shft val)
15896
mov.w %d0,L_SCR1(%a6) # insert new exp
15897
15898
cmpexp12:
15899
mov.w 2+L_SCR1(%a6),%d0
15900
subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15901
15902
cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
15903
bge.b quick_scale12
15904
15905
mov.w L_SCR1(%a6),%d0
15906
add.w 0x2(%sp),%d0 # scale src exponent by scale factor
15907
mov.w FP_SCR0_EX(%a6),%d1
15908
and.w &0x8000,%d1
15909
or.w %d1,%d0 # concat {sgn,new exp}
15910
mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
15911
15912
mov.l (%sp)+,%d0 # return SCALE factor
15913
rts
15914
15915
quick_scale12:
15916
andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15917
bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
15918
15919
mov.l (%sp)+,%d0 # return SCALE factor
15920
rts
15921
15922
# src exp is >= dst exp; scale src to exp = 0x3fff
15923
src_exp_ge2:
15924
bsr.l scale_to_zero_src
15925
mov.l %d0,-(%sp) # save scale factor
15926
15927
cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
15928
bne.b cmpexp22
15929
lea FP_SCR1(%a6),%a0
15930
bsr.l norm # normalize the denorm; result is new exp
15931
neg.w %d0 # new exp = -(shft val)
15932
mov.w %d0,2+L_SCR1(%a6) # insert new exp
15933
15934
cmpexp22:
15935
mov.w L_SCR1(%a6),%d0
15936
subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15937
15938
cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
15939
bge.b quick_scale22
15940
15941
mov.w 2+L_SCR1(%a6),%d0
15942
add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
15943
mov.w FP_SCR1_EX(%a6),%d1
15944
andi.w &0x8000,%d1
15945
or.w %d1,%d0 # concat {sgn,new exp}
15946
mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
15947
15948
mov.l (%sp)+,%d0 # return SCALE factor
15949
rts
15950
15951
quick_scale22:
15952
andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15953
bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
15954
15955
mov.l (%sp)+,%d0 # return SCALE factor
15956
rts
15957
15958
##########################################################################
15959
15960
#########################################################################
15961
# XDEF **************************************************************** #
15962
# scale_to_zero_src(): scale the exponent of extended precision #
15963
# value at FP_SCR0(a6). #
15964
# #
15965
# XREF **************************************************************** #
15966
# norm() - normalize the mantissa if the operand was a DENORM #
15967
# #
15968
# INPUT *************************************************************** #
15969
# FP_SCR0(a6) = extended precision operand to be scaled #
15970
# #
15971
# OUTPUT ************************************************************** #
15972
# FP_SCR0(a6) = scaled extended precision operand #
15973
# d0 = scale value #
15974
# #
15975
# ALGORITHM *********************************************************** #
15976
# Set the exponent of the input operand to 0x3fff. Save the value #
15977
# of the difference between the original and new exponent. Then, #
15978
# normalize the operand if it was a DENORM. Add this normalization #
15979
# value to the previous value. Return the result. #
15980
# #
15981
#########################################################################
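#
# Equivalent C sketch (illustrative only; norm() below is a schematic stand-in
# for the package's norm routine, which returns the normalization shift count):
#
#	if (operand_is_denorm)
#		exp = -norm(&mantissa);		/* new exp = -(shift count) */
#	scale = 0x3fff - exp;			/* returned in d0 */
#	set_biased_exp(operand, 0x3fff);	/* sign bit is preserved */
#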
15982
15983
global scale_to_zero_src
15984
scale_to_zero_src:
15985
mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
15986
mov.w %d1,%d0 # make a copy
15987
15988
andi.l &0x7fff,%d1 # extract operand's exponent
15989
15990
andi.w &0x8000,%d0 # extract operand's sgn
15991
or.w &0x3fff,%d0 # insert new operand's exponent(=0)
15992
15993
mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
15994
15995
cmpi.b STAG(%a6),&DENORM # is operand normalized?
15996
beq.b stzs_denorm # normalize the DENORM
15997
15998
stzs_norm:
15999
mov.l &0x3fff,%d0
16000
sub.l %d1,%d0 # scale = BIAS + (-exp)
16001
16002
rts
16003
16004
stzs_denorm:
16005
lea FP_SCR0(%a6),%a0 # pass ptr to src op
16006
bsr.l norm # normalize denorm
16007
neg.l %d0 # new exponent = -(shft val)
16008
mov.l %d0,%d1 # prepare for op_norm call
16009
bra.b stzs_norm # finish scaling
16010
16011
###
16012
16013
#########################################################################
16014
# XDEF **************************************************************** #
16015
# scale_sqrt(): scale the input operand exponent so a subsequent #
16016
# fsqrt operation won't take an exception. #
16017
# #
16018
# XREF **************************************************************** #
16019
# norm() - normalize the mantissa if the operand was a DENORM #
16020
# #
16021
# INPUT *************************************************************** #
16022
# FP_SCR0(a6) = extended precision operand to be scaled #
16023
# #
16024
# OUTPUT ************************************************************** #
16025
# FP_SCR0(a6) = scaled extended precision operand #
16026
# d0 = scale value #
16027
# #
16028
# ALGORITHM *********************************************************** #
16029
# If the input operand is a DENORM, normalize it. #
16030
# If the exponent of the input operand is even, set the exponent #
16031
# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
16032
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
16033
# return a scale factor of "(0x3fff-exp)/2".				#
16034
# #
16035
#########################################################################
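#
# Rough C sketch of the even/odd split (illustrative only):
#
#	if (exp & 1) {			/* odd exponent */
#		new_exp = 0x3fff;
#		scale   = (0x3fff - exp) / 2;	/* exact: difference is even */
#	} else {			/* even exponent */
#		new_exp = 0x3ffe;
#		scale   = (0x3ffe - exp) / 2;
#	}
#	/* since exp == new_exp - 2*scale, the result exponent of the fsqrt
#	 * is rebuilt exactly by subtracting scale from it afterwards
#	 */
#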
16036
16037
global scale_sqrt
16038
scale_sqrt:
16039
cmpi.b STAG(%a6),&DENORM # is operand normalized?
16040
beq.b ss_denorm # normalize the DENORM
16041
16042
mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
16043
andi.l &0x7fff,%d1 # extract operand's exponent
16044
16045
andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
16046
16047
btst &0x0,%d1 # is exp even or odd?
16048
beq.b ss_norm_even
16049
16050
ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16051
16052
mov.l &0x3fff,%d0
16053
sub.l %d1,%d0 # scale = BIAS + (-exp)
16054
asr.l &0x1,%d0 # divide scale factor by 2
16055
rts
16056
16057
ss_norm_even:
16058
ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16059
16060
mov.l &0x3ffe,%d0
16061
sub.l %d1,%d0 # scale = BIAS + (-exp)
16062
asr.l &0x1,%d0 # divide scale factor by 2
16063
rts
16064
16065
ss_denorm:
16066
lea FP_SCR0(%a6),%a0 # pass ptr to src op
16067
bsr.l norm # normalize denorm
16068
16069
btst &0x0,%d0 # is exp even or odd?
16070
beq.b ss_denorm_even
16071
16072
ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16073
16074
add.l &0x3fff,%d0
16075
asr.l &0x1,%d0 # divide scale factor by 2
16076
rts
16077
16078
ss_denorm_even:
16079
ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16080
16081
add.l &0x3ffe,%d0
16082
asr.l &0x1,%d0 # divide scale factor by 2
16083
rts
16084
16085
###
16086
16087
#########################################################################
16088
# XDEF **************************************************************** #
16089
# scale_to_zero_dst(): scale the exponent of extended precision #
16090
# value at FP_SCR1(a6). #
16091
# #
16092
# XREF **************************************************************** #
16093
# norm() - normalize the mantissa if the operand was a DENORM #
16094
# #
16095
# INPUT *************************************************************** #
16096
# FP_SCR1(a6) = extended precision operand to be scaled #
16097
# #
16098
# OUTPUT ************************************************************** #
16099
# FP_SCR1(a6) = scaled extended precision operand #
16100
# d0 = scale value #
16101
# #
16102
# ALGORITHM *********************************************************** #
16103
# Set the exponent of the input operand to 0x3fff. Save the value #
16104
# of the difference between the original and new exponent. Then, #
16105
# normalize the operand if it was a DENORM. Add this normalization #
16106
# value to the previous value. Return the result. #
16107
# #
16108
#########################################################################
16109
16110
global scale_to_zero_dst
16111
scale_to_zero_dst:
16112
mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
16113
mov.w %d1,%d0 # make a copy
16114
16115
andi.l &0x7fff,%d1 # extract operand's exponent
16116
16117
andi.w &0x8000,%d0 # extract operand's sgn
16118
or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16119
16120
mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
16121
16122
cmpi.b DTAG(%a6),&DENORM # is operand normalized?
16123
beq.b stzd_denorm # normalize the DENORM
16124
16125
stzd_norm:
16126
mov.l &0x3fff,%d0
16127
sub.l %d1,%d0 # scale = BIAS + (-exp)
16128
rts
16129
16130
stzd_denorm:
16131
lea FP_SCR1(%a6),%a0 # pass ptr to dst op
16132
bsr.l norm # normalize denorm
16133
neg.l %d0 # new exponent = -(shft val)
16134
mov.l %d0,%d1 # prepare for op_norm call
16135
bra.b stzd_norm # finish scaling
16136
16137
##########################################################################
16138
16139
#########################################################################
16140
# XDEF **************************************************************** #
16141
# res_qnan(): return default result w/ QNAN operand for dyadic #
16142
# res_snan(): return default result w/ SNAN operand for dyadic #
16143
# res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
16144
# res_snan_1op(): return dflt result w/ SNAN operand for monadic #
16145
# #
16146
# XREF **************************************************************** #
16147
# None #
16148
# #
16149
# INPUT *************************************************************** #
16150
# FP_SRC(a6) = pointer to extended precision src operand #
16151
# FP_DST(a6) = pointer to extended precision dst operand #
16152
# #
16153
# OUTPUT ************************************************************** #
16154
# fp0 = default result #
16155
# #
16156
# ALGORITHM *********************************************************** #
16157
# If either operand (but not both operands) of an operation is a #
16158
# nonsignalling NAN, then that NAN is returned as the result. If both #
16159
# operands are nonsignalling NANs, then the destination operand #
16160
# nonsignalling NAN is returned as the result. #
16161
# If either operand to an operation is a signalling NAN (SNAN), #
16162
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
16163
# enable bit is set in the FPCR, then the trap is taken and the #
16164
# destination is not modified. If the SNAN trap enable bit is not set, #
16165
# then the SNAN is converted to a nonsignalling NAN (by setting the #
16166
# SNAN bit in the operand to one), and the operation continues as #
16167
# described in the preceding paragraph, for nonsignalling NANs. #
16168
# Make sure the appropriate FPSR bits are set before exiting. #
16169
# #
16170
#########################################################################
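#
# The selection order described above, as a C sketch (illustrative only):
#
#	/* dyadic case: the destination NAN wins; an SNAN is made quiet by
#	 * setting the quiet bit (msb of the fraction) before it is returned,
#	 * and the SNAN/AIOP bits are accrued in the FPSR.
#	 */
#	if (dst_tag == SNAN)      { quiet(dst); return dst; }
#	else if (dst_tag == QNAN) { return dst; }
#	else if (src_tag == QNAN) { return src; }
#	else                      { quiet(src); return src; }	/* src is SNAN */
#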
16171
16172
global res_qnan
16173
global res_snan
16174
res_qnan:
16175
res_snan:
16176
cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
16177
beq.b dst_snan2
16178
cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
16179
beq.b dst_qnan2
16180
src_nan:
16181
cmp.b STAG(%a6), &QNAN
16182
beq.b src_qnan2
16183
global res_snan_1op
16184
res_snan_1op:
16185
src_snan2:
16186
bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
16187
or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16188
lea FP_SRC(%a6), %a0
16189
bra.b nan_comp
16190
global res_qnan_1op
16191
res_qnan_1op:
16192
src_qnan2:
16193
or.l &nan_mask, USER_FPSR(%a6)
16194
lea FP_SRC(%a6), %a0
16195
bra.b nan_comp
16196
dst_snan2:
16197
or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16198
bset &0x6, FP_DST_HI(%a6) # set SNAN bit
16199
lea FP_DST(%a6), %a0
16200
bra.b nan_comp
16201
dst_qnan2:
16202
lea FP_DST(%a6), %a0
16203
cmp.b STAG(%a6), &SNAN
16204
bne nan_done
16205
or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
16206
nan_done:
16207
or.l &nan_mask, USER_FPSR(%a6)
16208
nan_comp:
16209
btst &0x7, FTEMP_EX(%a0) # is NAN neg?
16210
beq.b nan_not_neg
16211
or.l &neg_mask, USER_FPSR(%a6)
16212
nan_not_neg:
16213
fmovm.x (%a0), &0x80
16214
rts
16215
16216
#########################################################################
16217
# XDEF **************************************************************** #
16218
# res_operr(): return default result during operand error #
16219
# #
16220
# XREF **************************************************************** #
16221
# None #
16222
# #
16223
# INPUT *************************************************************** #
16224
# None #
16225
# #
16226
# OUTPUT ************************************************************** #
16227
# fp0 = default operand error result #
16228
# #
16229
# ALGORITHM *********************************************************** #
16230
# A nonsignalling NAN is returned as the default result when		#
16231
# an operand error occurs for the following cases: #
16232
# #
16233
# Multiply: (Infinity x Zero) #
16234
# Divide : (Zero / Zero) || (Infinity / Infinity) #
16235
# #
16236
#########################################################################
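#
# The default result is simply the quiet NAN pattern stored at nan_return
# below; viewed as an extended-precision image (illustrative C notation):
#
#	unsigned long nan96[3] = { 0x7fff0000, 0xffffffff, 0xffffffff };
#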
16237
16238
global res_operr
16239
res_operr:
16240
or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16241
fmovm.x nan_return(%pc), &0x80
16242
rts
16243
16244
nan_return:
16245
long 0x7fff0000, 0xffffffff, 0xffffffff
16246
16247
#########################################################################
16248
# fdbcc(): routine to emulate the fdbcc instruction #
16249
# #
16250
# XDEF **************************************************************** #
16251
# _fdbcc() #
16252
# #
16253
# XREF **************************************************************** #
16254
# fetch_dreg() - fetch Dn value #
16255
# store_dreg_l() - store updated Dn value #
16256
# #
16257
# INPUT *************************************************************** #
16258
# d0 = displacement #
16259
# #
16260
# OUTPUT ************************************************************** #
16261
# none #
16262
# #
16263
# ALGORITHM *********************************************************** #
16264
# This routine checks which conditional predicate is specified by #
16265
# the stacked fdbcc instruction opcode and then branches to a routine #
16266
# for that predicate. The corresponding fbcc instruction is then used #
16267
# to see whether the condition (specified by the stacked FPSR) is true #
16268
# or false. #
16269
# If a BSUN exception should be indicated, the BSUN and AIOP		#
16270
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16271
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16272
# enabled BSUN should not be flagged and the predicate is false, then	#
16273
# Dn is fetched and decremented by one. If Dn is not equal to -1, add #
16274
# the displacement value to the stacked PC so that when an "rte" is #
16275
# finally executed, the branch occurs. #
16276
# #
16277
#########################################################################
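#
# Rough C sketch of the fdbcc semantics emulated below (illustrative only;
# fetch_dreg()/store_dreg_l() are the XREF helpers named above, but their C
# signatures here are schematic):
#
#	if (!cc_true(predicate, fpsr_cc)) {	/* condition false? */
#		dn = fetch_dreg(reg);
#		dn = (dn & ~0xffff) | ((dn - 1) & 0xffff);  /* dec low word */
#		store_dreg_l(reg, dn);
#		if ((short)dn != -1)
#			stacked_pc += displacement;	/* branch taken on "rte" */
#	}
#	/* a nonaware predicate tested while the NAN ccode bit is set also
#	 * accrues BSUN/AIOP and, if BSUN is enabled, raises that exception
#	 */
#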
16278
global _fdbcc
16279
_fdbcc:
16280
mov.l %d0,L_SCR1(%a6) # save displacement
16281
16282
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16283
16284
clr.l %d1 # clear scratch reg
16285
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16286
ror.l &0x8,%d1 # rotate to top byte
16287
fmov.l %d1,%fpsr # insert into FPSR
16288
16289
mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
16290
jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16291
16292
tbl_fdbcc:
16293
short fdbcc_f - tbl_fdbcc # 00
16294
short fdbcc_eq - tbl_fdbcc # 01
16295
short fdbcc_ogt - tbl_fdbcc # 02
16296
short fdbcc_oge - tbl_fdbcc # 03
16297
short fdbcc_olt - tbl_fdbcc # 04
16298
short fdbcc_ole - tbl_fdbcc # 05
16299
short fdbcc_ogl - tbl_fdbcc # 06
16300
short fdbcc_or - tbl_fdbcc # 07
16301
short fdbcc_un - tbl_fdbcc # 08
16302
short fdbcc_ueq - tbl_fdbcc # 09
16303
short fdbcc_ugt - tbl_fdbcc # 10
16304
short fdbcc_uge - tbl_fdbcc # 11
16305
short fdbcc_ult - tbl_fdbcc # 12
16306
short fdbcc_ule - tbl_fdbcc # 13
16307
short fdbcc_neq - tbl_fdbcc # 14
16308
short fdbcc_t - tbl_fdbcc # 15
16309
short fdbcc_sf - tbl_fdbcc # 16
16310
short fdbcc_seq - tbl_fdbcc # 17
16311
short fdbcc_gt - tbl_fdbcc # 18
16312
short fdbcc_ge - tbl_fdbcc # 19
16313
short fdbcc_lt - tbl_fdbcc # 20
16314
short fdbcc_le - tbl_fdbcc # 21
16315
short fdbcc_gl - tbl_fdbcc # 22
16316
short fdbcc_gle - tbl_fdbcc # 23
16317
short fdbcc_ngle - tbl_fdbcc # 24
16318
short fdbcc_ngl - tbl_fdbcc # 25
16319
short fdbcc_nle - tbl_fdbcc # 26
16320
short fdbcc_nlt - tbl_fdbcc # 27
16321
short fdbcc_nge - tbl_fdbcc # 28
16322
short fdbcc_ngt - tbl_fdbcc # 29
16323
short fdbcc_sneq - tbl_fdbcc # 30
16324
short fdbcc_st - tbl_fdbcc # 31
16325
16326
#########################################################################
16327
# #
16328
# IEEE Nonaware tests #
16329
# #
16330
# For the IEEE nonaware tests, only the false branch changes the #
16331
# counter. However, the true branch may set bsun so we check to see #
16332
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
16333
# #
16334
# The cases EQ and NE are shared by the Aware and Nonaware groups #
16335
# and are incapable of setting the BSUN exception bit. #
16336
# #
16337
# Typically, only one of the two possible branch directions could #
16338
# have the NAN bit set. #
16339
# (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
16340
# is preserved.) #
16341
# #
16342
#########################################################################
16343
16344
#
16345
# equal:
16346
#
16347
# Z
16348
#
16349
fdbcc_eq:
16350
fbeq.w fdbcc_eq_yes # equal?
16351
fdbcc_eq_no:
16352
bra.w fdbcc_false # no; go handle counter
16353
fdbcc_eq_yes:
16354
rts
16355
16356
#
16357
# not equal:
16358
# _
16359
# Z
16360
#
16361
fdbcc_neq:
16362
fbneq.w fdbcc_neq_yes # not equal?
16363
fdbcc_neq_no:
16364
bra.w fdbcc_false # no; go handle counter
16365
fdbcc_neq_yes:
16366
rts
16367
16368
#
16369
# greater than:
16370
# _______
16371
# NANvZvN
16372
#
16373
fdbcc_gt:
16374
fbgt.w fdbcc_gt_yes # greater than?
16375
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16376
beq.w fdbcc_false # no;go handle counter
16377
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16378
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379
bne.w fdbcc_bsun # yes; we have an exception
16380
bra.w fdbcc_false # no; go handle counter
16381
fdbcc_gt_yes:
16382
rts # do nothing
16383
16384
#
16385
# not greater than:
16386
#
16387
# NANvZvN
16388
#
16389
fdbcc_ngt:
16390
fbngt.w fdbcc_ngt_yes # not greater than?
16391
fdbcc_ngt_no:
16392
bra.w fdbcc_false # no; go handle counter
16393
fdbcc_ngt_yes:
16394
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16395
beq.b fdbcc_ngt_done # no;go finish
16396
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16397
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398
bne.w fdbcc_bsun # yes; we have an exception
16399
fdbcc_ngt_done:
16400
rts # no; do nothing
16401
16402
#
16403
# greater than or equal:
16404
# _____
16405
# Zv(NANvN)
16406
#
16407
fdbcc_ge:
16408
fbge.w fdbcc_ge_yes # greater than or equal?
16409
fdbcc_ge_no:
16410
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16411
beq.w fdbcc_false # no;go handle counter
16412
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16413
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414
bne.w fdbcc_bsun # yes; we have an exception
16415
bra.w fdbcc_false # no; go handle counter
16416
fdbcc_ge_yes:
16417
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16418
beq.b fdbcc_ge_yes_done # no;go do nothing
16419
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16420
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421
bne.w fdbcc_bsun # yes; we have an exception
16422
fdbcc_ge_yes_done:
16423
rts # do nothing
16424
16425
#
16426
# not (greater than or equal):
16427
# _
16428
# NANv(N^Z)
16429
#
16430
fdbcc_nge:
16431
fbnge.w fdbcc_nge_yes # not (greater than or equal)?
16432
fdbcc_nge_no:
16433
bra.w fdbcc_false # no; go handle counter
16434
fdbcc_nge_yes:
16435
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16436
beq.b fdbcc_nge_done # no;go finish
16437
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16438
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439
bne.w fdbcc_bsun # yes; we have an exception
16440
fdbcc_nge_done:
16441
rts # no; do nothing
16442
16443
#
16444
# less than:
16445
# _____
16446
# N^(NANvZ)
16447
#
16448
fdbcc_lt:
16449
fblt.w fdbcc_lt_yes # less than?
16450
fdbcc_lt_no:
16451
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16452
beq.w fdbcc_false # no; go handle counter
16453
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16454
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455
bne.w fdbcc_bsun # yes; we have an exception
16456
bra.w fdbcc_false # no; go handle counter
16457
fdbcc_lt_yes:
16458
rts # do nothing
16459
16460
#
16461
# not less than:
16462
# _
16463
# NANv(ZvN)
16464
#
16465
fdbcc_nlt:
16466
fbnlt.w fdbcc_nlt_yes # not less than?
16467
fdbcc_nlt_no:
16468
bra.w fdbcc_false # no; go handle counter
16469
fdbcc_nlt_yes:
16470
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16471
beq.b fdbcc_nlt_done # no;go finish
16472
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16473
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474
bne.w fdbcc_bsun # yes; we have an exception
16475
fdbcc_nlt_done:
16476
rts # no; do nothing
16477
16478
#
16479
# less than or equal:
16480
# ___
16481
# Zv(N^NAN)
16482
#
16483
fdbcc_le:
16484
fble.w fdbcc_le_yes # less than or equal?
16485
fdbcc_le_no:
16486
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16487
beq.w fdbcc_false # no; go handle counter
16488
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16489
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490
bne.w fdbcc_bsun # yes; we have an exception
16491
bra.w fdbcc_false # no; go handle counter
16492
fdbcc_le_yes:
16493
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16494
beq.b fdbcc_le_yes_done # no; go do nothing
16495
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16496
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497
bne.w fdbcc_bsun # yes; we have an exception
16498
fdbcc_le_yes_done:
16499
rts # do nothing
16500
16501
#
16502
# not (less than or equal):
16503
# ___
16504
# NANv(NvZ)
16505
#
16506
fdbcc_nle:
16507
fbnle.w fdbcc_nle_yes # not (less than or equal)?
16508
fdbcc_nle_no:
16509
bra.w fdbcc_false # no; go handle counter
16510
fdbcc_nle_yes:
16511
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16512
beq.w fdbcc_nle_done # no; go finish
16513
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16514
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515
bne.w fdbcc_bsun # yes; we have an exception
16516
fdbcc_nle_done:
16517
rts # no; do nothing
16518
16519
#
16520
# greater or less than:
16521
# _____
16522
# NANvZ
16523
#
16524
fdbcc_gl:
16525
fbgl.w fdbcc_gl_yes # greater or less than?
16526
fdbcc_gl_no:
16527
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16528
beq.w fdbcc_false # no; handle counter
16529
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16530
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531
bne.w fdbcc_bsun # yes; we have an exception
16532
bra.w fdbcc_false # no; go handle counter
16533
fdbcc_gl_yes:
16534
rts # do nothing
16535
16536
#
16537
# not (greater or less than):
16538
#
16539
# NANvZ
16540
#
16541
fdbcc_ngl:
16542
fbngl.w fdbcc_ngl_yes # not (greater or less than)?
16543
fdbcc_ngl_no:
16544
bra.w fdbcc_false # no; go handle counter
16545
fdbcc_ngl_yes:
16546
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16547
beq.b fdbcc_ngl_done # no; go finish
16548
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16549
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550
bne.w fdbcc_bsun # yes; we have an exception
16551
fdbcc_ngl_done:
16552
rts # no; do nothing
16553
16554
#
16555
# greater, less, or equal:
16556
# ___
16557
# NAN
16558
#
16559
fdbcc_gle:
16560
fbgle.w fdbcc_gle_yes # greater, less, or equal?
16561
fdbcc_gle_no:
16562
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16563
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564
bne.w fdbcc_bsun # yes; we have an exception
16565
bra.w fdbcc_false # no; go handle counter
16566
fdbcc_gle_yes:
16567
rts # do nothing
16568
16569
#
16570
# not (greater, less, or equal):
16571
#
16572
# NAN
16573
#
16574
fdbcc_ngle:
16575
fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
16576
fdbcc_ngle_no:
16577
bra.w fdbcc_false # no; go handle counter
16578
fdbcc_ngle_yes:
16579
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16580
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581
bne.w fdbcc_bsun # yes; we have an exception
16582
rts # no; do nothing
16583
16584
#########################################################################
16585
# #
16586
# Miscellaneous tests #
16587
# #
16588
# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16589
# #
16590
#########################################################################
16591
16592
#
16593
# false:
16594
#
16595
# False
16596
#
16597
fdbcc_f: # no bsun possible
16598
bra.w fdbcc_false # go handle counter
16599
16600
#
16601
# true:
16602
#
16603
# True
16604
#
16605
fdbcc_t: # no bsun possible
16606
rts # do nothing
16607
16608
#
16609
# signalling false:
16610
#
16611
# False
16612
#
16613
fdbcc_sf:
16614
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16615
beq.w fdbcc_false # no;go handle counter
16616
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16617
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618
bne.w fdbcc_bsun # yes; we have an exception
16619
bra.w fdbcc_false # go handle counter
16620
16621
#
16622
# signalling true:
16623
#
16624
# True
16625
#
16626
fdbcc_st:
16627
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16628
beq.b fdbcc_st_done # no;go finish
16629
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16630
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631
bne.w fdbcc_bsun # yes; we have an exception
16632
fdbcc_st_done:
16633
rts
16634
16635
#
16636
# signalling equal:
16637
#
16638
# Z
16639
#
16640
fdbcc_seq:
16641
fbseq.w fdbcc_seq_yes # signalling equal?
16642
fdbcc_seq_no:
16643
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16644
beq.w fdbcc_false # no;go handle counter
16645
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16646
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647
bne.w fdbcc_bsun # yes; we have an exception
16648
bra.w fdbcc_false # go handle counter
16649
fdbcc_seq_yes:
16650
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16651
beq.b fdbcc_seq_yes_done # no;go do nothing
16652
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16653
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654
bne.w fdbcc_bsun # yes; we have an exception
16655
fdbcc_seq_yes_done:
16656
rts # yes; do nothing
16657
16658
#
16659
# signalling not equal:
16660
# _
16661
# Z
16662
#
16663
fdbcc_sneq:
16664
fbsneq.w fdbcc_sneq_yes # signalling not equal?
16665
fdbcc_sneq_no:
16666
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16667
beq.w fdbcc_false # no;go handle counter
16668
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16669
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670
bne.w fdbcc_bsun # yes; we have an exception
16671
bra.w fdbcc_false # go handle counter
16672
fdbcc_sneq_yes:
16673
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16674
beq.w fdbcc_sneq_done # no;go finish
16675
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16676
btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677
bne.w fdbcc_bsun # yes; we have an exception
16678
fdbcc_sneq_done:
16679
rts
16680
16681
#########################################################################
16682
# #
16683
# IEEE Aware tests #
16684
# #
16685
# For the IEEE aware tests, action is only taken if the result is false.#
16686
# Therefore, the opposite branch type is used to jump to the decrement #
16687
# routine. #
16688
# The BSUN exception will not be set for any of these tests. #
16689
# #
16690
#########################################################################
16691
16692
#
16693
# ordered greater than:
16694
# _______
16695
# NANvZvN
16696
#
16697
fdbcc_ogt:
16698
fbogt.w fdbcc_ogt_yes # ordered greater than?
16699
fdbcc_ogt_no:
16700
bra.w fdbcc_false # no; go handle counter
16701
fdbcc_ogt_yes:
16702
rts # yes; do nothing
16703
16704
#
16705
# unordered or less or equal:
16706
# _______
16707
# NANvZvN
16708
#
16709
fdbcc_ule:
16710
fbule.w fdbcc_ule_yes # unordered or less or equal?
16711
fdbcc_ule_no:
16712
bra.w fdbcc_false # no; go handle counter
16713
fdbcc_ule_yes:
16714
rts # yes; do nothing
16715
16716
#
16717
# ordered greater than or equal:
16718
# _____
16719
# Zv(NANvN)
16720
#
16721
fdbcc_oge:
16722
fboge.w fdbcc_oge_yes # ordered greater than or equal?
16723
fdbcc_oge_no:
16724
bra.w fdbcc_false # no; go handle counter
16725
fdbcc_oge_yes:
16726
rts # yes; do nothing
16727
16728
#
16729
# unordered or less than:
16730
# _
16731
# NANv(N^Z)
16732
#
16733
fdbcc_ult:
16734
fbult.w fdbcc_ult_yes # unordered or less than?
16735
fdbcc_ult_no:
16736
bra.w fdbcc_false # no; go handle counter
16737
fdbcc_ult_yes:
16738
rts # yes; do nothing
16739
16740
#
16741
# ordered less than:
16742
# _____
16743
# N^(NANvZ)
16744
#
16745
fdbcc_olt:
16746
fbolt.w fdbcc_olt_yes # ordered less than?
16747
fdbcc_olt_no:
16748
bra.w fdbcc_false # no; go handle counter
16749
fdbcc_olt_yes:
16750
rts # yes; do nothing
16751
16752
#
16753
# unordered or greater or equal:
16754
#
16755
# NANvZvN
16756
#
16757
fdbcc_uge:
16758
fbuge.w fdbcc_uge_yes # unordered or greater or equal?
16759
fdbcc_uge_no:
16760
bra.w fdbcc_false # no; go handle counter
16761
fdbcc_uge_yes:
16762
rts # yes; do nothing
16763
16764
#
16765
# ordered less than or equal:
16766
# ___
16767
# Zv(N^NAN)
16768
#
16769
fdbcc_ole:
16770
fbole.w fdbcc_ole_yes # ordered less than or equal?
16771
fdbcc_ole_no:
16772
bra.w fdbcc_false # no; go handle counter
16773
fdbcc_ole_yes:
16774
rts # yes; do nothing
16775
16776
#
16777
# unordered or greater than:
16778
# ___
16779
# NANv(NvZ)
16780
#
16781
fdbcc_ugt:
16782
fbugt.w fdbcc_ugt_yes # unordered or greater than?
16783
fdbcc_ugt_no:
16784
bra.w fdbcc_false # no; go handle counter
16785
fdbcc_ugt_yes:
16786
rts # yes; do nothing
16787
16788
#
16789
# ordered greater or less than:
16790
# _____
16791
# NANvZ
16792
#
16793
fdbcc_ogl:
16794
fbogl.w fdbcc_ogl_yes # ordered greater or less than?
16795
fdbcc_ogl_no:
16796
bra.w fdbcc_false # no; go handle counter
16797
fdbcc_ogl_yes:
16798
rts # yes; do nothing
16799
16800
#
16801
# unordered or equal:
16802
#
16803
# NANvZ
16804
#
16805
fdbcc_ueq:
16806
fbueq.w fdbcc_ueq_yes # unordered or equal?
16807
fdbcc_ueq_no:
16808
bra.w fdbcc_false # no; go handle counter
16809
fdbcc_ueq_yes:
16810
rts # yes; do nothing
16811
16812
#
16813
# ordered:
16814
# ___
16815
# NAN
16816
#
16817
fdbcc_or:
16818
fbor.w fdbcc_or_yes # ordered?
16819
fdbcc_or_no:
16820
bra.w fdbcc_false # no; go handle counter
16821
fdbcc_or_yes:
16822
rts # yes; do nothing
16823
16824
#
16825
# unordered:
16826
#
16827
# NAN
16828
#
16829
fdbcc_un:
16830
fbun.w fdbcc_un_yes # unordered?
16831
fdbcc_un_no:
16832
bra.w fdbcc_false # no; go handle counter
16833
fdbcc_un_yes:
16834
rts # yes; do nothing
16835
16836
#######################################################################
16837
16838
#
16839
# the bsun exception bit was not set.
16840
#
16841
# (1) subtract 1 from the count register
16842
# (2) if (cr == -1) then
16843
# pc = pc of next instruction
16844
# else
16845
# pc += sign_ext(16-bit displacement)
16846
#
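# example (hypothetical values): if the fdbcc instruction resides at
# USER_FPIAR = 0x1000 and the saved displacement is -8, then a false
# predicate with Dn != -1 after the decrement yields
# EXC_PC = 0x1000 + 4 + (-8) = 0xffc, and the "rte" branches backward.
#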
16847
fdbcc_false:
16848
mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword
16849
andi.w &0x7, %d1 # extract count register
16850
16851
bsr.l fetch_dreg # fetch count value
16852
# make sure that d0 isn't corrupted between calls...
16853
16854
subq.w &0x1, %d0 # Dn - 1 -> Dn
16855
16856
bsr.l store_dreg_l # store new count value
16857
16858
cmpi.w %d0, &-0x1 # is (Dn == -1)?
16859
bne.b fdbcc_false_cont # no;
16860
rts
16861
16862
fdbcc_false_cont:
16863
mov.l L_SCR1(%a6),%d0 # fetch displacement
16864
add.l USER_FPIAR(%a6),%d0 # add instruction PC
16865
addq.l &0x4,%d0 # add instruction length
16866
mov.l %d0,EXC_PC(%a6) # set new PC
16867
rts
16868
16869
# the emulation routine set bsun and BSUN was enabled. have to
16870
# fix stack and jump to the bsun handler.
16871
# let the caller of this routine shift the stack frame up to
16872
# eliminate the effective address field.
16873
fdbcc_bsun:
16874
mov.b &fbsun_flg,SPCOND_FLG(%a6)
16875
rts
16876
16877
#########################################################################
16878
# ftrapcc(): routine to emulate the ftrapcc instruction #
16879
# #
16880
# XDEF **************************************************************** #
16881
# _ftrapcc() #
16882
# #
16883
# XREF **************************************************************** #
16884
# none #
16885
# #
16886
# INPUT *************************************************************** #
16887
# none #
16888
# #
16889
# OUTPUT ************************************************************** #
16890
# none #
16891
# #
16892
# ALGORITHM *********************************************************** #
16893
# This routine checks which conditional predicate is specified by #
16894
# the stacked ftrapcc instruction opcode and then branches to a routine #
16895
# for that predicate. The corresponding fbcc instruction is then used #
16896
# to see whether the condition (specified by the stacked FPSR) is true #
16897
# or false. #
16898
# If a BSUN exception should be indicated, the BSUN and AIOP		#
16899
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16900
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16901
# enabled BSUN should not be flagged and the predicate is true, then #
16902
# the ftrapcc_flg is set in the SPCOND_FLG location. These special #
16903
# flags indicate to the calling routine to emulate the exceptional #
16904
# condition. #
16905
# #
16906
#########################################################################
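#
# The decision here mirrors the C sketch given above for _fdbcc: a NAN
# condition code on a nonaware or signalling predicate sets BSUN and
# AIOP and, if BSUN is enabled, posts fbsun_flg and returns. The only
# difference is the action taken: a true predicate posts ftrapcc_flg
# in SPCOND_FLG (so the caller emulates the trap) and a false
# predicate simply returns.
#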
16907
16908
global _ftrapcc
16909
_ftrapcc:
16910
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16911
16912
clr.l %d1 # clear scratch reg
16913
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16914
ror.l &0x8,%d1 # rotate to top byte
16915
fmov.l %d1,%fpsr # insert into FPSR
16916
16917
mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
16918
jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16919
16920
tbl_ftrapcc:
16921
short ftrapcc_f - tbl_ftrapcc # 00
16922
short ftrapcc_eq - tbl_ftrapcc # 01
16923
short ftrapcc_ogt - tbl_ftrapcc # 02
16924
short ftrapcc_oge - tbl_ftrapcc # 03
16925
short ftrapcc_olt - tbl_ftrapcc # 04
16926
short ftrapcc_ole - tbl_ftrapcc # 05
16927
short ftrapcc_ogl - tbl_ftrapcc # 06
16928
short ftrapcc_or - tbl_ftrapcc # 07
16929
short ftrapcc_un - tbl_ftrapcc # 08
16930
short ftrapcc_ueq - tbl_ftrapcc # 09
16931
short ftrapcc_ugt - tbl_ftrapcc # 10
16932
short ftrapcc_uge - tbl_ftrapcc # 11
16933
short ftrapcc_ult - tbl_ftrapcc # 12
16934
short ftrapcc_ule - tbl_ftrapcc # 13
16935
short ftrapcc_neq - tbl_ftrapcc # 14
16936
short ftrapcc_t - tbl_ftrapcc # 15
16937
short ftrapcc_sf - tbl_ftrapcc # 16
16938
short ftrapcc_seq - tbl_ftrapcc # 17
16939
short ftrapcc_gt - tbl_ftrapcc # 18
16940
short ftrapcc_ge - tbl_ftrapcc # 19
16941
short ftrapcc_lt - tbl_ftrapcc # 20
16942
short ftrapcc_le - tbl_ftrapcc # 21
16943
short ftrapcc_gl - tbl_ftrapcc # 22
16944
short ftrapcc_gle - tbl_ftrapcc # 23
16945
short ftrapcc_ngle - tbl_ftrapcc # 24
16946
short ftrapcc_ngl - tbl_ftrapcc # 25
16947
short ftrapcc_nle - tbl_ftrapcc # 26
16948
short ftrapcc_nlt - tbl_ftrapcc # 27
16949
short ftrapcc_nge - tbl_ftrapcc # 28
16950
short ftrapcc_ngt - tbl_ftrapcc # 29
16951
short ftrapcc_sneq - tbl_ftrapcc # 30
16952
short ftrapcc_st - tbl_ftrapcc # 31
16953
16954
#########################################################################
16955
# #
16956
# IEEE Nonaware tests #
16957
# #
16958
# For the IEEE nonaware tests, we set the result based on the #
16959
# floating point condition codes. In addition, we check to see #
16960
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
16961
# #
16962
# The cases EQ and NE are shared by the Aware and Nonaware groups #
16963
# and are incapable of setting the BSUN exception bit. #
16964
# #
16965
# Typically, only one of the two possible branch directions could #
16966
# have the NAN bit set. #
16967
# #
16968
#########################################################################
16969
16970
#
16971
# equal:
16972
#
16973
# Z
16974
#
16975
ftrapcc_eq:
16976
fbeq.w ftrapcc_trap # equal?
16977
ftrapcc_eq_no:
16978
rts # do nothing
16979
16980
#
16981
# not equal:
16982
# _
16983
# Z
16984
#
16985
ftrapcc_neq:
16986
fbneq.w ftrapcc_trap # not equal?
16987
ftrapcc_neq_no:
16988
rts # do nothing
16989
16990
#
16991
# greater than:
16992
# _______
16993
# NANvZvN
16994
#
16995
ftrapcc_gt:
16996
fbgt.w ftrapcc_trap # greater than?
16997
ftrapcc_gt_no:
16998
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16999
beq.b ftrapcc_gt_done # no
17000
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17001
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17002
bne.w ftrapcc_bsun # yes
17003
ftrapcc_gt_done:
17004
rts # no; do nothing
17005
17006
#
17007
# not greater than:
17008
#
17009
# NANvZvN
17010
#
17011
ftrapcc_ngt:
17012
fbngt.w ftrapcc_ngt_yes # not greater than?
17013
ftrapcc_ngt_no:
17014
rts # do nothing
17015
ftrapcc_ngt_yes:
17016
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17017
beq.w ftrapcc_trap # no; go take trap
17018
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17019
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17020
bne.w ftrapcc_bsun # yes
17021
bra.w ftrapcc_trap # no; go take trap
17022
17023
#
17024
# greater than or equal:
17025
# _____
17026
# Zv(NANvN)
17027
#
17028
ftrapcc_ge:
17029
fbge.w ftrapcc_ge_yes # greater than or equal?
17030
ftrapcc_ge_no:
17031
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17032
beq.b ftrapcc_ge_done # no; go finish
17033
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17034
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17035
bne.w ftrapcc_bsun # yes
17036
ftrapcc_ge_done:
17037
rts # no; do nothing
17038
ftrapcc_ge_yes:
17039
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17040
beq.w ftrapcc_trap # no; go take trap
17041
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17042
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17043
bne.w ftrapcc_bsun # yes
17044
bra.w ftrapcc_trap # no; go take trap
17045
17046
#
17047
# not (greater than or equal):
17048
# _
17049
# NANv(N^Z)
17050
#
17051
ftrapcc_nge:
17052
fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
17053
ftrapcc_nge_no:
17054
rts # do nothing
17055
ftrapcc_nge_yes:
17056
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17057
beq.w ftrapcc_trap # no; go take trap
17058
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17059
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17060
bne.w ftrapcc_bsun # yes
17061
bra.w ftrapcc_trap # no; go take trap
17062
17063
#
17064
# less than:
17065
# _____
17066
# N^(NANvZ)
17067
#
17068
ftrapcc_lt:
17069
fblt.w ftrapcc_trap # less than?
17070
ftrapcc_lt_no:
17071
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17072
beq.b ftrapcc_lt_done # no; go finish
17073
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17074
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17075
bne.w ftrapcc_bsun # yes
17076
ftrapcc_lt_done:
17077
rts # no; do nothing
17078
17079
#
17080
# not less than:
17081
# _
17082
# NANv(ZvN)
17083
#
17084
ftrapcc_nlt:
17085
fbnlt.w ftrapcc_nlt_yes # not less than?
17086
ftrapcc_nlt_no:
17087
rts # do nothing
17088
ftrapcc_nlt_yes:
17089
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17090
beq.w ftrapcc_trap # no; go take trap
17091
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17092
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17093
bne.w ftrapcc_bsun # yes
17094
bra.w ftrapcc_trap # no; go take trap
17095
17096
#
17097
# less than or equal:
17098
# ___
17099
# Zv(N^NAN)
17100
#
17101
ftrapcc_le:
17102
fble.w ftrapcc_le_yes # less than or equal?
17103
ftrapcc_le_no:
17104
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17105
beq.b ftrapcc_le_done # no; go finish
17106
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17107
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17108
bne.w ftrapcc_bsun # yes
17109
ftrapcc_le_done:
17110
rts # no; do nothing
17111
ftrapcc_le_yes:
17112
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17113
beq.w ftrapcc_trap # no; go take trap
17114
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17115
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17116
bne.w ftrapcc_bsun # yes
17117
bra.w ftrapcc_trap # no; go take trap
17118
17119
#
17120
# not (less than or equal):
17121
# ___
17122
# NANv(NvZ)
17123
#
17124
ftrapcc_nle:
17125
fbnle.w ftrapcc_nle_yes # not (less than or equal)?
17126
ftrapcc_nle_no:
17127
rts # do nothing
17128
ftrapcc_nle_yes:
17129
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17130
beq.w ftrapcc_trap # no; go take trap
17131
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17132
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17133
bne.w ftrapcc_bsun # yes
17134
bra.w ftrapcc_trap # no; go take trap
17135
17136
#
17137
# greater or less than:
17138
# _____
17139
# NANvZ
17140
#
17141
ftrapcc_gl:
17142
fbgl.w ftrapcc_trap # greater or less than?
17143
ftrapcc_gl_no:
17144
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17145
beq.b ftrapcc_gl_done # no; go finish
17146
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17147
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17148
bne.w ftrapcc_bsun # yes
17149
ftrapcc_gl_done:
17150
rts # no; do nothing
17151
17152
#
17153
# not (greater or less than):
17154
#
17155
# NANvZ
17156
#
17157
ftrapcc_ngl:
17158
fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
17159
ftrapcc_ngl_no:
17160
rts # do nothing
17161
ftrapcc_ngl_yes:
17162
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17163
beq.w ftrapcc_trap # no; go take trap
17164
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17165
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17166
bne.w ftrapcc_bsun # yes
17167
bra.w ftrapcc_trap # no; go take trap
17168
17169
#
17170
# greater, less, or equal:
17171
# ___
17172
# NAN
17173
#
17174
ftrapcc_gle:
17175
fbgle.w ftrapcc_trap # greater, less, or equal?
17176
ftrapcc_gle_no:
17177
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17178
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17179
bne.w ftrapcc_bsun # yes
17180
rts # no; do nothing
17181
17182
#
17183
# not (greater, less, or equal):
17184
#
17185
# NAN
17186
#
17187
ftrapcc_ngle:
17188
fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
17189
ftrapcc_ngle_no:
17190
rts # do nothing
17191
ftrapcc_ngle_yes:
17192
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17193
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17194
bne.w ftrapcc_bsun # yes
17195
bra.w ftrapcc_trap # no; go take trap
17196
17197
#########################################################################
17198
# #
17199
# Miscellaneous tests #
17200
# #
17201
# For the miscellaneous tests, the trap is taken or not based on the	#
17202
# floating point condition codes. All but the 'false' and 'true'	#
17203
# predicates can set the BSUN exception.				#
17204
# #
17205
#########################################################################
17206
17207
#
17208
# false:
17209
#
17210
# False
17211
#
17212
ftrapcc_f:
17213
rts # do nothing
17214
17215
#
17216
# true:
17217
#
17218
# True
17219
#
17220
ftrapcc_t:
17221
bra.w ftrapcc_trap # go take trap
17222
17223
#
17224
# signalling false:
17225
#
17226
# False
17227
#
17228
ftrapcc_sf:
17229
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17230
beq.b ftrapcc_sf_done # no; go finish
17231
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17232
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17233
bne.w ftrapcc_bsun # yes
17234
ftrapcc_sf_done:
17235
rts # no; do nothing
17236
17237
#
17238
# signalling true:
17239
#
17240
# True
17241
#
17242
ftrapcc_st:
17243
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17244
beq.w ftrapcc_trap # no; go take trap
17245
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17246
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17247
bne.w ftrapcc_bsun # yes
17248
bra.w ftrapcc_trap # no; go take trap
17249
17250
#
17251
# signalling equal:
17252
#
17253
# Z
17254
#
17255
ftrapcc_seq:
17256
fbseq.w ftrapcc_seq_yes # signalling equal?
17257
ftrapcc_seq_no:
17258
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17259
beq.w ftrapcc_seq_done # no; go finish
17260
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17261
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17262
bne.w ftrapcc_bsun # yes
17263
ftrapcc_seq_done:
17264
rts # no; do nothing
17265
ftrapcc_seq_yes:
17266
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17267
beq.w ftrapcc_trap # no; go take trap
17268
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17269
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17270
bne.w ftrapcc_bsun # yes
17271
bra.w ftrapcc_trap # no; go take trap
17272
17273
#
17274
# signalling not equal:
17275
# _
17276
# Z
17277
#
17278
ftrapcc_sneq:
17279
fbsneq.w ftrapcc_sneq_yes # signalling not equal?
17280
ftrapcc_sneq_no:
17281
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17282
beq.w ftrapcc_sneq_no_done # no; go finish
17283
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17284
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17285
bne.w ftrapcc_bsun # yes
17286
ftrapcc_sneq_no_done:
17287
rts # do nothing
17288
ftrapcc_sneq_yes:
17289
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17290
beq.w ftrapcc_trap # no; go take trap
17291
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17292
btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17293
bne.w ftrapcc_bsun # yes
17294
bra.w ftrapcc_trap # no; go take trap
17295
17296
#########################################################################
17297
# #
17298
# IEEE Aware tests #
17299
# #
17300
# For the IEEE aware tests, we only have to set the result based on the #
17301
# floating point condition codes. The BSUN exception will not be #
17302
# set for any of these tests. #
17303
# #
17304
#########################################################################
17305
17306
#
17307
# ordered greater than:
17308
# _______
17309
# NANvZvN
17310
#
17311
ftrapcc_ogt:
17312
fbogt.w ftrapcc_trap # ordered greater than?
17313
ftrapcc_ogt_no:
17314
rts # do nothing
17315
17316
#
17317
# unordered or less or equal:
17318
# _______
17319
# NANvZvN
17320
#
17321
ftrapcc_ule:
17322
fbule.w ftrapcc_trap # unordered or less or equal?
17323
ftrapcc_ule_no:
17324
rts # do nothing
17325
17326
#
17327
# ordered greater than or equal:
17328
# _____
17329
# Zv(NANvN)
17330
#
17331
ftrapcc_oge:
17332
fboge.w ftrapcc_trap # ordered greater than or equal?
17333
ftrapcc_oge_no:
17334
rts # do nothing
17335
17336
#
17337
# unordered or less than:
17338
# _
17339
# NANv(N^Z)
17340
#
17341
ftrapcc_ult:
17342
fbult.w ftrapcc_trap # unordered or less than?
17343
ftrapcc_ult_no:
17344
rts # do nothing
17345
17346
#
17347
# ordered less than:
17348
# _____
17349
# N^(NANvZ)
17350
#
17351
ftrapcc_olt:
17352
fbolt.w ftrapcc_trap # ordered less than?
17353
ftrapcc_olt_no:
17354
rts # do nothing
17355
17356
#
17357
# unordered or greater or equal:
17358
#
17359
# NANvZvN
17360
#
17361
ftrapcc_uge:
17362
fbuge.w ftrapcc_trap # unordered or greater or equal?
17363
ftrapcc_uge_no:
17364
rts # do nothing
17365
17366
#
17367
# ordered less than or equal:
17368
# ___
17369
# Zv(N^NAN)
17370
#
17371
ftrapcc_ole:
17372
fbole.w ftrapcc_trap # ordered less than or equal?
17373
ftrapcc_ole_no:
17374
rts # do nothing
17375
17376
#
17377
# unordered or greater than:
17378
# ___
17379
# NANv(NvZ)
17380
#
17381
ftrapcc_ugt:
17382
fbugt.w ftrapcc_trap # unordered or greater than?
17383
ftrapcc_ugt_no:
17384
rts # do nothing
17385
17386
#
17387
# ordered greater or less than:
17388
# _____
17389
# NANvZ
17390
#
17391
ftrapcc_ogl:
17392
fbogl.w ftrapcc_trap # ordered greater or less than?
17393
ftrapcc_ogl_no:
17394
rts # do nothing
17395
17396
#
17397
# unordered or equal:
17398
#
17399
# NANvZ
17400
#
17401
ftrapcc_ueq:
17402
fbueq.w ftrapcc_trap # unordered or equal?
17403
ftrapcc_ueq_no:
17404
rts # do nothing
17405
17406
#
17407
# ordered:
17408
# ___
17409
# NAN
17410
#
17411
ftrapcc_or:
17412
fbor.w ftrapcc_trap # ordered?
17413
ftrapcc_or_no:
17414
rts # do nothing
17415
17416
#
17417
# unordered:
17418
#
17419
# NAN
17420
#
17421
ftrapcc_un:
17422
fbun.w ftrapcc_trap # unordered?
17423
ftrapcc_un_no:
17424
rts # do nothing
17425
17426
#######################################################################
17427
17428
# the bsun exception bit was not set.
17429
# we will need to jump to the ftrapcc vector. the stack frame
17430
# is the same size as that of the fp unimp instruction. the
17431
# only difference is that the <ea> field should hold the PC
17432
# of the ftrapcc instruction and the vector offset field
17433
# should denote the ftrapcc trap.
17434
ftrapcc_trap:
17435
mov.b &ftrapcc_flg,SPCOND_FLG(%a6)
17436
rts
17437
17438
# the emulation routine set bsun and BSUN was enabled. have to
17439
# fix stack and jump to the bsun handler.
17440
# let the caller of this routine shift the stack frame up to
17441
# eliminate the effective address field.
17442
ftrapcc_bsun:
17443
mov.b &fbsun_flg,SPCOND_FLG(%a6)
17444
rts
17445
17446
#########################################################################
17447
# fscc(): routine to emulate the fscc instruction #
17448
# #
17449
# XDEF **************************************************************** #
17450
# _fscc() #
17451
# #
17452
# XREF **************************************************************** #
17453
# store_dreg_b() - store result to data register file #
17454
# dec_areg() - decrement an areg for -(an) mode #
17455
# inc_areg() - increment an areg for (an)+ mode #
17456
# _dmem_write_byte() - store result to memory #
17457
# #
17458
# INPUT *************************************************************** #
17459
# none #
17460
# #
17461
# OUTPUT ************************************************************** #
17462
# none #
17463
# #
17464
# ALGORITHM *********************************************************** #
17465
# This routine checks which conditional predicate is specified by #
17466
# the stacked fscc instruction opcode and then branches to a routine #
17467
# for that predicate. The corresponding fbcc instruction is then used #
17468
# to see whether the condition (specified by the stacked FPSR) is true #
17469
# or false. #
17470
# If a BSUN exception should be indicated, the BSUN and AIOP		#
17471
# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
17472
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
17473
# enabled BSUN should not be flagged, then the true/false result is	#
17474
# stored to the data register file or to memory.			#
17475
# #
17476
#########################################################################
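#
# Roughly, in C (illustration only; cc_true() is a hypothetical
# stand-in for the fbcc test, and the call forms of the XREF routines
# are not literal):
#
#	unsigned char result = cc_true(predicate) ? 0xff : 0x00;
#	if (bsun_was_flagged && (FPCR_ENABLE & BSUN))
#		SPCOND_FLG = fbsun_flg;		/* no result is stored	*/
#	else if (dest_is_data_register)
#		store_dreg_b(result, regno);
#	else
#		_dmem_write_byte(ea, result);	/* then fix (An)+/-(An)	*/
#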
17477
17478
global _fscc
17479
_fscc:
17480
mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
17481
17482
clr.l %d1 # clear scratch reg
17483
mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
17484
ror.l &0x8,%d1 # rotate to top byte
17485
fmov.l %d1,%fpsr # insert into FPSR
17486
17487
mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
17488
jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine
17489
17490
tbl_fscc:
17491
short fscc_f - tbl_fscc # 00
17492
short fscc_eq - tbl_fscc # 01
17493
short fscc_ogt - tbl_fscc # 02
17494
short fscc_oge - tbl_fscc # 03
17495
short fscc_olt - tbl_fscc # 04
17496
short fscc_ole - tbl_fscc # 05
17497
short fscc_ogl - tbl_fscc # 06
17498
short fscc_or - tbl_fscc # 07
17499
short fscc_un - tbl_fscc # 08
17500
short fscc_ueq - tbl_fscc # 09
17501
short fscc_ugt - tbl_fscc # 10
17502
short fscc_uge - tbl_fscc # 11
17503
short fscc_ult - tbl_fscc # 12
17504
short fscc_ule - tbl_fscc # 13
17505
short fscc_neq - tbl_fscc # 14
17506
short fscc_t - tbl_fscc # 15
17507
short fscc_sf - tbl_fscc # 16
17508
short fscc_seq - tbl_fscc # 17
17509
short fscc_gt - tbl_fscc # 18
17510
short fscc_ge - tbl_fscc # 19
17511
short fscc_lt - tbl_fscc # 20
17512
short fscc_le - tbl_fscc # 21
17513
short fscc_gl - tbl_fscc # 22
17514
short fscc_gle - tbl_fscc # 23
17515
short fscc_ngle - tbl_fscc # 24
17516
short fscc_ngl - tbl_fscc # 25
17517
short fscc_nle - tbl_fscc # 26
17518
short fscc_nlt - tbl_fscc # 27
17519
short fscc_nge - tbl_fscc # 28
17520
short fscc_ngt - tbl_fscc # 29
17521
short fscc_sneq - tbl_fscc # 30
17522
short fscc_st - tbl_fscc # 31
17523
17524
#########################################################################
17525
# #
17526
# IEEE Nonaware tests #
17527
# #
17528
# For the IEEE nonaware tests, we set the result based on the #
17529
# floating point condition codes. In addition, we check to see #
17530
# if the NAN bit is set, in which case BSUN and AIOP will be set. #
17531
# #
17532
# The cases EQ and NE are shared by the Aware and Nonaware groups #
17533
# and are incapable of setting the BSUN exception bit. #
17534
# #
17535
# Typically, only one of the two possible branch directions could #
17536
# have the NAN bit set. #
17537
# #
17538
#########################################################################
17539
17540
#
17541
# equal:
17542
#
17543
# Z
17544
#
17545
fscc_eq:
17546
fbeq.w fscc_eq_yes # equal?
17547
fscc_eq_no:
17548
clr.b %d0 # set false
17549
bra.w fscc_done # go finish
17550
fscc_eq_yes:
17551
st %d0 # set true
17552
bra.w fscc_done # go finish
17553
17554
#
17555
# not equal:
17556
# _
17557
# Z
17558
#
17559
fscc_neq:
17560
fbneq.w fscc_neq_yes # not equal?
17561
fscc_neq_no:
17562
clr.b %d0 # set false
17563
bra.w fscc_done # go finish
17564
fscc_neq_yes:
17565
st %d0 # set true
17566
bra.w fscc_done # go finish
17567
17568
#
17569
# greater than:
17570
# _______
17571
# NANvZvN
17572
#
17573
fscc_gt:
17574
fbgt.w fscc_gt_yes # greater than?
17575
fscc_gt_no:
17576
clr.b %d0 # set false
17577
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17578
beq.w fscc_done # no;go finish
17579
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17580
bra.w fscc_chk_bsun # go finish
17581
fscc_gt_yes:
17582
st %d0 # set true
17583
bra.w fscc_done # go finish
17584
17585
#
17586
# not greater than:
17587
#
17588
# NANvZvN
17589
#
17590
fscc_ngt:
17591
fbngt.w fscc_ngt_yes # not greater than?
17592
fscc_ngt_no:
17593
clr.b %d0 # set false
17594
bra.w fscc_done # go finish
17595
fscc_ngt_yes:
17596
st %d0 # set true
17597
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17598
beq.w fscc_done # no;go finish
17599
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17600
bra.w fscc_chk_bsun # go finish
17601
17602
#
17603
# greater than or equal:
17604
# _____
17605
# Zv(NANvN)
17606
#
17607
fscc_ge:
17608
fbge.w fscc_ge_yes # greater than or equal?
17609
fscc_ge_no:
17610
clr.b %d0 # set false
17611
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17612
beq.w fscc_done # no;go finish
17613
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17614
bra.w fscc_chk_bsun # go finish
17615
fscc_ge_yes:
17616
st %d0 # set true
17617
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17618
beq.w fscc_done # no;go finish
17619
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17620
bra.w fscc_chk_bsun # go finish
17621
17622
#
17623
# not (greater than or equal):
17624
# _
17625
# NANv(N^Z)
17626
#
17627
fscc_nge:
17628
fbnge.w fscc_nge_yes # not (greater than or equal)?
17629
fscc_nge_no:
17630
clr.b %d0 # set false
17631
bra.w fscc_done # go finish
17632
fscc_nge_yes:
17633
st %d0 # set true
17634
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17635
beq.w fscc_done # no;go finish
17636
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17637
bra.w fscc_chk_bsun # go finish
17638
17639
#
17640
# less than:
17641
# _____
17642
# N^(NANvZ)
17643
#
17644
fscc_lt:
17645
fblt.w fscc_lt_yes # less than?
17646
fscc_lt_no:
17647
clr.b %d0 # set false
17648
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17649
beq.w fscc_done # no;go finish
17650
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17651
bra.w fscc_chk_bsun # go finish
17652
fscc_lt_yes:
17653
st %d0 # set true
17654
bra.w fscc_done # go finish
17655
17656
#
17657
# not less than:
17658
# _
17659
# NANv(ZvN)
17660
#
17661
fscc_nlt:
17662
fbnlt.w fscc_nlt_yes # not less than?
17663
fscc_nlt_no:
17664
clr.b %d0 # set false
17665
bra.w fscc_done # go finish
17666
fscc_nlt_yes:
17667
st %d0 # set true
17668
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17669
beq.w fscc_done # no;go finish
17670
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17671
bra.w fscc_chk_bsun # go finish
17672
17673
#
17674
# less than or equal:
17675
# ___
17676
# Zv(N^NAN)
17677
#
17678
fscc_le:
17679
fble.w fscc_le_yes # less than or equal?
17680
fscc_le_no:
17681
clr.b %d0 # set false
17682
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17683
beq.w fscc_done # no;go finish
17684
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17685
bra.w fscc_chk_bsun # go finish
17686
fscc_le_yes:
17687
st %d0 # set true
17688
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17689
beq.w fscc_done # no;go finish
17690
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17691
bra.w fscc_chk_bsun # go finish
17692
17693
#
17694
# not (less than or equal):
17695
# ___
17696
# NANv(NvZ)
17697
#
17698
fscc_nle:
17699
fbnle.w fscc_nle_yes # not (less than or equal)?
17700
fscc_nle_no:
17701
clr.b %d0 # set false
17702
bra.w fscc_done # go finish
17703
fscc_nle_yes:
17704
st %d0 # set true
17705
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17706
beq.w fscc_done # no;go finish
17707
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17708
bra.w fscc_chk_bsun # go finish
17709
17710
#
17711
# greater or less than:
17712
# _____
17713
# NANvZ
17714
#
17715
fscc_gl:
17716
fbgl.w fscc_gl_yes # greater or less than?
17717
fscc_gl_no:
17718
clr.b %d0 # set false
17719
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17720
beq.w fscc_done # no;go finish
17721
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17722
bra.w fscc_chk_bsun # go finish
17723
fscc_gl_yes:
17724
st %d0 # set true
17725
bra.w fscc_done # go finish
17726
17727
#
17728
# not (greater or less than):
17729
#
17730
# NANvZ
17731
#
17732
fscc_ngl:
17733
fbngl.w fscc_ngl_yes # not (greater or less than)?
17734
fscc_ngl_no:
17735
clr.b %d0 # set false
17736
bra.w fscc_done # go finish
17737
fscc_ngl_yes:
17738
st %d0 # set true
17739
btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17740
beq.w fscc_done # no;go finish
17741
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17742
bra.w fscc_chk_bsun # go finish
17743
17744
#
17745
# greater, less, or equal:
17746
# ___
17747
# NAN
17748
#
17749
fscc_gle:
17750
fbgle.w fscc_gle_yes # greater, less, or equal?
17751
fscc_gle_no:
17752
clr.b %d0 # set false
17753
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17754
bra.w fscc_chk_bsun # go finish
17755
fscc_gle_yes:
17756
st %d0 # set true
17757
bra.w fscc_done # go finish
17758
17759
#
17760
# not (greater, less, or equal):
17761
#
17762
# NAN
17763
#
17764
fscc_ngle:
17765
fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
17766
fscc_ngle_no:
17767
clr.b %d0 # set false
17768
bra.w fscc_done # go finish
17769
fscc_ngle_yes:
17770
st %d0 # set true
17771
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17772
bra.w fscc_chk_bsun # go finish
17773
17774
#########################################################################
17775
# #
17776
# Miscellaneous tests #
17777
# #
17778
# For the miscellaneous tests, the result is set based on the		#
17779
# floating point condition codes. All but the 'false' and 'true'	#
17780
# predicates can set the BSUN exception.				#
17781
# #
17782
#########################################################################
17783
17784
#
17785
# false:
17786
#
17787
# False
17788
#
17789
fscc_f:
17790
clr.b %d0 # set false
17791
bra.w fscc_done # go finish
17792
17793
#
17794
# true:
17795
#
17796
# True
17797
#
17798
fscc_t:
17799
st %d0 # set true
17800
bra.w fscc_done # go finish
17801
17802
#
17803
# signalling false:
17804
#
17805
# False
17806
#
17807
fscc_sf:
17808
clr.b %d0 # set false
17809
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17810
beq.w fscc_done # no;go finish
17811
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17812
bra.w fscc_chk_bsun # go finish
17813
17814
#
17815
# signalling true:
17816
#
17817
# True
17818
#
17819
fscc_st:
17820
st %d0 # set true
17821
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17822
beq.w fscc_done # no;go finish
17823
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17824
bra.w fscc_chk_bsun # go finish
17825
17826
#
17827
# signalling equal:
17828
#
17829
# Z
17830
#
17831
fscc_seq:
17832
fbseq.w fscc_seq_yes # signalling equal?
17833
fscc_seq_no:
17834
clr.b %d0 # set false
17835
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17836
beq.w fscc_done # no;go finish
17837
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17838
bra.w fscc_chk_bsun # go finish
17839
fscc_seq_yes:
17840
st %d0 # set true
17841
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17842
beq.w fscc_done # no;go finish
17843
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17844
bra.w fscc_chk_bsun # go finish
17845
17846
#
17847
# signalling not equal:
17848
# _
17849
# Z
17850
#
17851
fscc_sneq:
17852
fbsneq.w fscc_sneq_yes # signalling not equal?
17853
fscc_sneq_no:
17854
clr.b %d0 # set false
17855
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17856
beq.w fscc_done # no;go finish
17857
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17858
bra.w fscc_chk_bsun # go finish
17859
fscc_sneq_yes:
17860
st %d0 # set true
17861
btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17862
beq.w fscc_done # no;go finish
17863
ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17864
bra.w fscc_chk_bsun # go finish
17865
17866
#########################################################################
17867
# #
17868
# IEEE Aware tests #
17869
# #
17870
# For the IEEE aware tests, we only have to set the result based on the #
17871
# floating point condition codes. The BSUN exception will not be #
17872
# set for any of these tests. #
17873
# #
17874
#########################################################################
17875
17876
#
17877
# ordered greater than:
17878
# _______
17879
# NANvZvN
17880
#
17881
fscc_ogt:
17882
fbogt.w fscc_ogt_yes # ordered greater than?
17883
fscc_ogt_no:
17884
clr.b %d0 # set false
17885
bra.w fscc_done # go finish
17886
fscc_ogt_yes:
17887
st %d0 # set true
17888
bra.w fscc_done # go finish
17889
17890
#
17891
# unordered or less or equal:
17892
# _______
17893
# NANvZvN
17894
#
17895
fscc_ule:
17896
fbule.w fscc_ule_yes # unordered or less or equal?
17897
fscc_ule_no:
17898
clr.b %d0 # set false
17899
bra.w fscc_done # go finish
17900
fscc_ule_yes:
17901
st %d0 # set true
17902
bra.w fscc_done # go finish
17903
17904
#
17905
# ordered greater than or equal:
17906
# _____
17907
# Zv(NANvN)
17908
#
17909
fscc_oge:
17910
fboge.w fscc_oge_yes # ordered greater than or equal?
17911
fscc_oge_no:
17912
clr.b %d0 # set false
17913
bra.w fscc_done # go finish
17914
fscc_oge_yes:
17915
st %d0 # set true
17916
bra.w fscc_done # go finish
17917
17918
#
17919
# unordered or less than:
17920
# _
17921
# NANv(N^Z)
17922
#
17923
fscc_ult:
17924
fbult.w fscc_ult_yes # unordered or less than?
17925
fscc_ult_no:
17926
clr.b %d0 # set false
17927
bra.w fscc_done # go finish
17928
fscc_ult_yes:
17929
st %d0 # set true
17930
bra.w fscc_done # go finish
17931
17932
#
17933
# ordered less than:
17934
# _____
17935
# N^(NANvZ)
17936
#
17937
fscc_olt:
17938
fbolt.w fscc_olt_yes # ordered less than?
17939
fscc_olt_no:
17940
clr.b %d0 # set false
17941
bra.w fscc_done # go finish
17942
fscc_olt_yes:
17943
st %d0 # set true
17944
bra.w fscc_done # go finish
17945
17946
#
17947
# unordered or greater or equal:
17948
#
17949
# NANvZvN
17950
#
17951
fscc_uge:
17952
fbuge.w fscc_uge_yes # unordered or greater or equal?
17953
fscc_uge_no:
17954
clr.b %d0 # set false
17955
bra.w fscc_done # go finish
17956
fscc_uge_yes:
17957
st %d0 # set true
17958
bra.w fscc_done # go finish
17959
17960
#
17961
# ordered less than or equal:
17962
# ___
17963
# Zv(N^NAN)
17964
#
17965
fscc_ole:
17966
fbole.w fscc_ole_yes # ordered less than or equal?
17967
fscc_ole_no:
17968
clr.b %d0 # set false
17969
bra.w fscc_done # go finish
17970
fscc_ole_yes:
17971
st %d0 # set true
17972
bra.w fscc_done # go finish
17973
17974
#
17975
# unordered or greater than:
17976
# ___
17977
# NANv(NvZ)
17978
#
17979
fscc_ugt:
17980
fbugt.w fscc_ugt_yes # unordered or greater than?
17981
fscc_ugt_no:
17982
clr.b %d0 # set false
17983
bra.w fscc_done # go finish
17984
fscc_ugt_yes:
17985
st %d0 # set true
17986
bra.w fscc_done # go finish
17987
17988
#
17989
# ordered greater or less than:
17990
# _____
17991
# NANvZ
17992
#
17993
fscc_ogl:
17994
fbogl.w fscc_ogl_yes # ordered greater or less than?
17995
fscc_ogl_no:
17996
clr.b %d0 # set false
17997
bra.w fscc_done # go finish
17998
fscc_ogl_yes:
17999
st %d0 # set true
18000
bra.w fscc_done # go finish
18001
18002
#
18003
# unordered or equal:
18004
#
18005
# NANvZ
18006
#
18007
fscc_ueq:
18008
fbueq.w fscc_ueq_yes # unordered or equal?
18009
fscc_ueq_no:
18010
clr.b %d0 # set false
18011
bra.w fscc_done # go finish
18012
fscc_ueq_yes:
18013
st %d0 # set true
18014
bra.w fscc_done # go finish
18015
18016
#
18017
# ordered:
18018
# ___
18019
# NAN
18020
#
18021
fscc_or:
18022
fbor.w fscc_or_yes # ordered?
18023
fscc_or_no:
18024
clr.b %d0 # set false
18025
bra.w fscc_done # go finish
18026
fscc_or_yes:
18027
st %d0 # set true
18028
bra.w fscc_done # go finish
18029
18030
#
18031
# unordered:
18032
#
18033
# NAN
18034
#
18035
fscc_un:
18036
fbun.w fscc_un_yes # unordered?
18037
fscc_un_no:
18038
clr.b %d0 # set false
18039
bra.w fscc_done # go finish
18040
fscc_un_yes:
18041
st %d0 # set true
18042
bra.w fscc_done # go finish
18043
18044
#######################################################################
18045
18046
#
18047
# the bsun exception bit was set. now, check to see if BSUN
18048
# is enabled. if so, don't store the result; correct the stack frame
18049
# for a bsun exception.
18050
#
18051
fscc_chk_bsun:
18052
btst &bsun_bit,FPCR_ENABLE(%a6) # was BSUN set?
18053
bne.w fscc_bsun
18054
18055
#
18056
# the bsun exception bit was not set.
18057
# the result has been selected.
18058
# now, check to see if the result is to be stored in the data register
18059
# file or in memory.
18060
#
18061
fscc_done:
18062
mov.l %d0,%a0 # save result for a moment
18063
18064
mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
18065
mov.l %d1,%d0 # make a copy
18066
andi.b &0x38,%d1 # extract src mode
18067
18068
bne.b fscc_mem_op # it's a memory operation
18069
18070
mov.l %d0,%d1
18071
andi.w &0x7,%d1 # pass index in d1
18072
mov.l %a0,%d0 # pass result in d0
18073
bsr.l store_dreg_b # save result in regfile
18074
rts
18075
18076
#
18077
# the stacked <ea> is correct with the exception of:
18078
# -> Dn : <ea> is garbage
18079
#
18080
# if the addressing mode is post-increment or pre-decrement,
18081
# then the address registers have not been updated.
18082
#
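#
# in rough C terms (illustration only; the call forms are not literal):
#
#	if (_dmem_write_byte(EXC_EA, result))	/* write faulted?	*/
#		return fscc_err();		/* An is left unchanged	*/
#	if (mode == postincrement)
#		inc_areg(regno, 1);		/* commit the +1 now	*/
#	else if (mode == predecrement)
#		dec_areg(regno, 1);		/* commit the -1 now	*/
#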
18083
fscc_mem_op:
18084
cmpi.b %d1,&0x18 # is <ea> (An)+ ?
18085
beq.b fscc_mem_inc # yes
18086
cmpi.b %d1,&0x20 # is <ea> -(An) ?
18087
beq.b fscc_mem_dec # yes
18088
18089
mov.l %a0,%d0 # pass result in d0
18090
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18091
bsr.l _dmem_write_byte # write result byte
18092
18093
tst.l %d1 # did dstore fail?
18094
bne.w fscc_err # yes
18095
18096
rts
18097
18098
# addressing mode is post-increment. write the result byte. if the write
18099
# fails then don't update the address register. if write passes then
18100
# call inc_areg() to update the address register.
18101
fscc_mem_inc:
18102
mov.l %a0,%d0 # pass result in d0
18103
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18104
bsr.l _dmem_write_byte # write result byte
18105
18106
tst.l %d1 # did dstore fail?
18107
bne.w fscc_err # yes
18108
18109
mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18110
andi.w &0x7,%d1 # pass index in d1
18111
movq.l &0x1,%d0 # pass amt to inc by
18112
bsr.l inc_areg # increment address register
18113
18114
rts
18115
18116
# addressing mode is pre-decrement. write the result byte. if the write
18117
# fails then don't update the address register. if the write passes then
18118
# call dec_areg() to update the address register.
18119
fscc_mem_dec:
18120
mov.l %a0,%d0 # pass result in d0
18121
mov.l EXC_EA(%a6),%a0 # fetch <ea>
18122
bsr.l _dmem_write_byte # write result byte
18123
18124
tst.l %d1 # did dstore fail?
18125
bne.w fscc_err # yes
18126
18127
mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18128
andi.w &0x7,%d1 # pass index in d1
18129
movq.l &0x1,%d0 # pass amt to dec by
18130
bsr.l dec_areg # decrement address register
18131
18132
rts
18133
18134
# the emulation routine set bsun and BSUN was enabled. have to
18135
# fix stack and jump to the bsun handler.
18136
# let the caller of this routine shift the stack frame up to
18137
# eliminate the effective address field.
18138
fscc_bsun:
18139
mov.b &fbsun_flg,SPCOND_FLG(%a6)
18140
rts
18141
18142
# the byte write to memory has failed. pass the failing effective address
18143
# and a FSLW to funimp_dacc().
18144
fscc_err:
18145
mov.w &0x00a1,EXC_VOFF(%a6)
18146
bra.l facc_finish
18147
18148
#########################################################################
18149
# XDEF **************************************************************** #
18150
# fmovm_dynamic(): emulate "fmovm" dynamic instruction #
18151
# #
18152
# XREF **************************************************************** #
18153
# fetch_dreg() - fetch data register #
18154
# {i,d,}mem_read() - fetch data from memory #
18155
# _mem_write() - write data to memory #
18156
# iea_iacc() - instruction memory access error occurred #
18157
# iea_dacc() - data memory access error occurred #
18158
# restore() - restore An index regs if access error occurred #
18159
# #
18160
# INPUT *************************************************************** #
18161
# None #
18162
# #
18163
# OUTPUT ************************************************************** #
18164
# If instr is "fmovm Dn,-(A7)" from supervisor mode, #
18165
# d0 = size of dump #
18166
# d1 = Dn #
18167
# Else if instruction access error, #
18168
# d0 = FSLW #
18169
# Else if data access error, #
18170
# d0 = FSLW #
18171
# a0 = address of fault #
18172
# Else #
18173
# none. #
18174
# #
18175
# ALGORITHM *********************************************************** #
18176
# The effective address must be calculated since this is entered #
18177
# from an "Unimplemented Effective Address" exception handler. So, we #
18178
# have our own fcalc_ea() routine here. If an access error is flagged #
18179
# by a _{i,d,}mem_read() call, we must exit through the special #
18180
# handler. #
18181
# The data register is determined and its value loaded to get the #
18182
# string of FP registers affected. This value is used as an index into #
18183
# a lookup table such that we can determine the number of bytes #
18184
# involved. #
18185
# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
18186
# to read in all FP values. Again, _mem_read() may fail and require a #
18187
# special exit. #
18188
# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18189
# to write all FP values. _mem_write() may also fail. #
18190
# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
18191
# then we return the size of the dump and the string to the caller #
18192
# so that the move can occur outside of this routine. This special #
18193
# case is required so that moves to the system stack are handled #
18194
# correctly. #
18195
# #
18196
# DYNAMIC: #
18197
# fmovm.x dn, <ea> #
18198
# fmovm.x <ea>, dn #
18199
# #
18200
# <WORD 1> <WORD2> #
18201
# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
18202
# #
18203
# & = (0): predecrement addressing mode #
18204
# (1): postincrement or control addressing mode #
18205
# @ = (0): move listed regs from memory to the FPU #
18206
# (1): move listed regs from the FPU to memory #
18207
# $$$ : index of data register holding reg select mask #
18208
# #
18209
# NOTES: #
18210
# If the data register holds a zero, then the #
18211
# instruction is a nop. #
18212
# #
18213
#########################################################################
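#
# Illustration only (not part of the FPSP build): the second-word field
# layout described above can be pulled apart in C roughly as follows.
# This is a minimal sketch; the struct and function names are made up
# for the example.
#
#   #include <stdio.h>
#
#   struct fmovm_dyn {
#       int to_memory;  /* @: 1 = FPU -> memory, 0 = memory -> FPU        */
#       int post_ctrl;  /* &: 1 = postincrement/control, 0 = predecrement */
#       int dreg;       /* $$$: data register holding the select mask     */
#   };
#
#   static struct fmovm_dyn decode_word2(unsigned short w2)
#   {
#       struct fmovm_dyn d;
#       d.to_memory = (w2 >> 13) & 1;   /* @ is bit 13      */
#       d.post_ctrl = (w2 >> 12) & 1;   /* & is bit 12      */
#       d.dreg      = (w2 >> 4) & 7;    /* $$$ is bits 6-4  */
#       return d;
#   }
#
#   int main(void)
#   {
#       struct fmovm_dyn d = decode_word2(0xe820);  /* example word */
#       printf("to_mem=%d post_ctrl=%d dreg=%d\n",
#              d.to_memory, d.post_ctrl, d.dreg);
#       return 0;
#   }
#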
18214
18215
global fmovm_dynamic
18216
fmovm_dynamic:
18217
18218
# extract the data register in which the bit string resides...
18219
mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
18220
andi.w &0x70,%d1 # extract reg bits
18221
lsr.b &0x4,%d1 # shift into lo bits
18222
18223
# fetch the bit string into d0...
18224
bsr.l fetch_dreg # fetch reg string
18225
18226
andi.l &0x000000ff,%d0 # keep only lo byte
18227
18228
mov.l %d0,-(%sp) # save strg
18229
mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
18230
mov.l %d0,-(%sp) # save size
18231
bsr.l fmovm_calc_ea # calculate <ea>
18232
mov.l (%sp)+,%d0 # restore size
18233
mov.l (%sp)+,%d1 # restore strg
18234
18235
# if the bit string is a zero, then the operation is a no-op
18236
# but, make sure that we've calculated ea and advanced the opword pointer
18237
beq.w fmovm_data_done
18238
18239
# separate move ins from move outs...
18240
btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
18241
beq.w fmovm_data_in # it's a move in
18242
18243
#############
18244
# MOVE OUT: #
18245
#############
18246
fmovm_data_out:
18247
btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
18248
bne.w fmovm_out_ctrl # control
18249
18250
############################
18251
fmovm_out_predec:
18252
# for predecrement mode, the bit string is the opposite of both control
18253
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254
# here, we convert it to be just like the others...
18255
mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
18256
18257
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
18258
beq.b fmovm_out_ctrl # user
18259
18260
fmovm_out_predec_s:
18261
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262
bne.b fmovm_out_ctrl
18263
18264
# the operation was, unfortunately, an "fmovm.x dn,-(sp)" issued from
# supervisor mode.
# we're also passing "size" and "strg" back to the calling routine.
18267
rts
18268
18269
############################
18270
fmovm_out_ctrl:
18271
mov.l %a0,%a1 # move <ea> to a1
18272
18273
sub.l %d0,%sp # subtract size of dump
18274
lea (%sp),%a0
18275
18276
tst.b %d1 # should FP0 be moved?
18277
bpl.b fmovm_out_ctrl_fp1 # no
18278
18279
mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
18280
mov.l 0x4+EXC_FP0(%a6),(%a0)+
18281
mov.l 0x8+EXC_FP0(%a6),(%a0)+
18282
18283
fmovm_out_ctrl_fp1:
18284
lsl.b &0x1,%d1 # should FP1 be moved?
18285
bpl.b fmovm_out_ctrl_fp2 # no
18286
18287
mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
18288
mov.l 0x4+EXC_FP1(%a6),(%a0)+
18289
mov.l 0x8+EXC_FP1(%a6),(%a0)+
18290
18291
fmovm_out_ctrl_fp2:
18292
lsl.b &0x1,%d1 # should FP2 be moved?
18293
bpl.b fmovm_out_ctrl_fp3 # no
18294
18295
fmovm.x &0x20,(%a0) # yes
18296
add.l &0xc,%a0
18297
18298
fmovm_out_ctrl_fp3:
18299
lsl.b &0x1,%d1 # should FP3 be moved?
18300
bpl.b fmovm_out_ctrl_fp4 # no
18301
18302
fmovm.x &0x10,(%a0) # yes
18303
add.l &0xc,%a0
18304
18305
fmovm_out_ctrl_fp4:
18306
lsl.b &0x1,%d1 # should FP4 be moved?
18307
bpl.b fmovm_out_ctrl_fp5 # no
18308
18309
fmovm.x &0x08,(%a0) # yes
18310
add.l &0xc,%a0
18311
18312
fmovm_out_ctrl_fp5:
18313
lsl.b &0x1,%d1 # should FP5 be moved?
18314
bpl.b fmovm_out_ctrl_fp6 # no
18315
18316
fmovm.x &0x04,(%a0) # yes
18317
add.l &0xc,%a0
18318
18319
fmovm_out_ctrl_fp6:
18320
lsl.b &0x1,%d1 # should FP6 be moved?
18321
bpl.b fmovm_out_ctrl_fp7 # no
18322
18323
fmovm.x &0x02,(%a0) # yes
18324
add.l &0xc,%a0
18325
18326
fmovm_out_ctrl_fp7:
18327
lsl.b &0x1,%d1 # should FP7 be moved?
18328
bpl.b fmovm_out_ctrl_done # no
18329
18330
fmovm.x &0x01,(%a0) # yes
18331
add.l &0xc,%a0
18332
18333
fmovm_out_ctrl_done:
18334
mov.l %a1,L_SCR1(%a6)
18335
18336
lea (%sp),%a0 # pass: supervisor src
18337
mov.l %d0,-(%sp) # save size
18338
bsr.l _dmem_write # copy data to user mem
18339
18340
mov.l (%sp)+,%d0
18341
add.l %d0,%sp # clear fpreg data from stack
18342
18343
tst.l %d1 # did dstore err?
18344
bne.w fmovm_out_err # yes
18345
18346
rts
18347
18348
############
18349
# MOVE IN: #
18350
############
18351
fmovm_data_in:
18352
mov.l %a0,L_SCR1(%a6)
18353
18354
sub.l %d0,%sp # make room for fpregs
18355
lea (%sp),%a1
18356
18357
mov.l %d1,-(%sp) # save bit string for later
18358
mov.l %d0,-(%sp) # save # of bytes
18359
18360
bsr.l _dmem_read # copy data from user mem
18361
18362
mov.l (%sp)+,%d0 # retrieve # of bytes
18363
18364
tst.l %d1 # did dfetch fail?
18365
bne.w fmovm_in_err # yes
18366
18367
mov.l (%sp)+,%d1 # load bit string
18368
18369
lea (%sp),%a0 # addr of stack
18370
18371
tst.b %d1 # should FP0 be moved?
18372
bpl.b fmovm_data_in_fp1 # no
18373
18374
mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
18375
mov.l (%a0)+,0x4+EXC_FP0(%a6)
18376
mov.l (%a0)+,0x8+EXC_FP0(%a6)
18377
18378
fmovm_data_in_fp1:
18379
lsl.b &0x1,%d1 # should FP1 be moved?
18380
bpl.b fmovm_data_in_fp2 # no
18381
18382
mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
18383
mov.l (%a0)+,0x4+EXC_FP1(%a6)
18384
mov.l (%a0)+,0x8+EXC_FP1(%a6)
18385
18386
fmovm_data_in_fp2:
18387
lsl.b &0x1,%d1 # should FP2 be moved?
18388
bpl.b fmovm_data_in_fp3 # no
18389
18390
fmovm.x (%a0)+,&0x20 # yes
18391
18392
fmovm_data_in_fp3:
18393
lsl.b &0x1,%d1 # should FP3 be moved?
18394
bpl.b fmovm_data_in_fp4 # no
18395
18396
fmovm.x (%a0)+,&0x10 # yes
18397
18398
fmovm_data_in_fp4:
18399
lsl.b &0x1,%d1 # should FP4 be moved?
18400
bpl.b fmovm_data_in_fp5 # no
18401
18402
fmovm.x (%a0)+,&0x08 # yes
18403
18404
fmovm_data_in_fp5:
18405
lsl.b &0x1,%d1 # should FP5 be moved?
18406
bpl.b fmovm_data_in_fp6 # no
18407
18408
fmovm.x (%a0)+,&0x04 # yes
18409
18410
fmovm_data_in_fp6:
18411
lsl.b &0x1,%d1 # should FP6 be moved?
18412
bpl.b fmovm_data_in_fp7 # no
18413
18414
fmovm.x (%a0)+,&0x02 # yes
18415
18416
fmovm_data_in_fp7:
18417
lsl.b &0x1,%d1 # should FP7 be moved?
18418
bpl.b fmovm_data_in_done # no
18419
18420
fmovm.x (%a0)+,&0x01 # yes
18421
18422
fmovm_data_in_done:
18423
add.l %d0,%sp # remove fpregs from stack
18424
rts
18425
18426
#####################################
18427
18428
fmovm_data_done:
18429
rts
18430
18431
##############################################################################
18432
18433
#
18434
# table indexed by the operation's bit string that gives the number
18435
# of bytes that will be moved.
18436
#
18437
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18438
#
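#
# Illustration only (not assembled): each entry of the table below is
# just the number of set bits in its index times 12 (bytes per fpreg),
# so the whole table can be regenerated with a short C program:
#
#   #include <stdio.h>
#
#   static int popcount8(unsigned v)
#   {
#       int n = 0;
#       for (; v; v >>= 1)      /* count the set bits of the index */
#           n += v & 1;
#       return n;
#   }
#
#   int main(void)
#   {
#       for (int i = 0; i < 256; i++)
#           printf("0x%02x%c", popcount8(i) * 12,
#                  (i % 8 == 7) ? '\n' : ',');
#       return 0;
#   }
#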
18439
tbl_fmovm_size:
18440
byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18441
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18442
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18443
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18444
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18445
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18446
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18447
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18448
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18449
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18450
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18451
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18452
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18454
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18455
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18456
byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18457
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18458
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18460
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18461
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18462
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18464
byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18465
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18466
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18467
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18468
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18470
byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18471
byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
18472
18473
#
18474
# table to convert a pre-decrement bit string into a post-increment
18475
# or control bit string.
18476
# ex: 0x00 ==> 0x00
18477
# 0x01 ==> 0x80
18478
# 0x02 ==> 0x40
18479
# .
18480
# .
18481
# 0xfd ==> 0xbf
18482
# 0xfe ==> 0x7f
18483
# 0xff ==> 0xff
18484
#
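#
# Illustration only (not assembled): the conversion table below is an
# 8-bit bit-reversal (bit0 <-> bit7, bit1 <-> bit6, ...). A C sketch
# that regenerates it:
#
#   #include <stdio.h>
#
#   static unsigned reverse8(unsigned x)
#   {
#       unsigned r = 0;
#       for (int i = 0; i < 8; i++)
#           if (x & (1u << i))
#               r |= 1u << (7 - i);     /* mirror each bit */
#       return r;
#   }
#
#   int main(void)
#   {
#       for (int i = 0; i < 256; i++)
#           printf("0x%02x%c", reverse8((unsigned)i),
#                  (i % 8 == 7) ? '\n' : ',');
#       return 0;
#   }
#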
18485
tbl_fmovm_convert:
18486
byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18487
byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18488
byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18489
byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18490
byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18491
byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18492
byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18493
byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18494
byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18495
byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18496
byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18497
byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18498
byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18499
byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18500
byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18501
byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18502
byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18503
byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18504
byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18505
byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18506
byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18507
byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18508
byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18509
byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18510
byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18511
byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18512
byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18513
byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18514
byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18515
byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18516
byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18517
byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
18518
18519
global fmovm_calc_ea
18520
###############################################
18521
# _fmovm_calc_ea: calculate effective address #
18522
###############################################
18523
fmovm_calc_ea:
18524
mov.l %d0,%a0 # move # bytes to a0
18525
18526
# currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527
# easily changed if they were inputs passed in registers.
18528
mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
18529
mov.w %d0,%d1 # make a copy
18530
18531
andi.w &0x3f,%d0 # extract mode and reg fields
18532
andi.l &0x7,%d1 # extract reg field
18533
18534
# jump to the corresponding function for each {MODE,REG} pair.
18535
mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536
jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18537
18538
swbeg &64
18539
tbl_fea_mode:
18540
short tbl_fea_mode - tbl_fea_mode
18541
short tbl_fea_mode - tbl_fea_mode
18542
short tbl_fea_mode - tbl_fea_mode
18543
short tbl_fea_mode - tbl_fea_mode
18544
short tbl_fea_mode - tbl_fea_mode
18545
short tbl_fea_mode - tbl_fea_mode
18546
short tbl_fea_mode - tbl_fea_mode
18547
short tbl_fea_mode - tbl_fea_mode
18548
18549
short tbl_fea_mode - tbl_fea_mode
18550
short tbl_fea_mode - tbl_fea_mode
18551
short tbl_fea_mode - tbl_fea_mode
18552
short tbl_fea_mode - tbl_fea_mode
18553
short tbl_fea_mode - tbl_fea_mode
18554
short tbl_fea_mode - tbl_fea_mode
18555
short tbl_fea_mode - tbl_fea_mode
18556
short tbl_fea_mode - tbl_fea_mode
18557
18558
short faddr_ind_a0 - tbl_fea_mode
18559
short faddr_ind_a1 - tbl_fea_mode
18560
short faddr_ind_a2 - tbl_fea_mode
18561
short faddr_ind_a3 - tbl_fea_mode
18562
short faddr_ind_a4 - tbl_fea_mode
18563
short faddr_ind_a5 - tbl_fea_mode
18564
short faddr_ind_a6 - tbl_fea_mode
18565
short faddr_ind_a7 - tbl_fea_mode
18566
18567
short faddr_ind_p_a0 - tbl_fea_mode
18568
short faddr_ind_p_a1 - tbl_fea_mode
18569
short faddr_ind_p_a2 - tbl_fea_mode
18570
short faddr_ind_p_a3 - tbl_fea_mode
18571
short faddr_ind_p_a4 - tbl_fea_mode
18572
short faddr_ind_p_a5 - tbl_fea_mode
18573
short faddr_ind_p_a6 - tbl_fea_mode
18574
short faddr_ind_p_a7 - tbl_fea_mode
18575
18576
short faddr_ind_m_a0 - tbl_fea_mode
18577
short faddr_ind_m_a1 - tbl_fea_mode
18578
short faddr_ind_m_a2 - tbl_fea_mode
18579
short faddr_ind_m_a3 - tbl_fea_mode
18580
short faddr_ind_m_a4 - tbl_fea_mode
18581
short faddr_ind_m_a5 - tbl_fea_mode
18582
short faddr_ind_m_a6 - tbl_fea_mode
18583
short faddr_ind_m_a7 - tbl_fea_mode
18584
18585
short faddr_ind_disp_a0 - tbl_fea_mode
18586
short faddr_ind_disp_a1 - tbl_fea_mode
18587
short faddr_ind_disp_a2 - tbl_fea_mode
18588
short faddr_ind_disp_a3 - tbl_fea_mode
18589
short faddr_ind_disp_a4 - tbl_fea_mode
18590
short faddr_ind_disp_a5 - tbl_fea_mode
18591
short faddr_ind_disp_a6 - tbl_fea_mode
18592
short faddr_ind_disp_a7 - tbl_fea_mode
18593
18594
short faddr_ind_ext - tbl_fea_mode
18595
short faddr_ind_ext - tbl_fea_mode
18596
short faddr_ind_ext - tbl_fea_mode
18597
short faddr_ind_ext - tbl_fea_mode
18598
short faddr_ind_ext - tbl_fea_mode
18599
short faddr_ind_ext - tbl_fea_mode
18600
short faddr_ind_ext - tbl_fea_mode
18601
short faddr_ind_ext - tbl_fea_mode
18602
18603
short fabs_short - tbl_fea_mode
18604
short fabs_long - tbl_fea_mode
18605
short fpc_ind - tbl_fea_mode
18606
short fpc_ind_ext - tbl_fea_mode
18607
short tbl_fea_mode - tbl_fea_mode
18608
short tbl_fea_mode - tbl_fea_mode
18609
short tbl_fea_mode - tbl_fea_mode
18610
short tbl_fea_mode - tbl_fea_mode
18611
18612
###################################
18613
# Address register indirect: (An) #
18614
###################################
18615
faddr_ind_a0:
18616
mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
18617
rts
18618
18619
faddr_ind_a1:
18620
mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
18621
rts
18622
18623
faddr_ind_a2:
18624
mov.l %a2,%a0 # Get current a2
18625
rts
18626
18627
faddr_ind_a3:
18628
mov.l %a3,%a0 # Get current a3
18629
rts
18630
18631
faddr_ind_a4:
18632
mov.l %a4,%a0 # Get current a4
18633
rts
18634
18635
faddr_ind_a5:
18636
mov.l %a5,%a0 # Get current a5
18637
rts
18638
18639
faddr_ind_a6:
18640
mov.l (%a6),%a0 # Get current a6
18641
rts
18642
18643
faddr_ind_a7:
18644
mov.l EXC_A7(%a6),%a0 # Get current a7
18645
rts
18646
18647
#####################################################
18648
# Address register indirect w/ postincrement: (An)+ #
18649
#####################################################
18650
faddr_ind_p_a0:
18651
mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18652
mov.l %d0,%d1
18653
add.l %a0,%d1 # Increment
18654
mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
18655
mov.l %d0,%a0
18656
rts
18657
18658
faddr_ind_p_a1:
18659
mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18660
mov.l %d0,%d1
18661
add.l %a0,%d1 # Increment
18662
mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
18663
mov.l %d0,%a0
18664
rts
18665
18666
faddr_ind_p_a2:
18667
mov.l %a2,%d0 # Get current a2
18668
mov.l %d0,%d1
18669
add.l %a0,%d1 # Increment
18670
mov.l %d1,%a2 # Save incr value
18671
mov.l %d0,%a0
18672
rts
18673
18674
faddr_ind_p_a3:
18675
mov.l %a3,%d0 # Get current a3
18676
mov.l %d0,%d1
18677
add.l %a0,%d1 # Increment
18678
mov.l %d1,%a3 # Save incr value
18679
mov.l %d0,%a0
18680
rts
18681
18682
faddr_ind_p_a4:
18683
mov.l %a4,%d0 # Get current a4
18684
mov.l %d0,%d1
18685
add.l %a0,%d1 # Increment
18686
mov.l %d1,%a4 # Save incr value
18687
mov.l %d0,%a0
18688
rts
18689
18690
faddr_ind_p_a5:
18691
mov.l %a5,%d0 # Get current a5
18692
mov.l %d0,%d1
18693
add.l %a0,%d1 # Increment
18694
mov.l %d1,%a5 # Save incr value
18695
mov.l %d0,%a0
18696
rts
18697
18698
faddr_ind_p_a6:
18699
mov.l (%a6),%d0 # Get current a6
18700
mov.l %d0,%d1
18701
add.l %a0,%d1 # Increment
18702
mov.l %d1,(%a6) # Save incr value
18703
mov.l %d0,%a0
18704
rts
18705
18706
faddr_ind_p_a7:
18707
mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18708
18709
mov.l EXC_A7(%a6),%d0 # Get current a7
18710
mov.l %d0,%d1
18711
add.l %a0,%d1 # Increment
18712
mov.l %d1,EXC_A7(%a6) # Save incr value
18713
mov.l %d0,%a0
18714
rts
18715
18716
####################################################
18717
# Address register indirect w/ predecrement: -(An) #
18718
####################################################
18719
faddr_ind_m_a0:
18720
mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18721
sub.l %a0,%d0 # Decrement
18722
mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
18723
mov.l %d0,%a0
18724
rts
18725
18726
faddr_ind_m_a1:
18727
mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18728
sub.l %a0,%d0 # Decrement
18729
mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
18730
mov.l %d0,%a0
18731
rts
18732
18733
faddr_ind_m_a2:
18734
mov.l %a2,%d0 # Get current a2
18735
sub.l %a0,%d0 # Decrement
18736
mov.l %d0,%a2 # Save decr value
18737
mov.l %d0,%a0
18738
rts
18739
18740
faddr_ind_m_a3:
18741
mov.l %a3,%d0 # Get current a3
18742
sub.l %a0,%d0 # Decrement
18743
mov.l %d0,%a3 # Save decr value
18744
mov.l %d0,%a0
18745
rts
18746
18747
faddr_ind_m_a4:
18748
mov.l %a4,%d0 # Get current a4
18749
sub.l %a0,%d0 # Decrement
18750
mov.l %d0,%a4 # Save decr value
18751
mov.l %d0,%a0
18752
rts
18753
18754
faddr_ind_m_a5:
18755
mov.l %a5,%d0 # Get current a5
18756
sub.l %a0,%d0 # Decrement
18757
mov.l %d0,%a5 # Save decr value
18758
mov.l %d0,%a0
18759
rts
18760
18761
faddr_ind_m_a6:
18762
mov.l (%a6),%d0 # Get current a6
18763
sub.l %a0,%d0 # Decrement
18764
mov.l %d0,(%a6) # Save decr value
18765
mov.l %d0,%a0
18766
rts
18767
18768
faddr_ind_m_a7:
18769
mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18770
18771
mov.l EXC_A7(%a6),%d0 # Get current a7
18772
sub.l %a0,%d0 # Decrement
18773
mov.l %d0,EXC_A7(%a6) # Save decr value
18774
mov.l %d0,%a0
18775
rts
18776
18777
########################################################
18778
# Address register indirect w/ displacement: (d16, An) #
18779
########################################################
18780
faddr_ind_disp_a0:
18781
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18782
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18783
bsr.l _imem_read_word
18784
18785
tst.l %d1 # did ifetch fail?
18786
bne.l iea_iacc # yes
18787
18788
mov.w %d0,%a0 # sign extend displacement
18789
18790
add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
18791
rts
18792
18793
faddr_ind_disp_a1:
18794
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18795
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18796
bsr.l _imem_read_word
18797
18798
tst.l %d1 # did ifetch fail?
18799
bne.l iea_iacc # yes
18800
18801
mov.w %d0,%a0 # sign extend displacement
18802
18803
add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
18804
rts
18805
18806
faddr_ind_disp_a2:
18807
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18808
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18809
bsr.l _imem_read_word
18810
18811
tst.l %d1 # did ifetch fail?
18812
bne.l iea_iacc # yes
18813
18814
mov.w %d0,%a0 # sign extend displacement
18815
18816
add.l %a2,%a0 # a2 + d16
18817
rts
18818
18819
faddr_ind_disp_a3:
18820
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18821
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18822
bsr.l _imem_read_word
18823
18824
tst.l %d1 # did ifetch fail?
18825
bne.l iea_iacc # yes
18826
18827
mov.w %d0,%a0 # sign extend displacement
18828
18829
add.l %a3,%a0 # a3 + d16
18830
rts
18831
18832
faddr_ind_disp_a4:
18833
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18834
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18835
bsr.l _imem_read_word
18836
18837
tst.l %d1 # did ifetch fail?
18838
bne.l iea_iacc # yes
18839
18840
mov.w %d0,%a0 # sign extend displacement
18841
18842
add.l %a4,%a0 # a4 + d16
18843
rts
18844
18845
faddr_ind_disp_a5:
18846
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18847
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18848
bsr.l _imem_read_word
18849
18850
tst.l %d1 # did ifetch fail?
18851
bne.l iea_iacc # yes
18852
18853
mov.w %d0,%a0 # sign extend displacement
18854
18855
add.l %a5,%a0 # a5 + d16
18856
rts
18857
18858
faddr_ind_disp_a6:
18859
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18860
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18861
bsr.l _imem_read_word
18862
18863
tst.l %d1 # did ifetch fail?
18864
bne.l iea_iacc # yes
18865
18866
mov.w %d0,%a0 # sign extend displacement
18867
18868
add.l (%a6),%a0 # a6 + d16
18869
rts
18870
18871
faddr_ind_disp_a7:
18872
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18873
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18874
bsr.l _imem_read_word
18875
18876
tst.l %d1 # did ifetch fail?
18877
bne.l iea_iacc # yes
18878
18879
mov.w %d0,%a0 # sign extend displacement
18880
18881
add.l EXC_A7(%a6),%a0 # a7 + d16
18882
rts
18883
18884
########################################################################
18885
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886
# " " " w/ " (base displacement): (bd, An, Xn) #
18887
# Memory indirect postindexed: ([bd, An], Xn, od) #
18888
# Memory indirect preindexed: ([bd, An, Xn], od) #
18889
########################################################################
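#
# Illustration only (not assembled): for the brief-format extension word
# handled just below (the full format branches to fcalc_mem_ind), the
# effective address is An + scaled index + sign-extended 8-bit
# displacement. A C sketch of that computation, assuming regs[] holds
# d0-d7 followed by a0-a7 (names made up for the example):
#
#   #include <stdint.h>
#
#   /* ext word: | D/A+reg 15-12 | W/L 11 | scale 10-9 | 0 | d8 7-0 | */
#   uint32_t brief_ext_ea(uint32_t an, uint16_t ext, const uint32_t regs[16])
#   {
#       uint32_t idx = regs[(ext >> 12) & 0xf];     /* index register   */
#       if (!(ext & 0x0800))                        /* W/L clear: word  */
#           idx = (uint32_t)(int16_t)idx;           /* sign-extend word */
#       idx <<= (ext >> 9) & 0x3;                   /* apply scale      */
#       return an + idx + (uint32_t)(int8_t)(ext & 0xff);
#   }
#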
18890
faddr_ind_ext:
18891
addq.l &0x8,%d1
18892
bsr.l fetch_dreg # fetch base areg
18893
mov.l %d0,-(%sp)
18894
18895
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18896
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18897
bsr.l _imem_read_word # fetch extword in d0
18898
18899
tst.l %d1 # did ifetch fail?
18900
bne.l iea_iacc # yes
18901
18902
mov.l (%sp)+,%a0
18903
18904
btst &0x8,%d0
18905
bne.w fcalc_mem_ind
18906
18907
mov.l %d0,L_SCR1(%a6) # hold opword
18908
18909
mov.l %d0,%d1
18910
rol.w &0x4,%d1
18911
andi.w &0xf,%d1 # extract index regno
18912
18913
# count on fetch_dreg() not to alter a0...
18914
bsr.l fetch_dreg # fetch index
18915
18916
mov.l %d2,-(%sp) # save d2
18917
mov.l L_SCR1(%a6),%d2 # fetch opword
18918
18919
btst &0xb,%d2 # is it word or long?
18920
bne.b faii8_long
18921
ext.l %d0 # sign extend word index
18922
faii8_long:
18923
mov.l %d2,%d1
18924
rol.w &0x7,%d1
18925
andi.l &0x3,%d1 # extract scale value
18926
18927
lsl.l %d1,%d0 # shift index by scale
18928
18929
extb.l %d2 # sign extend displacement
18930
add.l %d2,%d0 # index + disp
18931
add.l %d0,%a0 # An + (index + disp)
18932
18933
mov.l (%sp)+,%d2 # restore old d2
18934
rts
18935
18936
###########################
18937
# Absolute short: (XXX).W #
18938
###########################
18939
fabs_short:
18940
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18941
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18942
bsr.l _imem_read_word # fetch short address
18943
18944
tst.l %d1 # did ifetch fail?
18945
bne.l iea_iacc # yes
18946
18947
mov.w %d0,%a0 # return <ea> in a0
18948
rts
18949
18950
##########################
18951
# Absolute long: (XXX).L #
18952
##########################
18953
fabs_long:
18954
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18955
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
18956
bsr.l _imem_read_long # fetch long address
18957
18958
tst.l %d1 # did ifetch fail?
18959
bne.l iea_iacc # yes
18960
18961
mov.l %d0,%a0 # return <ea> in a0
18962
rts
18963
18964
#######################################################
18965
# Program counter indirect w/ displacement: (d16, PC) #
18966
#######################################################
18967
fpc_ind:
18968
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18969
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18970
bsr.l _imem_read_word # fetch word displacement
18971
18972
tst.l %d1 # did ifetch fail?
18973
bne.l iea_iacc # yes
18974
18975
mov.w %d0,%a0 # sign extend displacement
18976
18977
add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
18978
18979
# _imem_read_word() increased the extwptr by 2. need to adjust here.
18980
subq.l &0x2,%a0 # adjust <ea>
18981
rts
18982
18983
##########################################################
18984
# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
# " " w/ " (base displacement): (bd, PC, Xn) #
18986
# PC memory indirect postindexed: ([bd, PC], Xn, od) #
18987
# PC memory indirect preindexed: ([bd, PC, Xn], od) #
18988
##########################################################
18989
fpc_ind_ext:
18990
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18991
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18992
bsr.l _imem_read_word # fetch ext word
18993
18994
tst.l %d1 # did ifetch fail?
18995
bne.l iea_iacc # yes
18996
18997
mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
18998
subq.l &0x2,%a0 # adjust base
18999
19000
btst &0x8,%d0 # is disp only 8 bits?
19001
bne.w fcalc_mem_ind # calc memory indirect
19002
19003
mov.l %d0,L_SCR1(%a6) # store opword
19004
19005
mov.l %d0,%d1 # make extword copy
19006
rol.w &0x4,%d1 # rotate reg num into place
19007
andi.w &0xf,%d1 # extract register number
19008
19009
# count on fetch_dreg() not to alter a0...
19010
bsr.l fetch_dreg # fetch index
19011
19012
mov.l %d2,-(%sp) # save d2
19013
mov.l L_SCR1(%a6),%d2 # fetch opword
19014
19015
btst &0xb,%d2 # is index word or long?
19016
bne.b fpii8_long # long
19017
ext.l %d0 # sign extend word index
19018
fpii8_long:
19019
mov.l %d2,%d1
19020
rol.w &0x7,%d1 # rotate scale value into place
19021
andi.l &0x3,%d1 # extract scale value
19022
19023
lsl.l %d1,%d0 # shift index by scale
19024
19025
extb.l %d2 # sign extend displacement
19026
add.l %d2,%d0 # disp + index
19027
add.l %d0,%a0 # An + (index + disp)
19028
19029
mov.l (%sp)+,%d2 # restore temp register
19030
rts
19031
19032
# d2 = index
19033
# d3 = base
19034
# d4 = od
19035
# d5 = extword
19036
fcalc_mem_ind:
19037
btst &0x6,%d0 # is the index suppressed?
19038
beq.b fcalc_index
19039
19040
movm.l &0x3c00,-(%sp) # save d2-d5
19041
19042
mov.l %d0,%d5 # put extword in d5
19043
mov.l %a0,%d3 # put base in d3
19044
19045
clr.l %d2 # yes, so index = 0
19046
bra.b fbase_supp_ck
19047
19048
# index:
19049
fcalc_index:
19050
mov.l %d0,L_SCR1(%a6) # save d0 (opword)
19051
bfextu %d0{&16:&4},%d1 # fetch dreg index
19052
bsr.l fetch_dreg
19053
19054
movm.l &0x3c00,-(%sp) # save d2-d5
19055
mov.l %d0,%d2 # put index in d2
19056
mov.l L_SCR1(%a6),%d5
19057
mov.l %a0,%d3
19058
19059
btst &0xb,%d5 # is index word or long?
19060
bne.b fno_ext
19061
ext.l %d2
19062
19063
fno_ext:
19064
bfextu %d5{&21:&2},%d0
19065
lsl.l %d0,%d2
19066
19067
# base address (passed as parameter in d3):
19068
# we clear the value here if it should actually be suppressed.
19069
fbase_supp_ck:
19070
btst &0x7,%d5 # is the bd suppressed?
19071
beq.b fno_base_sup
19072
clr.l %d3
19073
19074
# base displacement:
19075
fno_base_sup:
19076
bfextu %d5{&26:&2},%d0 # get bd size
19077
# beq.l fmovm_error # if (size == 0) it's reserved
19078
19079
cmpi.b %d0,&0x2
19080
blt.b fno_bd
19081
beq.b fget_word_bd
19082
19083
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19084
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19085
bsr.l _imem_read_long
19086
19087
tst.l %d1 # did ifetch fail?
19088
bne.l fcea_iacc # yes
19089
19090
bra.b fchk_ind
19091
19092
fget_word_bd:
19093
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19094
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19095
bsr.l _imem_read_word
19096
19097
tst.l %d1 # did ifetch fail?
19098
bne.l fcea_iacc # yes
19099
19100
ext.l %d0 # sign extend bd
19101
19102
fchk_ind:
19103
add.l %d0,%d3 # base += bd
19104
19105
# outer displacement:
19106
fno_bd:
19107
bfextu %d5{&30:&2},%d0 # is od suppressed?
19108
beq.w faii_bd
19109
19110
cmpi.b %d0,&0x2
19111
blt.b fnull_od
19112
beq.b fword_od
19113
19114
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19115
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19116
bsr.l _imem_read_long
19117
19118
tst.l %d1 # did ifetch fail?
19119
bne.l fcea_iacc # yes
19120
19121
bra.b fadd_them
19122
19123
fword_od:
19124
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19125
addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19126
bsr.l _imem_read_word
19127
19128
tst.l %d1 # did ifetch fail?
19129
bne.l fcea_iacc # yes
19130
19131
ext.l %d0 # sign extend od
19132
bra.b fadd_them
19133
19134
fnull_od:
19135
clr.l %d0
19136
19137
fadd_them:
19138
mov.l %d0,%d4
19139
19140
btst &0x2,%d5 # pre or post indexing?
19141
beq.b fpre_indexed
19142
19143
mov.l %d3,%a0
19144
bsr.l _dmem_read_long
19145
19146
tst.l %d1 # did dfetch fail?
19147
bne.w fcea_err # yes
19148
19149
add.l %d2,%d0 # <ea> += index
19150
add.l %d4,%d0 # <ea> += od
19151
bra.b fdone_ea
19152
19153
fpre_indexed:
19154
add.l %d2,%d3 # preindexing
19155
mov.l %d3,%a0
19156
bsr.l _dmem_read_long
19157
19158
tst.l %d1 # did dfetch fail?
19159
bne.w fcea_err # yes
19160
19161
add.l %d4,%d0 # ea += od
19162
bra.b fdone_ea
19163
19164
faii_bd:
19165
add.l %d2,%d3 # ea = (base + bd) + index
19166
mov.l %d3,%d0
19167
fdone_ea:
19168
mov.l %d0,%a0
19169
19170
movm.l (%sp)+,&0x003c # restore d2-d5
19171
rts
19172
19173
#########################################################
19174
fcea_err:
19175
mov.l %d3,%a0
19176
19177
movm.l (%sp)+,&0x003c # restore d2-d5
19178
mov.w &0x0101,%d0
19179
bra.l iea_dacc
19180
19181
fcea_iacc:
19182
movm.l (%sp)+,&0x003c # restore d2-d5
19183
bra.l iea_iacc
19184
19185
fmovm_out_err:
19186
bsr.l restore
19187
mov.w &0x00e1,%d0
19188
bra.b fmovm_err
19189
19190
fmovm_in_err:
19191
bsr.l restore
19192
mov.w &0x0161,%d0
19193
19194
fmovm_err:
19195
mov.l L_SCR1(%a6),%a0
19196
bra.l iea_dacc
19197
19198
#########################################################################
19199
# XDEF **************************************************************** #
19200
# fmovm_ctrl(): emulate fmovm.l of control registers instr #
19201
# #
19202
# XREF **************************************************************** #
19203
# _imem_read_long() - read longword from memory #
19204
# iea_iacc() - _imem_read_long() failed; error recovery #
19205
# #
19206
# INPUT *************************************************************** #
19207
# None #
19208
# #
19209
# OUTPUT ************************************************************** #
19210
# If _imem_read_long() doesn't fail: #
19211
# USER_FPCR(a6) = new FPCR value #
19212
# USER_FPSR(a6) = new FPSR value #
19213
# USER_FPIAR(a6) = new FPIAR value #
19214
# #
19215
# ALGORITHM *********************************************************** #
19216
# Decode the instruction type by looking at the extension word #
19217
# in order to see how many control registers to fetch from memory. #
19218
# Fetch them using _imem_read_long(). If this fetch fails, exit through #
19219
# the special access error exit handler iea_iacc(). #
19220
# #
19221
# Instruction word decoding: #
19222
# #
19223
# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
19224
# #
19225
# WORD1 WORD2 #
19226
# 1111 0010 00 111100 100$ $$00 0000 0000 #
19227
# #
19228
# $$$ (100): FPCR #
19229
# (010): FPSR #
19230
# (001): FPIAR #
19231
# (000): FPIAR #
19232
# #
19233
#########################################################################
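#
# Illustration only (not assembled): the $$$ select bits land in the high
# byte of the extension word (the byte compared against 0x9c/0x98/0x94
# below), so membership of each control register can be read off
# directly. A minimal C sketch, with the byte hard-coded for the example:
#
#   #include <stdio.h>
#
#   int main(void)
#   {
#       unsigned ext_hi = 0x9c;             /* example: all three regs */
#       printf("FPCR:%u FPSR:%u FPIAR:%u\n",
#              (ext_hi >> 4) & 1,           /* 0x10 bit -> FPCR  */
#              (ext_hi >> 3) & 1,           /* 0x08 bit -> FPSR  */
#              (ext_hi >> 2) & 1);          /* 0x04 bit -> FPIAR */
#       return 0;
#   }
#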
19234
19235
global fmovm_ctrl
19236
fmovm_ctrl:
19237
mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
19238
cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
19239
beq.w fctrl_in_7 # yes
19240
cmpi.b %d0,&0x98 # fpcr & fpsr ?
19241
beq.w fctrl_in_6 # yes
19242
cmpi.b %d0,&0x94 # fpcr & fpiar ?
19243
beq.b fctrl_in_5 # yes
19244
19245
# fmovem.l #<data>, fpsr/fpiar
19246
fctrl_in_3:
19247
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19248
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19249
bsr.l _imem_read_long # fetch FPSR from mem
19250
19251
tst.l %d1 # did ifetch fail?
19252
bne.l iea_iacc # yes
19253
19254
mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19255
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19256
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19257
bsr.l _imem_read_long # fetch FPIAR from mem
19258
19259
tst.l %d1 # did ifetch fail?
19260
bne.l iea_iacc # yes
19261
19262
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19263
rts
19264
19265
# fmovem.l #<data>, fpcr/fpiar
19266
fctrl_in_5:
19267
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19268
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19269
bsr.l _imem_read_long # fetch FPCR from mem
19270
19271
tst.l %d1 # did ifetch fail?
19272
bne.l iea_iacc # yes
19273
19274
mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19275
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19276
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19277
bsr.l _imem_read_long # fetch FPIAR from mem
19278
19279
tst.l %d1 # did ifetch fail?
19280
bne.l iea_iacc # yes
19281
19282
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19283
rts
19284
19285
# fmovem.l #<data>, fpcr/fpsr
19286
fctrl_in_6:
19287
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19288
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19289
bsr.l _imem_read_long # fetch FPCR from mem
19290
19291
tst.l %d1 # did ifetch fail?
19292
bne.l iea_iacc # yes
19293
19294
mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19295
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19296
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19297
bsr.l _imem_read_long # fetch FPSR from mem
19298
19299
tst.l %d1 # did ifetch fail?
19300
bne.l iea_iacc # yes
19301
19302
mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
19303
rts
19304
19305
# fmovem.l #<data>, fpcr/fpsr/fpiar
19306
fctrl_in_7:
19307
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19308
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19309
bsr.l _imem_read_long # fetch FPCR from mem
19310
19311
tst.l %d1 # did ifetch fail?
19312
bne.l iea_iacc # yes
19313
19314
mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19315
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19316
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19317
bsr.l _imem_read_long # fetch FPSR from mem
19318
19319
tst.l %d1 # did ifetch fail?
19320
bne.l iea_iacc # yes
19321
19322
mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
19323
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19324
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19325
bsr.l _imem_read_long # fetch FPIAR from mem
19326
19327
tst.l %d1 # did ifetch fail?
19328
bne.l iea_iacc # yes
19329
19330
mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
19331
rts
19332
19333
#########################################################################
19334
# XDEF **************************************************************** #
19335
# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
19336
# #
19337
# XREF **************************************************************** #
19338
# inc_areg() - increment an address register #
19339
# dec_areg() - decrement an address register #
19340
# #
19341
# INPUT *************************************************************** #
19342
# d0 = number of bytes to adjust <ea> by #
19343
# #
19344
# OUTPUT ************************************************************** #
19345
# None #
19346
# #
19347
# ALGORITHM *********************************************************** #
19348
# "Dummy" CALCulate Effective Address: #
19349
# The stacked <ea> for FP unimplemented instructions and opclass #
19350
# two packed instructions is correct with the exception of... #
19351
# #
19352
# 1) -(An) : The register is not updated regardless of size. #
19353
# Also, for extended precision and packed, the #
19354
# stacked <ea> value is 8 bytes too big #
19355
# 2) (An)+ : The register is not updated. #
19356
# 3) #<data> : The upper longword of the immediate operand is #
# stacked. b, w, l, and s sizes are completely stacked; #
# d, x, and p are not. #
19359
# #
19360
#########################################################################
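#
# Illustration only (not assembled): a C sketch of the <ea> fix-up the
# routine below performs. The immediate-data case is omitted and the
# names are made up for the example; mode values match the opword mode
# field compared below (0x18 = (An)+, 0x20 = -(An)).
#
#   unsigned long dcalc_ea(unsigned long stacked_ea, unsigned long *an,
#                          int mode, int nbytes)
#   {
#       if (mode == 0x18) {             /* (An)+ : we update An          */
#           *an += nbytes;
#           return stacked_ea;
#       }
#       if (mode == 0x20) {             /* -(An) : we update An          */
#           *an -= nbytes;
#           if (nbytes == 12)           /* ext/packed: <ea> 8 bytes big  */
#               return stacked_ea - 8;
#           return stacked_ea;
#       }
#       return stacked_ea;              /* other modes already correct   */
#   }
#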
19361
19362
global _dcalc_ea
19363
_dcalc_ea:
19364
mov.l %d0, %a0 # move # bytes to %a0
19365
19366
mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
19367
mov.l %d0, %d1 # make a copy
19368
19369
andi.w &0x38, %d0 # extract mode field
19370
andi.l &0x7, %d1 # extract reg field
19371
19372
cmpi.b %d0,&0x18 # is mode (An)+ ?
19373
beq.b dcea_pi # yes
19374
19375
cmpi.b %d0,&0x20 # is mode -(An) ?
19376
beq.b dcea_pd # yes
19377
19378
or.w %d1,%d0 # concat mode,reg
19379
cmpi.b %d0,&0x3c # is mode #<data>?
19380
19381
beq.b dcea_imm # yes
19382
19383
mov.l EXC_EA(%a6),%a0 # return <ea>
19384
rts
19385
19386
# need to set immediate data flag here since we'll need to do
19387
# an imem_read to fetch this later.
19388
dcea_imm:
19389
mov.b &immed_flg,SPCOND_FLG(%a6)
19390
lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
19391
rts
19392
19393
# here, the <ea> is stacked correctly. however, we must update the
19394
# address register...
19395
dcea_pi:
19396
mov.l %a0,%d0 # pass amt to inc by
19397
bsr.l inc_areg # inc addr register
19398
19399
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19400
rts
19401
19402
# the <ea> is stacked correctly for all but extended and packed, for
# which the stacked <ea> is 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode, so we don't even worry about that tricky case here : )
19406
dcea_pd:
19407
mov.l %a0,%d0 # pass amt to dec by
19408
bsr.l dec_areg # dec addr register
19409
19410
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19411
19412
cmpi.b %d0,&0xc # is opsize ext or packed?
19413
beq.b dcea_pd2 # yes
19414
rts
19415
dcea_pd2:
19416
sub.l &0x8,%a0 # correct <ea>
19417
mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
19418
rts
19419
19420
#########################################################################
19421
# XDEF **************************************************************** #
19422
# _calc_ea_fout(): calculate correct stacked <ea> for extended #
19423
# and packed data opclass 3 operations. #
19424
# #
19425
# XREF **************************************************************** #
19426
# None #
19427
# #
19428
# INPUT *************************************************************** #
19429
# None #
19430
# #
19431
# OUTPUT ************************************************************** #
19432
# a0 = return correct effective address #
19433
# #
19434
# ALGORITHM *********************************************************** #
19435
# For opclass 3 extended and packed data operations, the <ea> #
19436
# stacked for the exception is incorrect for -(an) and (an)+ addressing #
19437
# modes. Also, while we're at it, the address register itself must be #
# updated. #
# So, for -(an), we subtract 8 from the stacked <ea> value, return #
# that as the correct <ea>, and store the same value in An. #
19441
# For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
19442
# #
19443
#########################################################################
19444
19445
# This calc_ea is currently used to retrieve the correct <ea>
19446
# for fmove outs of type extended and packed.
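#
# Illustration only (not assembled): a C sketch of the fix-up described
# above for the two affected modes (names made up for the example; mode
# values match the opword mode field compared below).
#
#   unsigned long calc_ea_fout(unsigned long *stacked_ea,
#                              unsigned long *an, int mode)
#   {
#       if (mode == 0x18) {             /* (An)+ : <ea> ok, An += 12     */
#           *an += 12;
#       } else if (mode == 0x20) {      /* -(An) : <ea> is 8 too big     */
#           *stacked_ea -= 8;
#           *an = *stacked_ea;          /* An gets the corrected <ea>    */
#       }
#       return *stacked_ea;
#   }
#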
19447
global _calc_ea_fout
19448
_calc_ea_fout:
19449
mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
19450
mov.l %d0,%d1 # make a copy
19451
19452
andi.w &0x38,%d0 # extract mode field
19453
andi.l &0x7,%d1 # extract reg field
19454
19455
cmpi.b %d0,&0x18 # is mode (An)+ ?
19456
beq.b ceaf_pi # yes
19457
19458
cmpi.b %d0,&0x20 # is mode -(An) ?
19459
beq.w ceaf_pd # yes
19460
19461
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19462
rts
19463
19464
# (An)+ : extended and packed fmove out
19465
# : stacked <ea> is correct
19466
# : "An" not updated
19467
ceaf_pi:
19468
mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
19469
mov.l EXC_EA(%a6),%a0
19470
jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
19471
19472
swbeg &0x8
19473
tbl_ceaf_pi:
19474
short ceaf_pi0 - tbl_ceaf_pi
19475
short ceaf_pi1 - tbl_ceaf_pi
19476
short ceaf_pi2 - tbl_ceaf_pi
19477
short ceaf_pi3 - tbl_ceaf_pi
19478
short ceaf_pi4 - tbl_ceaf_pi
19479
short ceaf_pi5 - tbl_ceaf_pi
19480
short ceaf_pi6 - tbl_ceaf_pi
19481
short ceaf_pi7 - tbl_ceaf_pi
19482
19483
ceaf_pi0:
19484
addi.l &0xc,EXC_DREGS+0x8(%a6)
19485
rts
19486
ceaf_pi1:
19487
addi.l &0xc,EXC_DREGS+0xc(%a6)
19488
rts
19489
ceaf_pi2:
19490
add.l &0xc,%a2
19491
rts
19492
ceaf_pi3:
19493
add.l &0xc,%a3
19494
rts
19495
ceaf_pi4:
19496
add.l &0xc,%a4
19497
rts
19498
ceaf_pi5:
19499
add.l &0xc,%a5
19500
rts
19501
ceaf_pi6:
19502
addi.l &0xc,EXC_A6(%a6)
19503
rts
19504
ceaf_pi7:
19505
mov.b &mia7_flg,SPCOND_FLG(%a6)
19506
addi.l &0xc,EXC_A7(%a6)
19507
rts
19508
19509
# -(An) : extended and packed fmove out
19510
# : stacked <ea> = actual <ea> + 8
19511
# : "An" not updated
19512
ceaf_pd:
19513
mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
19514
mov.l EXC_EA(%a6),%a0
19515
sub.l &0x8,%a0
19516
sub.l &0x8,EXC_EA(%a6)
19517
jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
19518
19519
swbeg &0x8
19520
tbl_ceaf_pd:
19521
short ceaf_pd0 - tbl_ceaf_pd
19522
short ceaf_pd1 - tbl_ceaf_pd
19523
short ceaf_pd2 - tbl_ceaf_pd
19524
short ceaf_pd3 - tbl_ceaf_pd
19525
short ceaf_pd4 - tbl_ceaf_pd
19526
short ceaf_pd5 - tbl_ceaf_pd
19527
short ceaf_pd6 - tbl_ceaf_pd
19528
short ceaf_pd7 - tbl_ceaf_pd
19529
19530
ceaf_pd0:
19531
mov.l %a0,EXC_DREGS+0x8(%a6)
19532
rts
19533
ceaf_pd1:
19534
mov.l %a0,EXC_DREGS+0xc(%a6)
19535
rts
19536
ceaf_pd2:
19537
mov.l %a0,%a2
19538
rts
19539
ceaf_pd3:
19540
mov.l %a0,%a3
19541
rts
19542
ceaf_pd4:
19543
mov.l %a0,%a4
19544
rts
19545
ceaf_pd5:
19546
mov.l %a0,%a5
19547
rts
19548
ceaf_pd6:
19549
mov.l %a0,EXC_A6(%a6)
19550
rts
19551
ceaf_pd7:
19552
mov.l %a0,EXC_A7(%a6)
19553
mov.b &mda7_flg,SPCOND_FLG(%a6)
19554
rts
19555
19556
#########################################################################
19557
# XDEF **************************************************************** #
19558
# _load_fop(): load operand for unimplemented FP exception #
19559
# #
19560
# XREF **************************************************************** #
19561
# set_tag_x() - determine ext prec optype tag #
19562
# set_tag_s() - determine sgl prec optype tag #
19563
# set_tag_d() - determine dbl prec optype tag #
19564
# unnorm_fix() - convert normalized number to denorm or zero #
19565
# norm() - normalize a denormalized number #
19566
# get_packed() - fetch a packed operand from memory #
19567
# _dcalc_ea() - calculate <ea>, fixing An in process #
19568
# #
19569
# _imem_read_{word,long}() - read from instruction memory #
19570
# _dmem_read() - read from data memory #
19571
# _dmem_read_{byte,word,long}() - read from data memory #
19572
# #
19573
# facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
19574
# #
19575
# INPUT *************************************************************** #
19576
# None #
19577
# #
19578
# OUTPUT ************************************************************** #
19579
# If memory access doesn't fail: #
19580
# FP_SRC(a6) = source operand in extended precision #
19581
# FP_DST(a6) = destination operand in extended precision #
19582
# #
19583
# ALGORITHM *********************************************************** #
19584
# This is called from the Unimplemented FP exception handler in #
19585
# order to load the source and maybe destination operand into #
19586
# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
19587
# the source and destination from the FP register file. Set the optype #
19588
# tags for both if dyadic, one for monadic. If a number is an UNNORM, #
19589
# convert it to a DENORM or a ZERO. #
19590
# If the instruction is opclass two (memory->reg), then fetch #
19591
# the destination from the register file and the source operand from #
19592
# memory. Tag and fix both as above w/ opclass zero instructions. #
19593
# If the source operand is byte,word,long, or single, it may be #
19594
# in the data register file. If it's actually out in memory, use one of #
19595
# the mem_read() routines to fetch it. If the mem_read() access returns #
19596
# a failing value, exit through the special facc_in() routine which #
19597
# will create an access error exception frame from the current exception #
19598
# frame. #
19599
# Immediate data and regular data accesses are separated because #
19600
# if an immediate data access fails, the resulting fault status #
19601
# longword stacked for the access error exception must have the #
19602
# instruction bit set. #
19603
# #
19604
#########################################################################
19605
19606
global _load_fop
19607
_load_fop:
19608
19609
#    15       13 12       10 9         7 6                0
#   /           \ /          \ /         \ /                \
#   ---------------------------------------------------------
#   |  opclass   |    RX      |    RY     |    EXTENSION     |  (2nd word of general FP instruction)
#   ---------------------------------------------------------
19614
#
19615
19616
# bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19617
# cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
19618
# beq.w op010 # handle <ea> -> fpn
19619
# bgt.w op011 # handle fpn -> <ea>
19620
19621
# we're not using op011 for now...
19622
btst &0x6,EXC_CMDREG(%a6)
19623
bne.b op010
19624
19625
############################
19626
# OPCLASS '000: reg -> reg #
19627
############################
19628
op000:
19629
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
19630
btst &0x5,%d0 # testing extension bits
19631
beq.b op000_src # (bit 5 == 0) => monadic
19632
btst &0x4,%d0 # (bit 5 == 1)
19633
beq.b op000_dst # (bit 4 == 0) => dyadic
19634
and.w &0x007f,%d0 # extract extension bits {6:0}
19635
cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19636
bne.b op000_src # no; not an fcmp, so it's monadic
19637
19638
op000_dst:
19639
bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19640
bsr.l load_fpn2 # fetch dst fpreg into FP_DST
19641
19642
bsr.l set_tag_x # get dst optype tag
19643
19644
cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19645
beq.b op000_dst_unnorm # yes
19646
op000_dst_cont:
19647
mov.b %d0, DTAG(%a6) # store the dst optype tag
19648
19649
op000_src:
19650
bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19651
bsr.l load_fpn1 # fetch src fpreg into FP_SRC
19652
19653
bsr.l set_tag_x # get src optype tag
19654
19655
cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
19656
beq.b op000_src_unnorm # yes
19657
op000_src_cont:
19658
mov.b %d0, STAG(%a6) # store the src optype tag
19659
rts
19660
19661
op000_dst_unnorm:
19662
bsr.l unnorm_fix # fix the dst UNNORM
19663
bra.b op000_dst_cont
19664
op000_src_unnorm:
19665
bsr.l unnorm_fix # fix the src UNNORM
19666
bra.b op000_src_cont
19667
19668
#############################
19669
# OPCLASS '010: <ea> -> reg #
19670
#############################
19671
op010:
19672
mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
19673
btst &0x5,%d0 # testing extension bits
19674
beq.b op010_src # (bit 5 == 0) => monadic
19675
btst &0x4,%d0 # (bit 5 == 1)
19676
beq.b op010_dst # (bit 4 == 0) => dyadic
19677
and.w &0x007f,%d0 # extract extension bits {6:0}
19678
cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19679
bne.b op010_src # no; not an fcmp, so it's monadic
19680
19681
op010_dst:
19682
bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19683
bsr.l load_fpn2 # fetch dst fpreg ptr
19684
19685
bsr.l set_tag_x # get dst type tag
19686
19687
cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19688
beq.b op010_dst_unnorm # yes
19689
op010_dst_cont:
19690
mov.b %d0, DTAG(%a6) # store the dst optype tag
19691
19692
op010_src:
19693
bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19694
19695
bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19696
bne.w fetch_from_mem # src op is in memory
19697
19698
op010_dreg:
19699
clr.b STAG(%a6) # either NORM or ZERO
19700
bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19701
19702
mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19703
jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19704
19705
op010_dst_unnorm:
19706
bsr.l unnorm_fix # fix the dst UNNORM
19707
bra.b op010_dst_cont
19708
19709
swbeg &0x8
19710
tbl_op010_dreg:
19711
short opd_long - tbl_op010_dreg
19712
short opd_sgl - tbl_op010_dreg
19713
short tbl_op010_dreg - tbl_op010_dreg
19714
short tbl_op010_dreg - tbl_op010_dreg
19715
short opd_word - tbl_op010_dreg
19716
short tbl_op010_dreg - tbl_op010_dreg
19717
short opd_byte - tbl_op010_dreg
19718
short tbl_op010_dreg - tbl_op010_dreg
19719
19720
#
19721
# LONG: can be either NORM or ZERO...
19722
#
19723
opd_long:
19724
bsr.l fetch_dreg # fetch long in d0
19725
fmov.l %d0, %fp0 # load a long
19726
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19727
fbeq.w opd_long_zero # long is a ZERO
19728
rts
19729
opd_long_zero:
19730
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19731
rts
19732
19733
#
19734
# WORD: can be either NORM or ZERO...
19735
#
19736
opd_word:
19737
bsr.l fetch_dreg # fetch word in d0
19738
fmov.w %d0, %fp0 # load a word
19739
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19740
fbeq.w opd_word_zero # WORD is a ZERO
19741
rts
19742
opd_word_zero:
19743
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19744
rts
19745
19746
#
19747
# BYTE: can be either NORM or ZERO...
19748
#
19749
opd_byte:
19750
bsr.l fetch_dreg # fetch byte in d0
19751
fmov.b %d0, %fp0 # load a byte
19752
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19753
fbeq.w opd_byte_zero # byte is a ZERO
19754
rts
19755
opd_byte_zero:
19756
mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19757
rts
19758
19759
#
19760
# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19761
#
19762
# separate SNANs and DENORMs so they can be loaded w/ special care.
19763
# all others can simply be moved "in" using fmove.
19764
#
19765
opd_sgl:
19766
bsr.l fetch_dreg # fetch sgl in d0
19767
mov.l %d0,L_SCR1(%a6)
19768
19769
lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
19770
bsr.l set_tag_s # determine sgl type
19771
mov.b %d0, STAG(%a6) # save the src tag
19772
19773
cmpi.b %d0, &SNAN # is it an SNAN?
19774
beq.w get_sgl_snan # yes
19775
19776
cmpi.b %d0, &DENORM # is it a DENORM?
19777
beq.w get_sgl_denorm # yes
19778
19779
fmov.s (%a0), %fp0 # no, so can load it regular
19780
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19781
rts
19782
19783
##############################################################################
19784
19785
#########################################################################
19786
# fetch_from_mem(): #
19787
# - src is out in memory. must: #
19788
# (1) calc ea - must read AFTER you know the src type since #
19789
# if the ea is -() or ()+, need to know # of bytes. #
19790
# (2) read it in from either user or supervisor space #
19791
# (3) if (b || w || l) then simply read in #
19792
# if (s || d || x) then check for SNAN,UNNORM,DENORM #
19793
# if (packed) then punt for now #
19794
# INPUT: #
19795
# %d0 : src type field #
19796
#########################################################################
19797
fetch_from_mem:
19798
clr.b STAG(%a6) # either NORM or ZERO
19799
19800
mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19801
jmp (tbl_fp_type.b,%pc,%d0.w*1)
19802
19803
swbeg &0x8
19804
tbl_fp_type:
19805
short load_long - tbl_fp_type
19806
short load_sgl - tbl_fp_type
19807
short load_ext - tbl_fp_type
19808
short load_packed - tbl_fp_type
19809
short load_word - tbl_fp_type
19810
short load_dbl - tbl_fp_type
19811
short load_byte - tbl_fp_type
19812
short tbl_fp_type - tbl_fp_type
19813
19814
#########################################
19815
# load a LONG into %fp0: #
19816
# -number can't fault #
19817
# (1) calc ea #
19818
# (2) read 4 bytes into L_SCR1 #
19819
# (3) fmov.l into %fp0 #
19820
#########################################
19821
load_long:
19822
movq.l &0x4, %d0 # pass: 4 (bytes)
19823
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19824
19825
cmpi.b SPCOND_FLG(%a6),&immed_flg
19826
beq.b load_long_immed
19827
19828
bsr.l _dmem_read_long # fetch src operand from memory
19829
19830
tst.l %d1 # did dfetch fail?
19831
bne.l facc_in_l # yes
19832
19833
load_long_cont:
19834
fmov.l %d0, %fp0 # read into %fp0;convert to xprec
19835
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19836
19837
fbeq.w load_long_zero # src op is a ZERO
19838
rts
19839
load_long_zero:
19840
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19841
rts
19842
19843
load_long_immed:
19844
bsr.l _imem_read_long # fetch src operand immed data
19845
19846
tst.l %d1 # did ifetch fail?
19847
bne.l funimp_iacc # yes
19848
bra.b load_long_cont
19849
19850
#########################################
19851
# load a WORD into %fp0: #
19852
# -number can't fault #
19853
# (1) calc ea #
19854
# (2) read 2 bytes into L_SCR1 #
19855
# (3) fmov.w into %fp0 #
19856
#########################################
19857
load_word:
19858
movq.l &0x2, %d0 # pass: 2 (bytes)
19859
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19860
19861
cmpi.b SPCOND_FLG(%a6),&immed_flg
19862
beq.b load_word_immed
19863
19864
bsr.l _dmem_read_word # fetch src operand from memory
19865
19866
tst.l %d1 # did dfetch fail?
19867
bne.l facc_in_w # yes
19868
19869
load_word_cont:
19870
fmov.w %d0, %fp0 # read into %fp0;convert to xprec
19871
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19872
19873
fbeq.w load_word_zero # src op is a ZERO
19874
rts
19875
load_word_zero:
19876
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19877
rts
19878
19879
load_word_immed:
19880
bsr.l _imem_read_word # fetch src operand immed data
19881
19882
tst.l %d1 # did ifetch fail?
19883
bne.l funimp_iacc # yes
19884
bra.b load_word_cont
19885
19886
#########################################
19887
# load a BYTE into %fp0: #
19888
# -number can't fault #
19889
# (1) calc ea #
19890
# (2) read 1 byte into L_SCR1 #
19891
# (3) fmov.b into %fp0 #
19892
#########################################
19893
load_byte:
19894
movq.l &0x1, %d0 # pass: 1 (byte)
19895
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19896
19897
cmpi.b SPCOND_FLG(%a6),&immed_flg
19898
beq.b load_byte_immed
19899
19900
bsr.l _dmem_read_byte # fetch src operand from memory
19901
19902
tst.l %d1 # did dfetch fail?
19903
bne.l facc_in_b # yes
19904
19905
load_byte_cont:
19906
fmov.b %d0, %fp0 # read into %fp0;convert to xprec
19907
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19908
19909
fbeq.w load_byte_zero # src op is a ZERO
19910
rts
19911
load_byte_zero:
19912
mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19913
rts
19914
19915
load_byte_immed:
19916
bsr.l _imem_read_word # fetch src operand immed data
19917
19918
tst.l %d1 # did ifetch fail?
19919
bne.l funimp_iacc # yes
19920
bra.b load_byte_cont
19921
19922
#########################################
19923
# load a SGL into %fp0: #
19924
# -number can't fault #
19925
# (1) calc ea #
19926
# (2) read 4 bytes into L_SCR1 #
19927
# (3) fmov.s into %fp0 #
19928
#########################################
19929
load_sgl:
19930
movq.l &0x4, %d0 # pass: 4 (bytes)
19931
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19932
19933
cmpi.b SPCOND_FLG(%a6),&immed_flg
19934
beq.b load_sgl_immed
19935
19936
bsr.l _dmem_read_long # fetch src operand from memory
19937
mov.l %d0, L_SCR1(%a6) # store src op on stack
19938
19939
tst.l %d1 # did dfetch fail?
19940
bne.l facc_in_l # yes
19941
19942
load_sgl_cont:
19943
lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
19944
bsr.l set_tag_s # determine src type tag
19945
mov.b %d0, STAG(%a6) # save src optype tag on stack
19946
19947
cmpi.b %d0, &DENORM # is it a sgl DENORM?
19948
beq.w get_sgl_denorm # yes
19949
19950
cmpi.b %d0, &SNAN # is it a sgl SNAN?
19951
beq.w get_sgl_snan # yes
19952
19953
fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
19954
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19955
rts
19956
19957
load_sgl_immed:
19958
bsr.l _imem_read_long # fetch src operand immed data
19959
19960
tst.l %d1 # did ifetch fail?
19961
bne.l funimp_iacc # yes
19962
bra.b load_sgl_cont
19963
19964
# must convert sgl denorm format to an Xprec denorm fmt suitable for
19965
# normalization...
19966
# %a0 : points to sgl denorm
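#
# Illustration only (not assembled): the same conversion in C terms.
# The single-precision fraction is placed in the upper mantissa
# longword, normalized, and the extended exponent is 0x3f81 minus the
# shift count (struct and function names are made up for the example).
#
#   #include <stdint.h>
#
#   struct xprec { uint16_t sign_exp; uint32_t man_hi, man_lo; };
#
#   struct xprec sgl_denorm_to_ext(uint32_t s)
#   {
#       struct xprec x = { 0, 0, 0 };
#       uint32_t frac = (s & 0x007fffffu) << 8; /* fraction -> bits 30..8 */
#       int shift = 0;
#
#       if (frac == 0)                          /* true denorms are nonzero */
#           return x;
#       while (!(frac & 0x80000000u)) {         /* normalize: msb to bit 31 */
#           frac <<= 1;
#           shift++;
#       }
#       x.man_hi   = frac;
#       x.sign_exp = (uint16_t)(0x3f81 - shift);
#       if (s & 0x80000000u)
#           x.sign_exp |= 0x8000;               /* copy the sign bit */
#       return x;
#   }
#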
19967
get_sgl_denorm:
19968
clr.w FP_SRC_EX(%a6)
19969
bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa)
19970
lsl.l &0x8, %d0
19971
mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
19972
clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
19973
19974
clr.w FP_SRC_EX(%a6)
19975
btst &0x7, (%a0) # is sgn bit set?
19976
beq.b sgl_dnrm_norm
19977
bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
19978
19979
sgl_dnrm_norm:
19980
lea FP_SRC(%a6), %a0
19981
bsr.l norm # normalize number
19982
mov.w &0x3f81, %d1 # xprec exp = 0x3f81
19983
sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
19984
or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
19985
19986
mov.b &NORM, STAG(%a6) # fix src type tag
19987
rts
19988
19989
# convert sgl to ext SNAN
19990
# %a0 : points to sgl SNAN
19991
get_sgl_snan:
19992
mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
19993
bfextu (%a0){&9:&23}, %d0
19994
lsl.l &0x8, %d0 # extract and insert hi(man)
19995
mov.l %d0, FP_SRC_HI(%a6)
19996
clr.l FP_SRC_LO(%a6)
19997
19998
btst &0x7, (%a0) # see if sign of SNAN is set
19999
beq.b no_sgl_snan_sgn
20000
bset &0x7, FP_SRC_EX(%a6)
20001
no_sgl_snan_sgn:
20002
rts
20003
20004
#########################################
20005
# load a DBL into %fp0: #
20006
# -number can't fault #
20007
# (1) calc ea #
20008
# (2) read 8 bytes into L_SCR(1,2)#
20009
# (3) fmov.d into %fp0 #
20010
#########################################
20011
load_dbl:
20012
movq.l &0x8, %d0 # pass: 8 (bytes)
20013
bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
20014
20015
cmpi.b SPCOND_FLG(%a6),&immed_flg
20016
beq.b load_dbl_immed
20017
20018
lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20019
movq.l &0x8, %d0 # pass: # bytes to read
20020
bsr.l _dmem_read # fetch src operand from memory
20021
20022
tst.l %d1 # did dfetch fail?
20023
bne.l facc_in_d # yes
20024
20025
load_dbl_cont:
20026
lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
20027
bsr.l set_tag_d # determine src type tag
20028
mov.b %d0, STAG(%a6) # set src optype tag
20029
20030
cmpi.b %d0, &DENORM # is it a dbl DENORM?
20031
beq.w get_dbl_denorm # yes
20032
20033
cmpi.b %d0, &SNAN # is it a dbl SNAN?
20034
beq.w get_dbl_snan # yes
20035
20036
fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
20037
fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
20038
rts
20039
20040
load_dbl_immed:
20041
lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20042
movq.l &0x8, %d0 # pass: # bytes to read
20043
bsr.l _imem_read # fetch src operand from memory
20044
20045
tst.l %d1 # did ifetch fail?
20046
bne.l funimp_iacc # yes
20047
bra.b load_dbl_cont
20048
20049
# must convert dbl denorm format to an Xprec denorm fmt suitable for
20050
# normalization...
20051
# %a0 : loc. of dbl denorm
20052
get_dbl_denorm:
20053
clr.w FP_SRC_EX(%a6)
20054
bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20055
mov.l %d0, FP_SRC_HI(%a6)
20056
bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20057
mov.l &0xb, %d1
20058
lsl.l %d1, %d0
20059
mov.l %d0, FP_SRC_LO(%a6)
20060
20061
btst &0x7, (%a0) # is sgn bit set?
20062
beq.b dbl_dnrm_norm
20063
bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
20064
20065
dbl_dnrm_norm:
20066
lea FP_SRC(%a6), %a0
20067
bsr.l norm # normalize number
20068
mov.w &0x3c01, %d1 # xprec exp = 0x3c01
20069
sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
20070
or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
20071
20072
mov.b &NORM, STAG(%a6) # fix src type tag
20073
rts
20074
20075
# convert dbl to ext SNAN
20076
# %a0 : points to dbl SNAN
20077
get_dbl_snan:
20078
mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20079
20080
bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20081
mov.l %d0, FP_SRC_HI(%a6)
20082
bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20083
mov.l &0xb, %d1
20084
lsl.l %d1, %d0
20085
mov.l %d0, FP_SRC_LO(%a6)
20086
20087
btst &0x7, (%a0) # see if sign of SNAN is set
20088
beq.b no_dbl_snan_sgn
20089
bset &0x7, FP_SRC_EX(%a6)
20090
no_dbl_snan_sgn:
20091
rts
20092
20093
#################################################
20094
# load a Xprec into %fp0: #
20095
# -number can't fault #
20096
# (1) calc ea #
20097
# (2) read 12 bytes into L_SCR(1,2) #
20098
# (3) fmov.x into %fp0 #
20099
#################################################
20100
load_ext:
20101
mov.l &0xc, %d0 # pass: 12 (bytes)
20102
bsr.l _dcalc_ea # calc <ea>
20103
20104
lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
20105
mov.l &0xc, %d0 # pass: # of bytes to read
20106
bsr.l _dmem_read # fetch src operand from memory
20107
20108
tst.l %d1 # did dfetch fail?
20109
bne.l facc_in_x # yes
20110
20111
lea FP_SRC(%a6), %a0 # pass: ptr to src op
20112
bsr.l set_tag_x # determine src type tag
20113
20114
cmpi.b %d0, &UNNORM # is the src op an UNNORM?
20115
beq.b load_ext_unnorm # yes
20116
20117
mov.b %d0, STAG(%a6) # store the src optype tag
20118
rts
20119
20120
load_ext_unnorm:
20121
bsr.l unnorm_fix # fix the src UNNORM
20122
mov.b %d0, STAG(%a6) # store the src optype tag
20123
rts
20124
20125
#################################################
20126
# load a packed into %fp0: #
20127
# -number can't fault #
20128
# (1) calc ea #
20129
# (2) read 12 bytes into L_SCR(1,2,3) #
20130
# (3) fmov.x into %fp0 #
20131
#################################################
20132
load_packed:
20133
bsr.l get_packed
20134
20135
lea FP_SRC(%a6),%a0 # pass ptr to src op
20136
bsr.l set_tag_x # determine src type tag
20137
cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
20138
beq.b load_packed_unnorm # yes
20139
20140
mov.b %d0,STAG(%a6) # store the src optype tag
20141
rts
20142
20143
load_packed_unnorm:
20144
bsr.l unnorm_fix # fix the UNNORM ZERO
20145
mov.b %d0,STAG(%a6) # store the src optype tag
20146
rts
20147
20148
#########################################################################
20149
# XDEF **************************************************************** #
20150
# fout(): move from fp register to memory or data register #
20151
# #
20152
# XREF **************************************************************** #
20153
# _round() - needed to create EXOP for sgl/dbl precision #
20154
# norm() - needed to create EXOP for extended precision #
20155
# ovf_res() - create default overflow result for sgl/dbl precision#
20156
# unf_res() - create default underflow result for sgl/dbl prec. #
20157
# dst_dbl() - create rounded dbl precision result. #
20158
# dst_sgl() - create rounded sgl precision result. #
20159
# fetch_dreg() - fetch dynamic k-factor reg for packed. #
20160
# bindec() - convert FP binary number to packed number. #
20161
# _mem_write() - write data to memory. #
20162
# _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163
# _dmem_write_{byte,word,long}() - write data to memory. #
20164
# store_dreg_{b,w,l}() - store data to data register file. #
20165
# facc_out_{b,w,l,d,x}() - data access error occurred. #
20166
# #
20167
# INPUT *************************************************************** #
20168
# a0 = pointer to extended precision source operand #
20169
# d0 = round prec,mode #
20170
# #
20171
# OUTPUT ************************************************************** #
20172
# fp0 : intermediate underflow or overflow result if #
20173
# OVFL/UNFL occurred for a sgl or dbl operand #
20174
# #
20175
# ALGORITHM *********************************************************** #
20176
# This routine is accessed by many handlers that need to do an #
20177
# opclass three move of an operand out to memory. #
20178
# Decode an fmove out (opclass 3) instruction to determine if #
20179
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
20180
# register or memory. The algorithm uses a standard "fmove" to create #
20181
# the rounded result. Also, since exceptions are disabled, this also #
# creates the correct OPERR default result if appropriate. #
# For sgl or dbl precision, overflow or underflow can occur. If #
# either occurs and is enabled, the EXOP is created. #
20185
# For extended precision, the stacked <ea> must be fixed along #
20186
# w/ the address index register as appropriate w/ _calc_ea_fout(). If #
20187
# the source is a denorm and if underflow is enabled, an EXOP must be #
20188
# created. #
20189
# For packed, the k-factor must be fetched from the instruction #
20190
# word or a data register. The <ea> must be fixed as w/ extended #
20191
# precision. Then, bindec() is called to create the appropriate #
20192
# packed result. #
20193
# If at any time an access error is flagged by one of the move- #
20194
# to-memory routines, then a special exit must be made so that the #
20195
# access error can be handled properly. #
20196
# #
20197
#########################################################################
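#
# clarifying note (not part of the original Motorola sources): the 3-bit
# destination format field extracted below selects 0=long, 1=single,
# 2=extended, 3=packed w/ static k-factor, 4=word, 5=double, 6=byte,
# 7=packed w/ dynamic k-factor, which is why fout_pack appears twice in
# tbl_fout.
#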
20198
20199
global fout
20200
fout:
20201
bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202
mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203
jmp (tbl_fout.b,%pc,%a1) # jump to routine
20204
20205
swbeg &0x8
20206
tbl_fout:
20207
short fout_long - tbl_fout
20208
short fout_sgl - tbl_fout
20209
short fout_ext - tbl_fout
20210
short fout_pack - tbl_fout
20211
short fout_word - tbl_fout
20212
short fout_dbl - tbl_fout
20213
short fout_byte - tbl_fout
20214
short fout_pack - tbl_fout
20215
20216
#################################################################
20217
# fmove.b out ###################################################
20218
#################################################################
20219
20220
# Only "Unimplemented Data Type" exceptions enter here. The operand
20221
# is either a DENORM or a NORM.
20222
fout_byte:
20223
tst.b STAG(%a6) # is operand normalized?
20224
bne.b fout_byte_denorm # no
20225
20226
fmovm.x SRC(%a0),&0x80 # load value
20227
20228
fout_byte_norm:
20229
fmov.l %d0,%fpcr # insert rnd prec,mode
20230
20231
fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
20232
20233
fmov.l &0x0,%fpcr # clear FPCR
20234
fmov.l %fpsr,%d1 # fetch FPSR
20235
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20236
20237
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20238
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20239
beq.b fout_byte_dn # must save to integer regfile
20240
20241
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20242
bsr.l _dmem_write_byte # write byte
20243
20244
tst.l %d1 # did dstore fail?
20245
bne.l facc_out_b # yes
20246
20247
rts
20248
20249
fout_byte_dn:
20250
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20251
andi.w &0x7,%d1
20252
bsr.l store_dreg_b
20253
rts
20254
20255
fout_byte_denorm:
20256
mov.l SRC_EX(%a0),%d1
20257
andi.l &0x80000000,%d1 # keep DENORM sign
20258
ori.l &0x00800000,%d1 # make smallest sgl
20259
fmov.s %d1,%fp0
20260
bra.b fout_byte_norm
20261
20262
#################################################################
20263
# fmove.w out ###################################################
20264
#################################################################
20265
20266
# Only "Unimplemented Data Type" exceptions enter here. The operand
20267
# is either a DENORM or a NORM.
20268
fout_word:
20269
tst.b STAG(%a6) # is operand normalized?
20270
bne.b fout_word_denorm # no
20271
20272
fmovm.x SRC(%a0),&0x80 # load value
20273
20274
fout_word_norm:
20275
fmov.l %d0,%fpcr # insert rnd prec:mode
20276
20277
fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
20278
20279
fmov.l &0x0,%fpcr # clear FPCR
20280
fmov.l %fpsr,%d1 # fetch FPSR
20281
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20282
20283
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20284
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20285
beq.b fout_word_dn # must save to integer regfile
20286
20287
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20288
bsr.l _dmem_write_word # write word
20289
20290
tst.l %d1 # did dstore fail?
20291
bne.l facc_out_w # yes
20292
20293
rts
20294
20295
fout_word_dn:
20296
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20297
andi.w &0x7,%d1
20298
bsr.l store_dreg_w
20299
rts
20300
20301
fout_word_denorm:
20302
mov.l SRC_EX(%a0),%d1
20303
andi.l &0x80000000,%d1 # keep DENORM sign
20304
ori.l &0x00800000,%d1 # make smallest sgl
20305
fmov.s %d1,%fp0
20306
bra.b fout_word_norm
20307
20308
#################################################################
20309
# fmove.l out ###################################################
20310
#################################################################
20311
20312
# Only "Unimplemented Data Type" exceptions enter here. The operand
20313
# is either a DENORM or a NORM.
20314
fout_long:
20315
tst.b STAG(%a6) # is operand normalized?
20316
bne.b fout_long_denorm # no
20317
20318
fmovm.x SRC(%a0),&0x80 # load value
20319
20320
fout_long_norm:
20321
fmov.l %d0,%fpcr # insert rnd prec:mode
20322
20323
fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
20324
20325
fmov.l &0x0,%fpcr # clear FPCR
20326
fmov.l %fpsr,%d1 # fetch FPSR
20327
or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20328
20329
fout_long_write:
20330
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20331
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20332
beq.b fout_long_dn # must save to integer regfile
20333
20334
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20335
bsr.l _dmem_write_long # write long
20336
20337
tst.l %d1 # did dstore fail?
20338
bne.l facc_out_l # yes
20339
20340
rts
20341
20342
fout_long_dn:
20343
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20344
andi.w &0x7,%d1
20345
bsr.l store_dreg_l
20346
rts
20347
20348
fout_long_denorm:
20349
mov.l SRC_EX(%a0),%d1
20350
andi.l &0x80000000,%d1 # keep DENORM sign
20351
ori.l &0x00800000,%d1 # make smallest sgl
20352
fmov.s %d1,%fp0
20353
bra.b fout_long_norm
20354
20355
#################################################################
20356
# fmove.x out ###################################################
20357
#################################################################
20358
20359
# Only "Unimplemented Data Type" exceptions enter here. The operand
20360
# is either a DENORM or a NORM.
20361
# The DENORM causes an Underflow exception.
20362
fout_ext:
20363
20364
# we copy the extended precision result to FP_SCR0 so that the reserved
20365
# 16-bit field gets zeroed. we do this since we promise not to disturb
20366
# what's at SRC(a0).
20367
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20368
clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
20369
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20370
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20371
20372
fmovm.x SRC(%a0),&0x80 # return result
20373
20374
bsr.l _calc_ea_fout # fix stacked <ea>
20375
20376
mov.l %a0,%a1 # pass: dst addr
20377
lea FP_SCR0(%a6),%a0 # pass: src addr
20378
mov.l &0xc,%d0 # pass: opsize is 12 bytes
20379
20380
# we must not yet write the extended precision data to the stack
20381
# in the pre-decrement case from supervisor mode or else we'll corrupt
20382
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
20383
cmpi.b SPCOND_FLG(%a6),&mda7_flg
20384
beq.b fout_ext_a7
20385
20386
bsr.l _dmem_write # write ext prec number to memory
20387
20388
tst.l %d1 # did dstore fail?
20389
bne.w fout_ext_err # yes
20390
20391
tst.b STAG(%a6) # is operand normalized?
20392
bne.b fout_ext_denorm # no
20393
rts
20394
20395
# the number is a DENORM. must set the underflow exception bit
20396
fout_ext_denorm:
20397
bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20398
20399
mov.b FPCR_ENABLE(%a6),%d0
20400
andi.b &0x0a,%d0 # is UNFL or INEX enabled?
20401
bne.b fout_ext_exc # yes
20402
rts
20403
20404
# we don't want to do the write if the exception occurred in supervisor mode
20405
# so _mem_write2() handles this for us.
20406
fout_ext_a7:
20407
bsr.l _mem_write2 # write ext prec number to memory
20408
20409
tst.l %d1 # did dstore fail?
20410
bne.w fout_ext_err # yes
20411
20412
tst.b STAG(%a6) # is operand normalized?
20413
bne.b fout_ext_denorm # no
20414
rts
20415
20416
fout_ext_exc:
20417
lea FP_SCR0(%a6),%a0
20418
bsr.l norm # normalize the mantissa
20419
neg.w %d0 # new exp = -(shft amt)
20420
andi.w &0x7fff,%d0
20421
andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20422
or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
20423
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20424
rts
20425
20426
fout_ext_err:
20427
mov.l EXC_A6(%a6),(%a6) # fix stacked a6
20428
bra.l facc_out_x
20429
20430
#########################################################################
20431
# fmove.s out ###########################################################
20432
#########################################################################
20433
fout_sgl:
20434
andi.b &0x30,%d0 # clear rnd prec
20435
ori.b &s_mode*0x10,%d0 # insert sgl prec
20436
mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20437
20438
#
20439
# operand is a normalized number. first, we check to see if the move out
20440
# would cause either an underflow or overflow. these cases are handled
20441
# separately. otherwise, set the FPCR to the proper rounding mode and
20442
# execute the move.
20443
#
20444
mov.w SRC_EX(%a0),%d0 # extract exponent
20445
andi.w &0x7fff,%d0 # strip sign
20446
20447
cmpi.w %d0,&SGL_HI # will operand overflow?
20448
bgt.w fout_sgl_ovfl # yes; go handle OVFL
20449
beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
20450
cmpi.w %d0,&SGL_LO # will operand underflow?
20451
blt.w fout_sgl_unfl # yes; go handle underflow
20452
20453
#
20454
# NORMs(in range) can be stored out by a simple "fmov.s"
20455
# Unnormalized inputs can come through this point.
20456
#
20457
fout_sgl_exg:
20458
fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20459
20460
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20461
fmov.l &0x0,%fpsr # clear FPSR
20462
20463
fmov.s %fp0,%d0 # store does convert and round
20464
20465
fmov.l &0x0,%fpcr # clear FPCR
20466
fmov.l %fpsr,%d1 # save FPSR
20467
20468
or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
20469
20470
fout_sgl_exg_write:
20471
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20472
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20473
beq.b fout_sgl_exg_write_dn # must save to integer regfile
20474
20475
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20476
bsr.l _dmem_write_long # write long
20477
20478
tst.l %d1 # did dstore fail?
20479
bne.l facc_out_l # yes
20480
20481
rts
20482
20483
fout_sgl_exg_write_dn:
20484
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20485
andi.w &0x7,%d1
20486
bsr.l store_dreg_l
20487
rts
20488
20489
#
20490
# here, we know that the operand would UNFL if moved out to single prec,
20491
# so, denorm and round and then use generic store single routine to
20492
# write the value to memory.
20493
#
20494
fout_sgl_unfl:
20495
bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20496
20497
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20498
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20499
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20500
mov.l %a0,-(%sp)
20501
20502
clr.l %d0 # pass: S.F. = 0
20503
20504
cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20505
bne.b fout_sgl_unfl_cont # let DENORMs fall through
20506
20507
lea FP_SCR0(%a6),%a0
20508
bsr.l norm # normalize the DENORM
20509
20510
fout_sgl_unfl_cont:
20511
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20512
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20513
bsr.l unf_res # calc default underflow result
20514
20515
lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20516
bsr.l dst_sgl # convert to single prec
20517
20518
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20519
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20520
beq.b fout_sgl_unfl_dn # must save to integer regfile
20521
20522
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20523
bsr.l _dmem_write_long # write long
20524
20525
tst.l %d1 # did dstore fail?
20526
bne.l facc_out_l # yes
20527
20528
bra.b fout_sgl_unfl_chkexc
20529
20530
fout_sgl_unfl_dn:
20531
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20532
andi.w &0x7,%d1
20533
bsr.l store_dreg_l
20534
20535
fout_sgl_unfl_chkexc:
20536
mov.b FPCR_ENABLE(%a6),%d1
20537
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20538
bne.w fout_sd_exc_unfl # yes
20539
addq.l &0x4,%sp
20540
rts
20541
20542
#
20543
# it's definitely an overflow so call ovf_res to get the correct answer
20544
#
20545
fout_sgl_ovfl:
20546
tst.b 3+SRC_HI(%a0) # is result inexact?
20547
bne.b fout_sgl_ovfl_inex2
20548
tst.l SRC_LO(%a0) # is result inexact?
20549
bne.b fout_sgl_ovfl_inex2
20550
ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20551
bra.b fout_sgl_ovfl_cont
20552
fout_sgl_ovfl_inex2:
20553
ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20554
20555
fout_sgl_ovfl_cont:
20556
mov.l %a0,-(%sp)
20557
20558
# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20559
# overflow result. DON'T save the returned ccodes from ovf_res() since
20560
# fmove out doesn't alter them.
20561
tst.b SRC_EX(%a0) # is operand negative?
20562
smi %d1 # set if so
20563
mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
20564
bsr.l ovf_res # calc OVFL result
20565
fmovm.x (%a0),&0x80 # load default overflow result
20566
fmov.s %fp0,%d0 # store to single
20567
20568
mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20569
andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20570
beq.b fout_sgl_ovfl_dn # must save to integer regfile
20571
20572
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20573
bsr.l _dmem_write_long # write long
20574
20575
tst.l %d1 # did dstore fail?
20576
bne.l facc_out_l # yes
20577
20578
bra.b fout_sgl_ovfl_chkexc
20579
20580
fout_sgl_ovfl_dn:
20581
mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20582
andi.w &0x7,%d1
20583
bsr.l store_dreg_l
20584
20585
fout_sgl_ovfl_chkexc:
20586
mov.b FPCR_ENABLE(%a6),%d1
20587
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20588
bne.w fout_sd_exc_ovfl # yes
20589
addq.l &0x4,%sp
20590
rts
20591
20592
#
20593
# move out MAY overflow:
20594
# (1) force the exp to 0x3fff
20595
# (2) do a move w/ appropriate rnd mode
20596
# (3) if exp still equals zero, then insert original exponent
20597
# for the correct result.
20598
# if exp now equals one, then it overflowed so call ovf_res.
20599
#
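#
# illustrative note (not part of the original sources): SGL_HI corresponds
# to the largest exponent representable in single precision. scaling the
# exponent to 0x3fff puts the operand in [1.0, 2.0); if rounding to single
# carries the mantissa up to 2.0, the true result would need an exponent
# one larger than SGL_HI, so the fcmp against 2.0 below branches to
# fout_sgl_ovfl. otherwise the operand fits and fout_sgl_exg stores it.
#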
20600
fout_sgl_may_ovfl:
20601
mov.w SRC_EX(%a0),%d1 # fetch current sign
20602
andi.w &0x8000,%d1 # keep it,clear exp
20603
ori.w &0x3fff,%d1 # insert exp = 0
20604
mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20605
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20607
20608
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20609
20610
fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20611
fmov.l &0x0,%fpcr # clear FPCR
20612
20613
fabs.x %fp0 # need absolute value
20614
fcmp.b %fp0,&0x2 # did exponent increase?
20615
fblt.w fout_sgl_exg # no; go finish NORM
20616
bra.w fout_sgl_ovfl # yes; go handle overflow
20617
20618
################
20619
20620
fout_sd_exc_unfl:
20621
mov.l (%sp)+,%a0
20622
20623
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20624
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20625
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20626
20627
cmpi.b STAG(%a6),&DENORM # was src a DENORM?
20628
bne.b fout_sd_exc_cont # no
20629
20630
lea FP_SCR0(%a6),%a0
20631
bsr.l norm
20632
neg.l %d0
20633
andi.w &0x7fff,%d0
20634
bfins %d0,FP_SCR0_EX(%a6){&1:&15}
20635
bra.b fout_sd_exc_cont
20636
20637
fout_sd_exc:
20638
fout_sd_exc_ovfl:
20639
mov.l (%sp)+,%a0 # restore a0
20640
20641
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20642
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20643
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20644
20645
fout_sd_exc_cont:
20646
bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
20647
sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
20648
lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
20649
20650
mov.b 3+L_SCR3(%a6),%d1
20651
lsr.b &0x4,%d1
20652
andi.w &0x0c,%d1
20653
swap %d1
20654
mov.b 3+L_SCR3(%a6),%d1
20655
lsr.b &0x4,%d1
20656
andi.w &0x03,%d1
20657
clr.l %d0 # pass: zero g,r,s
20658
bsr.l _round # round the DENORM
20659
20660
tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
20661
beq.b fout_sd_exc_done # no
20662
bset &0x7,FP_SCR0_EX(%a6) # yes
20663
20664
fout_sd_exc_done:
20665
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20666
rts
20667
20668
#################################################################
20669
# fmove.d out ###################################################
20670
#################################################################
20671
fout_dbl:
20672
andi.b &0x30,%d0 # clear rnd prec
20673
ori.b &d_mode*0x10,%d0 # insert dbl prec
20674
mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20675
20676
#
20677
# operand is a normalized number. first, we check to see if the move out
20678
# would cause either an underflow or overflow. these cases are handled
20679
# separately. otherwise, set the FPCR to the proper rounding mode and
20680
# execute the move.
20681
#
20682
mov.w SRC_EX(%a0),%d0 # extract exponent
20683
andi.w &0x7fff,%d0 # strip sign
20684
20685
cmpi.w %d0,&DBL_HI # will operand overflow?
20686
bgt.w fout_dbl_ovfl # yes; go handle OVFL
20687
beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
20688
cmpi.w %d0,&DBL_LO # will operand underflow?
20689
blt.w fout_dbl_unfl # yes; go handle underflow
20690
20691
#
20692
# NORMs(in range) can be stored out by a simple "fmov.d"
20693
# Unnormalized inputs can come through this point.
20694
#
20695
fout_dbl_exg:
20696
fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20697
20698
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20699
fmov.l &0x0,%fpsr # clear FPSR
20700
20701
fmov.d %fp0,L_SCR1(%a6) # store does convert and round
20702
20703
fmov.l &0x0,%fpcr # clear FPCR
20704
fmov.l %fpsr,%d0 # save FPSR
20705
20706
or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
20707
20708
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20709
lea L_SCR1(%a6),%a0 # pass: src addr
20710
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20711
bsr.l _dmem_write # store dbl fop to memory
20712
20713
tst.l %d1 # did dstore fail?
20714
bne.l facc_out_d # yes
20715
20716
rts # no; so we're finished
20717
20718
#
20719
# here, we know that the operand would UNFL if moved out to double prec,
20720
# so, denorm and round and then use generic store double routine to
20721
# write the value to memory.
20722
#
20723
fout_dbl_unfl:
20724
bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20725
20726
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20727
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20728
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20729
mov.l %a0,-(%sp)
20730
20731
clr.l %d0 # pass: S.F. = 0
20732
20733
cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20734
bne.b fout_dbl_unfl_cont # let DENORMs fall through
20735
20736
lea FP_SCR0(%a6),%a0
20737
bsr.l norm # normalize the DENORM
20738
20739
fout_dbl_unfl_cont:
20740
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20741
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20742
bsr.l unf_res # calc default underflow result
20743
20744
lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20745
bsr.l dst_dbl # convert to double prec
20746
mov.l %d0,L_SCR1(%a6)
20747
mov.l %d1,L_SCR2(%a6)
20748
20749
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20750
lea L_SCR1(%a6),%a0 # pass: src addr
20751
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20752
bsr.l _dmem_write # store dbl fop to memory
20753
20754
tst.l %d1 # did dstore fail?
20755
bne.l facc_out_d # yes
20756
20757
mov.b FPCR_ENABLE(%a6),%d1
20758
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20759
bne.w fout_sd_exc_unfl # yes
20760
addq.l &0x4,%sp
20761
rts
20762
20763
#
20764
# it's definitely an overflow so call ovf_res to get the correct answer
20765
#
20766
fout_dbl_ovfl:
20767
mov.w 2+SRC_LO(%a0),%d0
20768
andi.w &0x7ff,%d0
20769
bne.b fout_dbl_ovfl_inex2
20770
20771
ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772
bra.b fout_dbl_ovfl_cont
20773
fout_dbl_ovfl_inex2:
20774
ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20775
20776
fout_dbl_ovfl_cont:
20777
mov.l %a0,-(%sp)
20778
20779
# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780
# overflow result. DON'T save the returned ccodes from ovf_res() since
20781
# fmove out doesn't alter them.
20782
tst.b SRC_EX(%a0) # is operand negative?
20783
smi %d1 # set if so
20784
mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
20785
bsr.l ovf_res # calc OVFL result
20786
fmovm.x (%a0),&0x80 # load default overflow result
20787
fmov.d %fp0,L_SCR1(%a6) # store to double
20788
20789
mov.l EXC_EA(%a6),%a1 # pass: dst addr
20790
lea L_SCR1(%a6),%a0 # pass: src addr
20791
movq.l &0x8,%d0 # pass: opsize is 8 bytes
20792
bsr.l _dmem_write # store dbl fop to memory
20793
20794
tst.l %d1 # did dstore fail?
20795
bne.l facc_out_d # yes
20796
20797
mov.b FPCR_ENABLE(%a6),%d1
20798
andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20799
bne.w fout_sd_exc_ovfl # yes
20800
addq.l &0x4,%sp
20801
rts
20802
20803
#
20804
# move out MAY overflow:
20805
# (1) force the exp to 0x3fff
20806
# (2) do a move w/ appropriate rnd mode
20807
# (3) if exp still equals zero, then insert original exponent
20808
# for the correct result.
20809
# if exp now equals one, then it overflowed so call ovf_res.
20810
#
20811
fout_dbl_may_ovfl:
20812
mov.w SRC_EX(%a0),%d1 # fetch current sign
20813
andi.w &0x8000,%d1 # keep it,clear exp
20814
ori.w &0x3fff,%d1 # insert exp = 0
20815
mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20816
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20818
20819
fmov.l L_SCR3(%a6),%fpcr # set FPCR
20820
20821
fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20822
fmov.l &0x0,%fpcr # clear FPCR
20823
20824
fabs.x %fp0 # need absolute value
20825
fcmp.b %fp0,&0x2 # did exponent increase?
20826
fblt.w fout_dbl_exg # no; go finish NORM
20827
bra.w fout_dbl_ovfl # yes; go handle overflow
20828
20829
#########################################################################
20830
# XDEF **************************************************************** #
20831
# dst_dbl(): create double precision value from extended prec. #
20832
# #
20833
# XREF **************************************************************** #
20834
# None #
20835
# #
20836
# INPUT *************************************************************** #
20837
# a0 = pointer to source operand in extended precision #
20838
# #
20839
# OUTPUT ************************************************************** #
20840
# d0 = hi(double precision result) #
20841
# d1 = lo(double precision result) #
20842
# #
20843
# ALGORITHM *********************************************************** #
20844
# #
20845
# Changes extended precision to double precision. #
20846
# Note: no attempt is made to round the extended value to double. #
20847
# dbl_sign = ext_sign #
20848
# dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
20849
# get rid of ext integer bit #
20850
# dbl_mant = ext_mant{62:11} #
20851
# #
20852
# --------------- --------------- --------------- #
20853
# extended -> |s| exp | |1| ms mant | | ls mant | #
20854
# --------------- --------------- --------------- #
20855
# 95 64 63 62 32 31 11 0 #
20856
# | | #
20857
# | | #
20858
# | | #
20859
# v v #
20860
# --------------- --------------- #
20861
# double -> |s|exp| mant | | mant | #
20862
# --------------- --------------- #
20863
# 63 51 32 31 0 #
20864
# #
20865
#########################################################################
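#
# worked example (illustrative; not part of the original sources): for the
# value 1.0, ext_exp = $3fff, so dbl_exp = $3fff - $3fff + $3ff = $3ff,
# the biased double precision exponent of 1.0. the explicit integer bit
# of the extended mantissa is dropped and the next 52 fraction bits are
# returned in d0/d1 as the double mantissa.
#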
20866
20867
dst_dbl:
20868
clr.l %d0 # clear d0
20869
mov.w FTEMP_EX(%a0),%d0 # get exponent
20870
subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20871
addi.w &DBL_BIAS,%d0 # add double precision bias
20872
tst.b FTEMP_HI(%a0) # is number a denorm?
20873
bmi.b dst_get_dupper # no
20874
subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
20875
dst_get_dupper:
20876
swap %d0 # d0 now in upper word
20877
lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
20878
tst.b FTEMP_EX(%a0) # test sign
20879
bpl.b dst_get_dman # if positive, go process mantissa
20880
bset &0x1f,%d0 # if negative, set sign
20881
dst_get_dman:
20882
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20883
bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
20884
or.l %d1,%d0 # put these bits in ms word of double
20885
mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
20886
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20887
mov.l &21,%d0 # load shift count
20888
lsl.l %d0,%d1 # put lower 11 bits in upper bits
20889
mov.l %d1,L_SCR2(%a6) # build lower lword in memory
20890
mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
20891
bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
20892
mov.l L_SCR2(%a6),%d1
20893
or.l %d0,%d1 # put them in double result
20894
mov.l L_SCR1(%a6),%d0
20895
rts
20896
20897
#########################################################################
20898
# XDEF **************************************************************** #
20899
# dst_sgl(): create single precision value from extended prec #
20900
# #
20901
# XREF **************************************************************** #
20902
# #
20903
# INPUT *************************************************************** #
20904
# a0 = pointer to source operand in extended precision #
20905
# #
20906
# OUTPUT ************************************************************** #
20907
# d0 = single precision result #
20908
# #
20909
# ALGORITHM *********************************************************** #
20910
# #
20911
# Changes extended precision to single precision. #
20912
# sgl_sign = ext_sign #
20913
# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
20914
# get rid of ext integer bit #
20915
# sgl_mant = ext_mant{62:40} #
20916
# #
20917
# --------------- --------------- --------------- #
20918
# extended -> |s| exp | |1| ms mant | | ls mant | #
20919
# --------------- --------------- --------------- #
20920
# 95 64 63 62 40 32 31 12 0 #
20921
# | | #
20922
# | | #
20923
# | | #
20924
# v v #
20925
# --------------- #
20926
# single -> |s|exp| mant | #
20927
# --------------- #
20928
# 31 22 0 #
20929
# #
20930
#########################################################################
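#
# worked example (illustrative; not part of the original sources): for the
# value 1.0, ext_exp = $3fff, so sgl_exp = $3fff - $3fff + $7f = $7f, the
# biased single precision exponent of 1.0. the top 23 fraction bits of
# the extended mantissa become the single mantissa; as in dst_dbl(), no
# rounding is performed here.
#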
20931
20932
dst_sgl:
20933
clr.l %d0
20934
mov.w FTEMP_EX(%a0),%d0 # get exponent
20935
subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20936
addi.w &SGL_BIAS,%d0 # add single precision bias
20937
tst.b FTEMP_HI(%a0) # is number a denorm?
20938
bmi.b dst_get_supper # no
20939
subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
20940
dst_get_supper:
20941
swap %d0 # put exp in upper word of d0
20942
lsl.l &0x7,%d0 # shift it into single exp bits
20943
tst.b FTEMP_EX(%a0) # test sign
20944
bpl.b dst_get_sman # if positive, continue
20945
bset &0x1f,%d0 # if negative, put in sign first
20946
dst_get_sman:
20947
mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20948
andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
20949
lsr.l &0x8,%d1 # and put them flush right
20950
or.l %d1,%d0 # put these bits in ms word of single
20951
rts
20952
20953
##############################################################################
20954
fout_pack:
20955
bsr.l _calc_ea_fout # fetch the <ea>
20956
mov.l %a0,-(%sp)
20957
20958
mov.b STAG(%a6),%d0 # fetch input type
20959
bne.w fout_pack_not_norm # input is not NORM
20960
20961
fout_pack_norm:
20962
btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
20963
beq.b fout_pack_s # static
20964
20965
fout_pack_d:
20966
mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
20967
lsr.b &0x4,%d1
20968
andi.w &0x7,%d1
20969
20970
bsr.l fetch_dreg # fetch Dn w/ k-factor
20971
20972
bra.b fout_pack_type
20973
fout_pack_s:
20974
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
20975
20976
fout_pack_type:
20977
bfexts %d0{&25:&7},%d0 # extract k-factor
20978
mov.l %d0,-(%sp)
20979
20980
lea FP_SRC(%a6),%a0 # pass: ptr to input
20981
20982
# bindec is currently scrambling FP_SRC for denorm inputs.
20983
# we'll have to change this, but for now, tough luck!!!
20984
bsr.l bindec # convert xprec to packed
20985
20986
# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20987
andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
20988
20989
mov.l (%sp)+,%d0
20990
20991
tst.b 3+FP_SCR0_EX(%a6)
20992
bne.b fout_pack_set
20993
tst.l FP_SCR0_HI(%a6)
20994
bne.b fout_pack_set
20995
tst.l FP_SCR0_LO(%a6)
20996
bne.b fout_pack_set
20997
20998
# add the extra condition that only if the k-factor was zero, too, should
20999
# we zero the exponent
21000
tst.l %d0
21001
bne.b fout_pack_set
21002
# "mantissa" is all zero which means that the answer is zero. but, the '040
21003
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
21004
# if the mantissa is zero, I will zero the exponent, too.
21005
# the question now is whether the exponents sign bit is allowed to be non-zero
21006
# for a zero, also...
21007
andi.w &0xf000,FP_SCR0(%a6)
21008
21009
fout_pack_set:
21010
21011
lea FP_SCR0(%a6),%a0 # pass: src addr
21012
21013
fout_pack_write:
21014
mov.l (%sp)+,%a1 # pass: dst addr
21015
mov.l &0xc,%d0 # pass: opsize is 12 bytes
21016
21017
cmpi.b SPCOND_FLG(%a6),&mda7_flg
21018
beq.b fout_pack_a7
21019
21020
bsr.l _dmem_write # write ext prec number to memory
21021
21022
tst.l %d1 # did dstore fail?
21023
bne.w fout_ext_err # yes
21024
21025
rts
21026
21027
# we don't want to do the write if the exception occurred in supervisor mode
21028
# so _mem_write2() handles this for us.
21029
fout_pack_a7:
21030
bsr.l _mem_write2 # write ext prec number to memory
21031
21032
tst.l %d1 # did dstore fail?
21033
bne.w fout_ext_err # yes
21034
21035
rts
21036
21037
fout_pack_not_norm:
21038
cmpi.b %d0,&DENORM # is it a DENORM?
21039
beq.w fout_pack_norm # yes
21040
lea FP_SRC(%a6),%a0
21041
clr.w 2+FP_SRC_EX(%a6)
21042
cmpi.b %d0,&SNAN # is it an SNAN?
21043
beq.b fout_pack_snan # yes
21044
bra.b fout_pack_write # no
21045
21046
fout_pack_snan:
21047
ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21048
bset &0x6,FP_SRC_HI(%a6) # set snan bit
21049
bra.b fout_pack_write
21050
21051
#########################################################################
21052
# XDEF **************************************************************** #
21053
# fetch_dreg(): fetch register according to index in d1 #
21054
# #
21055
# XREF **************************************************************** #
21056
# None #
21057
# #
21058
# INPUT *************************************************************** #
21059
# d1 = index of register to fetch from #
21060
# #
21061
# OUTPUT ************************************************************** #
21062
# d0 = value of register fetched #
21063
# #
21064
# ALGORITHM *********************************************************** #
21065
# According to the index value in d1 which can range from zero #
21066
# to fifteen, load the corresponding register file value (where #
21067
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
21068
# stack. The rest should still be in their original places. #
21069
# #
21070
#########################################################################
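#
# clarifying note (not part of the original sources): indexes 0-7 select
# d0-d7 and indexes 8-15 select a0-a7, so callers pass 8+n for an address
# register. d0/d1/a0/a1 are read from their saved copies in EXC_DREGS,
# a6 from the saved frame pointer at (%a6), a7 from EXC_A7, and the
# remaining registers are read directly since they are still live.
#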
21071
21072
# this routine leaves d1 intact for subsequent store_dreg calls.
21073
global fetch_dreg
21074
fetch_dreg:
21075
mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
21076
jmp (tbl_fdreg.b,%pc,%d0.w*1)
21077
21078
tbl_fdreg:
21079
short fdreg0 - tbl_fdreg
21080
short fdreg1 - tbl_fdreg
21081
short fdreg2 - tbl_fdreg
21082
short fdreg3 - tbl_fdreg
21083
short fdreg4 - tbl_fdreg
21084
short fdreg5 - tbl_fdreg
21085
short fdreg6 - tbl_fdreg
21086
short fdreg7 - tbl_fdreg
21087
short fdreg8 - tbl_fdreg
21088
short fdreg9 - tbl_fdreg
21089
short fdrega - tbl_fdreg
21090
short fdregb - tbl_fdreg
21091
short fdregc - tbl_fdreg
21092
short fdregd - tbl_fdreg
21093
short fdrege - tbl_fdreg
21094
short fdregf - tbl_fdreg
21095
21096
fdreg0:
21097
mov.l EXC_DREGS+0x0(%a6),%d0
21098
rts
21099
fdreg1:
21100
mov.l EXC_DREGS+0x4(%a6),%d0
21101
rts
21102
fdreg2:
21103
mov.l %d2,%d0
21104
rts
21105
fdreg3:
21106
mov.l %d3,%d0
21107
rts
21108
fdreg4:
21109
mov.l %d4,%d0
21110
rts
21111
fdreg5:
21112
mov.l %d5,%d0
21113
rts
21114
fdreg6:
21115
mov.l %d6,%d0
21116
rts
21117
fdreg7:
21118
mov.l %d7,%d0
21119
rts
21120
fdreg8:
21121
mov.l EXC_DREGS+0x8(%a6),%d0
21122
rts
21123
fdreg9:
21124
mov.l EXC_DREGS+0xc(%a6),%d0
21125
rts
21126
fdrega:
21127
mov.l %a2,%d0
21128
rts
21129
fdregb:
21130
mov.l %a3,%d0
21131
rts
21132
fdregc:
21133
mov.l %a4,%d0
21134
rts
21135
fdregd:
21136
mov.l %a5,%d0
21137
rts
21138
fdrege:
21139
mov.l (%a6),%d0
21140
rts
21141
fdregf:
21142
mov.l EXC_A7(%a6),%d0
21143
rts
21144
21145
#########################################################################
21146
# XDEF **************************************************************** #
21147
# store_dreg_l(): store longword to data register specified by d1 #
21148
# #
21149
# XREF **************************************************************** #
21150
# None #
21151
# #
21152
# INPUT *************************************************************** #
21153
# d0 = longword value to store #
21154
# d1 = index of data register to store to #
21155
# #
21156
# OUTPUT ************************************************************** #
21157
# (data register is updated) #
21158
# #
21159
# ALGORITHM *********************************************************** #
21160
# According to the index value in d1, store the longword value #
21161
# in d0 to the corresponding data register. D0/D1 are on the stack #
21162
# while the rest are in their initial places. #
21163
# #
21164
#########################################################################
21165
21166
global store_dreg_l
21167
store_dreg_l:
21168
mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
21169
jmp (tbl_sdregl.b,%pc,%d1.w*1)
21170
21171
tbl_sdregl:
21172
short sdregl0 - tbl_sdregl
21173
short sdregl1 - tbl_sdregl
21174
short sdregl2 - tbl_sdregl
21175
short sdregl3 - tbl_sdregl
21176
short sdregl4 - tbl_sdregl
21177
short sdregl5 - tbl_sdregl
21178
short sdregl6 - tbl_sdregl
21179
short sdregl7 - tbl_sdregl
21180
21181
sdregl0:
21182
mov.l %d0,EXC_DREGS+0x0(%a6)
21183
rts
21184
sdregl1:
21185
mov.l %d0,EXC_DREGS+0x4(%a6)
21186
rts
21187
sdregl2:
21188
mov.l %d0,%d2
21189
rts
21190
sdregl3:
21191
mov.l %d0,%d3
21192
rts
21193
sdregl4:
21194
mov.l %d0,%d4
21195
rts
21196
sdregl5:
21197
mov.l %d0,%d5
21198
rts
21199
sdregl6:
21200
mov.l %d0,%d6
21201
rts
21202
sdregl7:
21203
mov.l %d0,%d7
21204
rts
21205
21206
#########################################################################
21207
# XDEF **************************************************************** #
21208
# store_dreg_w(): store word to data register specified by d1 #
21209
# #
21210
# XREF **************************************************************** #
21211
# None #
21212
# #
21213
# INPUT *************************************************************** #
21214
# d0 = word value to store #
21215
# d1 = index of data register to store to #
21216
# #
21217
# OUTPUT ************************************************************** #
21218
# (data register is updated) #
21219
# #
21220
# ALGORITHM *********************************************************** #
21221
# According to the index value in d1, store the word value #
21222
# in d0 to the corresponding data register. D0/D1 are on the stack #
21223
# while the rest are in their initial places. #
21224
# #
21225
#########################################################################
21226
21227
global store_dreg_w
21228
store_dreg_w:
21229
mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1
21230
jmp (tbl_sdregw.b,%pc,%d1.w*1)
21231
21232
tbl_sdregw:
21233
short sdregw0 - tbl_sdregw
21234
short sdregw1 - tbl_sdregw
21235
short sdregw2 - tbl_sdregw
21236
short sdregw3 - tbl_sdregw
21237
short sdregw4 - tbl_sdregw
21238
short sdregw5 - tbl_sdregw
21239
short sdregw6 - tbl_sdregw
21240
short sdregw7 - tbl_sdregw
21241
21242
sdregw0:
21243
mov.w %d0,2+EXC_DREGS+0x0(%a6)
21244
rts
21245
sdregw1:
21246
mov.w %d0,2+EXC_DREGS+0x4(%a6)
21247
rts
21248
sdregw2:
21249
mov.w %d0,%d2
21250
rts
21251
sdregw3:
21252
mov.w %d0,%d3
21253
rts
21254
sdregw4:
21255
mov.w %d0,%d4
21256
rts
21257
sdregw5:
21258
mov.w %d0,%d5
21259
rts
21260
sdregw6:
21261
mov.w %d0,%d6
21262
rts
21263
sdregw7:
21264
mov.w %d0,%d7
21265
rts
21266
21267
#########################################################################
21268
# XDEF **************************************************************** #
21269
# store_dreg_b(): store byte to data register specified by d1 #
21270
# #
21271
# XREF **************************************************************** #
21272
# None #
21273
# #
21274
# INPUT *************************************************************** #
21275
# d0 = byte value to store #
21276
# d1 = index of data register to store to #
21277
# #
21278
# OUTPUT ************************************************************** #
21279
# (data register is updated) #
21280
# #
21281
# ALGORITHM *********************************************************** #
21282
# According to the index value in d1, store the byte value #
21283
# in d0 to the corresponding data register. D0/D1 are on the stack #
21284
# while the rest are in their initial places. #
21285
# #
21286
#########################################################################
21287
21288
global store_dreg_b
21289
store_dreg_b:
21290
mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1
21291
jmp (tbl_sdregb.b,%pc,%d1.w*1)
21292
21293
tbl_sdregb:
21294
short sdregb0 - tbl_sdregb
21295
short sdregb1 - tbl_sdregb
21296
short sdregb2 - tbl_sdregb
21297
short sdregb3 - tbl_sdregb
21298
short sdregb4 - tbl_sdregb
21299
short sdregb5 - tbl_sdregb
21300
short sdregb6 - tbl_sdregb
21301
short sdregb7 - tbl_sdregb
21302
21303
sdregb0:
21304
mov.b %d0,3+EXC_DREGS+0x0(%a6)
21305
rts
21306
sdregb1:
21307
mov.b %d0,3+EXC_DREGS+0x4(%a6)
21308
rts
21309
sdregb2:
21310
mov.b %d0,%d2
21311
rts
21312
sdregb3:
21313
mov.b %d0,%d3
21314
rts
21315
sdregb4:
21316
mov.b %d0,%d4
21317
rts
21318
sdregb5:
21319
mov.b %d0,%d5
21320
rts
21321
sdregb6:
21322
mov.b %d0,%d6
21323
rts
21324
sdregb7:
21325
mov.b %d0,%d7
21326
rts
21327
21328
#########################################################################
21329
# XDEF **************************************************************** #
21330
# inc_areg(): increment an address register by the value in d0 #
21331
# #
21332
# XREF **************************************************************** #
21333
# None #
21334
# #
21335
# INPUT *************************************************************** #
21336
# d0 = amount to increment by #
21337
# d1 = index of address register to increment #
21338
# #
21339
# OUTPUT ************************************************************** #
21340
# (address register is updated) #
21341
# #
21342
# ALGORITHM *********************************************************** #
21343
# Typically used for an instruction w/ a post-increment <ea>, #
21344
# this routine adds the increment value in d0 to the address register #
21345
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21346
# in their original places. #
21347
# For a7, if the increment amount is one, then we have to #
21348
# increment by two. For any a7 update, set the mia7_flag so that if #
21349
# an access error exception occurs later in emulation, this address #
21350
# register update can be undone. #
21351
# #
21352
#########################################################################
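#
# usage sketch (illustrative; not part of the original sources): emulating
# "fmove.b %fp0,(%a2)+" would call inc_areg with d0 = 1 and d1 = 2,
# advancing a2 by one. the same call with d1 = 7 takes the iareg7b path
# and bumps a7 by two so the stack pointer stays word aligned.
#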
21353
21354
global inc_areg
21355
inc_areg:
21356
mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1
21357
jmp (tbl_iareg.b,%pc,%d1.w*1)
21358
21359
tbl_iareg:
21360
short iareg0 - tbl_iareg
21361
short iareg1 - tbl_iareg
21362
short iareg2 - tbl_iareg
21363
short iareg3 - tbl_iareg
21364
short iareg4 - tbl_iareg
21365
short iareg5 - tbl_iareg
21366
short iareg6 - tbl_iareg
21367
short iareg7 - tbl_iareg
21368
21369
iareg0: add.l %d0,EXC_DREGS+0x8(%a6)
21370
rts
21371
iareg1: add.l %d0,EXC_DREGS+0xc(%a6)
21372
rts
21373
iareg2: add.l %d0,%a2
21374
rts
21375
iareg3: add.l %d0,%a3
21376
rts
21377
iareg4: add.l %d0,%a4
21378
rts
21379
iareg5: add.l %d0,%a5
21380
rts
21381
iareg6: add.l %d0,(%a6)
21382
rts
21383
iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
21384
cmpi.b %d0,&0x1
21385
beq.b iareg7b
21386
add.l %d0,EXC_A7(%a6)
21387
rts
21388
iareg7b:
21389
addq.l &0x2,EXC_A7(%a6)
21390
rts
21391
21392
#########################################################################
21393
# XDEF **************************************************************** #
21394
# dec_areg(): decrement an address register by the value in d0 #
21395
# #
21396
# XREF **************************************************************** #
21397
# None #
21398
# #
21399
# INPUT *************************************************************** #
21400
# d0 = amount to decrement by #
21401
# d1 = index of address register to decrement #
21402
# #
21403
# OUTPUT ************************************************************** #
21404
# (address register is updated) #
21405
# #
21406
# ALGORITHM *********************************************************** #
21407
# Typically used for an instruction w/ a pre-decrement <ea>, #
21408
# this routine subtracts the decrement value in d0 from the address register #
21409
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21410
# in their original places. #
21411
# For a7, if the decrement amount is one, then we have to #
21412
# decrement by two. For any a7 update, set the mda7_flag so that if #
21413
# an access error exception occurs later in emulation, this address #
21414
# register update can be undone. #
21415
# #
21416
#########################################################################
21417
21418
global dec_areg
21419
dec_areg:
21420
mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1
21421
jmp (tbl_dareg.b,%pc,%d1.w*1)
21422
21423
tbl_dareg:
21424
short dareg0 - tbl_dareg
21425
short dareg1 - tbl_dareg
21426
short dareg2 - tbl_dareg
21427
short dareg3 - tbl_dareg
21428
short dareg4 - tbl_dareg
21429
short dareg5 - tbl_dareg
21430
short dareg6 - tbl_dareg
21431
short dareg7 - tbl_dareg
21432
21433
dareg0: sub.l %d0,EXC_DREGS+0x8(%a6)
21434
rts
21435
dareg1: sub.l %d0,EXC_DREGS+0xc(%a6)
21436
rts
21437
dareg2: sub.l %d0,%a2
21438
rts
21439
dareg3: sub.l %d0,%a3
21440
rts
21441
dareg4: sub.l %d0,%a4
21442
rts
21443
dareg5: sub.l %d0,%a5
21444
rts
21445
dareg6: sub.l %d0,(%a6)
21446
rts
21447
dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
21448
cmpi.b %d0,&0x1
21449
beq.b dareg7b
21450
sub.l %d0,EXC_A7(%a6)
21451
rts
21452
dareg7b:
21453
subq.l &0x2,EXC_A7(%a6)
21454
rts
21455
21456
##############################################################################
21457
21458
#########################################################################
21459
# XDEF **************************************************************** #
21460
# load_fpn1(): load FP register value into FP_SRC(a6). #
21461
# #
21462
# XREF **************************************************************** #
21463
# None #
21464
# #
21465
# INPUT *************************************************************** #
21466
# d0 = index of FP register to load #
21467
# #
21468
# OUTPUT ************************************************************** #
21469
# FP_SRC(a6) = value loaded from FP register file #
21470
# #
21471
# ALGORITHM *********************************************************** #
21472
# Using the index in d0, load FP_SRC(a6) with a number from the #
21473
# FP register file. #
21474
# #
21475
#########################################################################
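#
# clarifying note (not part of the original sources): fp0 and fp1 were
# saved to the exception save area (EXC_FP0/EXC_FP1) on entry, so they
# are copied from memory; fp2-fp7 are still live in the FPU and are
# captured with fmovm.x directly into FP_SRC. every path returns with
# a0 pointing at FP_SRC(%a6).
#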
21476
21477
global load_fpn1
21478
load_fpn1:
21479
mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
21480
jmp (tbl_load_fpn1.b,%pc,%d0.w*1)
21481
21482
tbl_load_fpn1:
21483
short load_fpn1_0 - tbl_load_fpn1
21484
short load_fpn1_1 - tbl_load_fpn1
21485
short load_fpn1_2 - tbl_load_fpn1
21486
short load_fpn1_3 - tbl_load_fpn1
21487
short load_fpn1_4 - tbl_load_fpn1
21488
short load_fpn1_5 - tbl_load_fpn1
21489
short load_fpn1_6 - tbl_load_fpn1
21490
short load_fpn1_7 - tbl_load_fpn1
21491
21492
load_fpn1_0:
21493
mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21494
mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21495
mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21496
lea FP_SRC(%a6), %a0
21497
rts
21498
load_fpn1_1:
21499
mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21500
mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21501
mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21502
lea FP_SRC(%a6), %a0
21503
rts
21504
load_fpn1_2:
21505
fmovm.x &0x20, FP_SRC(%a6)
21506
lea FP_SRC(%a6), %a0
21507
rts
21508
load_fpn1_3:
21509
fmovm.x &0x10, FP_SRC(%a6)
21510
lea FP_SRC(%a6), %a0
21511
rts
21512
load_fpn1_4:
21513
fmovm.x &0x08, FP_SRC(%a6)
21514
lea FP_SRC(%a6), %a0
21515
rts
21516
load_fpn1_5:
21517
fmovm.x &0x04, FP_SRC(%a6)
21518
lea FP_SRC(%a6), %a0
21519
rts
21520
load_fpn1_6:
21521
fmovm.x &0x02, FP_SRC(%a6)
21522
lea FP_SRC(%a6), %a0
21523
rts
21524
load_fpn1_7:
21525
fmovm.x &0x01, FP_SRC(%a6)
21526
lea FP_SRC(%a6), %a0
21527
rts
21528
21529
#############################################################################
21530
21531
#########################################################################
21532
# XDEF **************************************************************** #
21533
# load_fpn2(): load FP register value into FP_DST(a6). #
21534
# #
21535
# XREF **************************************************************** #
21536
# None #
21537
# #
21538
# INPUT *************************************************************** #
21539
# d0 = index of FP register to load #
21540
# #
21541
# OUTPUT ************************************************************** #
21542
# FP_DST(a6) = value loaded from FP register file #
21543
# #
21544
# ALGORITHM *********************************************************** #
21545
# Using the index in d0, load FP_DST(a6) with a number from the #
21546
# FP register file. #
21547
# #
21548
#########################################################################
21549
21550
global load_fpn2
21551
load_fpn2:
21552
mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
21553
jmp (tbl_load_fpn2.b,%pc,%d0.w*1)
21554
21555
tbl_load_fpn2:
21556
short load_fpn2_0 - tbl_load_fpn2
21557
short load_fpn2_1 - tbl_load_fpn2
21558
short load_fpn2_2 - tbl_load_fpn2
21559
short load_fpn2_3 - tbl_load_fpn2
21560
short load_fpn2_4 - tbl_load_fpn2
21561
short load_fpn2_5 - tbl_load_fpn2
21562
short load_fpn2_6 - tbl_load_fpn2
21563
short load_fpn2_7 - tbl_load_fpn2
21564
21565
load_fpn2_0:
21566
mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
21567
mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
21568
mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
21569
lea FP_DST(%a6), %a0
21570
rts
21571
load_fpn2_1:
21572
mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
21573
mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
21574
mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
21575
lea FP_DST(%a6), %a0
21576
rts
21577
load_fpn2_2:
21578
fmovm.x &0x20, FP_DST(%a6)
21579
lea FP_DST(%a6), %a0
21580
rts
21581
load_fpn2_3:
21582
fmovm.x &0x10, FP_DST(%a6)
21583
lea FP_DST(%a6), %a0
21584
rts
21585
load_fpn2_4:
21586
fmovm.x &0x08, FP_DST(%a6)
21587
lea FP_DST(%a6), %a0
21588
rts
21589
load_fpn2_5:
21590
fmovm.x &0x04, FP_DST(%a6)
21591
lea FP_DST(%a6), %a0
21592
rts
21593
load_fpn2_6:
21594
fmovm.x &0x02, FP_DST(%a6)
21595
lea FP_DST(%a6), %a0
21596
rts
21597
load_fpn2_7:
21598
fmovm.x &0x01, FP_DST(%a6)
21599
lea FP_DST(%a6), %a0
21600
rts
21601
21602
#############################################################################
21603
21604
#########################################################################
21605
# XDEF **************************************************************** #
21606
# store_fpreg(): store an fp value to the fpreg designated d0. #
21607
# #
21608
# XREF **************************************************************** #
21609
# None #
21610
# #
21611
# INPUT *************************************************************** #
21612
# fp0 = extended precision value to store #
21613
# d0 = index of floating-point register #
21614
# #
21615
# OUTPUT ************************************************************** #
21616
# None #
21617
# #
21618
# ALGORITHM *********************************************************** #
21619
# Store the value in fp0 to the FP register designated by the #
21620
# value in d0. The FP number can be DENORM or SNAN so we have to be #
21621
# careful that we don't take an exception here. #
21622
# #
21623
#########################################################################
21624
21625
global store_fpreg
21626
store_fpreg:
21627
mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
21628
jmp (tbl_store_fpreg.b,%pc,%d0.w*1)
21629
21630
tbl_store_fpreg:
21631
short store_fpreg_0 - tbl_store_fpreg
21632
short store_fpreg_1 - tbl_store_fpreg
21633
short store_fpreg_2 - tbl_store_fpreg
21634
short store_fpreg_3 - tbl_store_fpreg
21635
short store_fpreg_4 - tbl_store_fpreg
21636
short store_fpreg_5 - tbl_store_fpreg
21637
short store_fpreg_6 - tbl_store_fpreg
21638
short store_fpreg_7 - tbl_store_fpreg
21639
21640
store_fpreg_0:
21641
fmovm.x &0x80, EXC_FP0(%a6)
21642
rts
21643
store_fpreg_1:
21644
fmovm.x &0x80, EXC_FP1(%a6)
21645
rts
21646
store_fpreg_2:
21647
fmovm.x &0x01, -(%sp)
21648
fmovm.x (%sp)+, &0x20
21649
rts
21650
store_fpreg_3:
21651
fmovm.x &0x01, -(%sp)
21652
fmovm.x (%sp)+, &0x10
21653
rts
21654
store_fpreg_4:
21655
fmovm.x &0x01, -(%sp)
21656
fmovm.x (%sp)+, &0x08
21657
rts
21658
store_fpreg_5:
21659
fmovm.x &0x01, -(%sp)
21660
fmovm.x (%sp)+, &0x04
21661
rts
21662
store_fpreg_6:
21663
fmovm.x &0x01, -(%sp)
21664
fmovm.x (%sp)+, &0x02
21665
rts
21666
store_fpreg_7:
21667
fmovm.x &0x01, -(%sp)
21668
fmovm.x (%sp)+, &0x01
21669
rts
21670
21671
#########################################################################
21672
# XDEF **************************************************************** #
21673
# _denorm(): denormalize an intermediate result #
21674
# #
21675
# XREF **************************************************************** #
21676
# None #
21677
# #
21678
# INPUT *************************************************************** #
21679
# a0 = points to the operand to be denormalized #
21680
# (in the internal extended format) #
21681
# #
21682
# d0 = rounding precision #
21683
# #
21684
# OUTPUT ************************************************************** #
21685
# a0 = pointer to the denormalized result #
21686
# (in the internal extended format) #
21687
# #
21688
# d0 = guard,round,sticky #
21689
# #
21690
# ALGORITHM *********************************************************** #
21691
# According to the exponent underflow threshold for the given #
21692
# precision, shift the mantissa bits to the right in order to raise the #
21693
# exponent of the operand to the threshold value. While shifting the #
21694
# mantissa bits right, maintain the value of the guard, round, and #
21695
# sticky bits. #
21696
# other notes: #
21697
# (1) _denorm() is called by the underflow routines #
21698
# (2) _denorm() does NOT affect the status register #
21699
# #
21700
#########################################################################
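#
# The C sketch below is illustrative only and is not part of this package;
# it restates the denormalization idea described above: shift the 64-bit
# mantissa right until the exponent is raised to the threshold, folding the
# shifted-off bits into guard/round/sticky (returned in the d0{31:29}
# layout).  The name denorm_sketch() is hypothetical.
#
#	#include <stdint.h>
#
#	static uint64_t denorm_sketch(uint64_t man, int exp, int threshold,
#	                              uint32_t *grs)
#	{
#	    int shift = threshold - exp;   /* places the exponent must rise   */
#	    uint32_t g = 0, r = 0, s = 0;
#
#	    while (shift-- > 0) {
#	        s |= r;                    /* old round bit folds into sticky */
#	        r = g;                     /* old guard bit becomes round     */
#	        g = (uint32_t)(man & 1);   /* shifted-off bit becomes guard   */
#	        man >>= 1;
#	    }
#	    *grs = (g << 31) | (r << 30) | (s << 29);
#	    return man;                    /* mantissa aligned to threshold   */
#	}
#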
21701
21702
#
21703
# table of exponent threshold values for each precision
21704
#
21705
tbl_thresh:
21706
short 0x0
21707
short sgl_thresh
21708
short dbl_thresh
21709
21710
global _denorm
21711
_denorm:
21712
#
21713
# Load the exponent threshold for the precision selected and check
21714
# to see if (threshold - exponent) is > 65 in which case we can
21715
# simply calculate the sticky bit and zero the mantissa. otherwise
21716
# we have to call the denormalization routine.
21717
#
21718
lsr.b &0x2, %d0 # shift prec to lo bits
21719
mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21720
mov.w %d1, %d0 # copy d1 into d0
21721
sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
21722
cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
21723
bpl.b denorm_set_stky # yes; just calc sticky
21724
21725
clr.l %d0 # clear g,r,s
21726
btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
21727
beq.b denorm_call # no; don't change anything
21728
bset &29, %d0 # yes; set sticky bit
21729
21730
denorm_call:
21731
bsr.l dnrm_lp # denormalize the number
21732
rts
21733
21734
#
21735
# all bits would have been shifted off during the denorm so simply
21736
# calculate if the sticky should be set and clear the entire mantissa.
21737
#
21738
denorm_set_stky:
21739
mov.l &0x20000000, %d0 # set sticky bit in return value
21740
mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
21741
clr.l FTEMP_HI(%a0) # clear ms mantissa
21742
clr.l FTEMP_LO(%a0) # clear ls mantissa
21743
rts
21744
21745
# #
21746
# dnrm_lp(): normalize exponent/mantissa to specified threshold #
21747
# #
21748
# INPUT: #
21749
# %a0 : points to the operand to be denormalized #
21750
# %d0{31:29} : initial guard,round,sticky #
21751
# %d1{15:0} : denormalization threshold #
21752
# OUTPUT: #
21753
# %a0 : points to the denormalized operand #
21754
# %d0{31:29} : final guard,round,sticky #
21755
# #
21756
21757
# *** Local Equates *** #
21758
set GRS, L_SCR2 # g,r,s temp storage
21759
set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
21760
21761
global dnrm_lp
21762
dnrm_lp:
21763
21764
#
21765
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
21766
# in memory so as to make the bitfield extraction for denormalization easier.
21767
#
21768
mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21769
mov.l %d0, GRS(%a6) # place g,r,s after it
21770
21771
#
21772
# check to see how much less than the underflow threshold the operand
21773
# exponent is.
21774
#
21775
mov.l %d1, %d0 # copy the denorm threshold
21776
sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
21777
ble.b dnrm_no_lp # d1 <= 0
21778
cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
21779
blt.b case_1 # yes
21780
cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
21781
blt.b case_2 # yes
21782
bra.w case_3 # (d1 >= 64)
21783
21784
#
21785
# No normalization necessary
21786
#
21787
dnrm_no_lp:
21788
mov.l GRS(%a6), %d0 # restore original g,r,s
21789
rts
21790
21791
#
21792
# case (0<d1<32)
21793
#
21794
# %d0 = denorm threshold
21795
# %d1 = "n" = amt to shift
21796
#
21797
# ---------------------------------------------------------
21798
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21799
# ---------------------------------------------------------
21800
# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21801
# \ \ \ \
21802
# \ \ \ \
21803
# \ \ \ \
21804
# \ \ \ \
21805
# \ \ \ \
21806
# \ \ \ \
21807
# \ \ \ \
21808
# \ \ \ \
21809
# <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810
# ---------------------------------------------------------
21811
# |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
21812
# ---------------------------------------------------------
21813
#
21814
case_1:
21815
mov.l %d2, -(%sp) # create temp storage
21816
21817
mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21818
mov.l &32, %d0
21819
sub.w %d1, %d0 # %d0 = 32 - %d1
21820
21821
cmpi.w %d1, &29 # is shft amt >= 29
21822
blt.b case1_extract # no; no fix needed
21823
mov.b GRS(%a6), %d2
21824
or.b %d2, 3+FTEMP_LO2(%a6)
21825
21826
case1_extract:
21827
bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21828
bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21829
bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21830
21831
mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
21832
mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
21833
21834
bftst %d0{&2:&30} # were bits shifted off?
21835
beq.b case1_sticky_clear # no; go finish
21836
bset &rnd_stky_bit, %d0 # yes; set sticky bit
21837
21838
case1_sticky_clear:
21839
and.l &0xe0000000, %d0 # clear all but G,R,S
21840
mov.l (%sp)+, %d2 # restore temp register
21841
rts
21842
21843
#
21844
# case (32<=d1<64)
21845
#
21846
# %d0 = denorm threshold
21847
# %d1 = "n" = amt to shift
21848
#
21849
# ---------------------------------------------------------
21850
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21851
# ---------------------------------------------------------
21852
# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21853
# \ \ \
21854
# \ \ \
21855
# \ \ -------------------
21856
# \ -------------------- \
21857
# ------------------- \ \
21858
# \ \ \
21859
# \ \ \
21860
# \ \ \
21861
# <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862
# ---------------------------------------------------------
21863
# |0...............0|0....0| NEW_LO |grs |
21864
# ---------------------------------------------------------
21865
#
21866
case_2:
21867
mov.l %d2, -(%sp) # create temp storage
21868
21869
mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21870
subi.w &0x20, %d1 # %d1 now between 0 and 32
21871
mov.l &0x20, %d0
21872
sub.w %d1, %d0 # %d0 = 32 - %d1
21873
21874
# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21875
# the number of bits to check for the sticky detect.
21876
# it only plays a role in shift amounts of 61-63.
21877
mov.b GRS(%a6), %d2
21878
or.b %d2, 3+FTEMP_LO2(%a6)
21879
21880
bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21881
bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21882
21883
bftst %d1{&2:&30} # were any bits shifted off?
21884
bne.b case2_set_sticky # yes; set sticky bit
21885
bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
21886
bne.b case2_set_sticky # yes; set sticky bit
21887
21888
mov.l %d1, %d0 # move new G,R,S to %d0
21889
bra.b case2_end
21890
21891
case2_set_sticky:
21892
mov.l %d1, %d0 # move new G,R,S to %d0
21893
bset &rnd_stky_bit, %d0 # set sticky bit
21894
21895
case2_end:
21896
clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
21897
mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
21898
and.l &0xe0000000, %d0 # clear all but G,R,S
21899
21900
mov.l (%sp)+,%d2 # restore temp register
21901
rts
21902
21903
#
21904
# case (d1>=64)
21905
#
21906
# %d0 = denorm threshold
21907
# %d1 = amt to shift
21908
#
21909
case_3:
21910
mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
21911
21912
cmpi.w %d1, &65 # is shift amt > 65?
21913
blt.b case3_64 # no; it's == 64
21914
beq.b case3_65 # no; it's == 65
21915
21916
#
21917
# case (d1>65)
21918
#
21919
# Shift value is > 65 and out of range. All bits are shifted off.
21920
# Return a zero mantissa with the sticky bit set
21921
#
21922
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21923
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21924
mov.l &0x20000000, %d0 # set sticky bit
21925
rts
21926
21927
#
21928
# case (d1 == 64)
21929
#
21930
# ---------------------------------------------------------
21931
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21932
# ---------------------------------------------------------
21933
# <-------(32)------>
21934
# \ \
21935
# \ \
21936
# \ \
21937
# \ ------------------------------
21938
# ------------------------------- \
21939
# \ \
21940
# \ \
21941
# \ \
21942
# <-------(32)------>
21943
# ---------------------------------------------------------
21944
# |0...............0|0................0|grs |
21945
# ---------------------------------------------------------
21946
#
21947
case3_64:
21948
mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21949
mov.l %d0, %d1 # make a copy
21950
and.l &0xc0000000, %d0 # extract G,R
21951
and.l &0x3fffffff, %d1 # extract other bits
21952
21953
bra.b case3_complete
21954
21955
#
21956
# case (d1 == 65)
21957
#
21958
# ---------------------------------------------------------
21959
# | FTEMP_HI | FTEMP_LO |grs000.........000|
21960
# ---------------------------------------------------------
21961
# <-------(32)------>
21962
# \ \
21963
# \ \
21964
# \ \
21965
# \ ------------------------------
21966
# -------------------------------- \
21967
# \ \
21968
# \ \
21969
# \ \
21970
# <-------(31)----->
21971
# ---------------------------------------------------------
21972
# |0...............0|0................0|0rs |
21973
# ---------------------------------------------------------
21974
#
21975
case3_65:
21976
mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
mov.l %d0, %d1 # make a copy
21977
and.l &0x80000000, %d0 # extract R bit
21978
lsr.l &0x1, %d0 # shift high bit into R bit
21979
and.l &0x7fffffff, %d1 # extract other bits
21980
21981
case3_complete:
21982
# last operation done was an "and" of the bits shifted off so the condition
21983
# codes are already set so branch accordingly.
21984
bne.b case3_set_sticky # yes; go set new sticky
21985
tst.l FTEMP_LO(%a0) # were any bits shifted off?
21986
bne.b case3_set_sticky # yes; go set new sticky
21987
tst.b GRS(%a6) # were any bits shifted off?
21988
bne.b case3_set_sticky # yes; go set new sticky
21989
21990
#
21991
# no bits were shifted off so don't set the sticky bit.
21992
# the guard and round bits in %d0 are left as computed above;
21993
# the entire mantissa is zero.
21994
#
21995
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21996
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21997
rts
21998
21999
#
22000
# some bits were shifted off so set the sticky bit.
22001
# the entire mantissa is zero.
22002
#
22003
case3_set_sticky:
22004
bset &rnd_stky_bit,%d0 # set new sticky bit
22005
clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22006
clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22007
rts
22008
22009
#########################################################################
22010
# XDEF **************************************************************** #
22011
# _round(): round result according to precision/mode #
22012
# #
22013
# XREF **************************************************************** #
22014
# None #
22015
# #
22016
# INPUT *************************************************************** #
22017
# a0 = ptr to input operand in internal extended format #
22018
# d1(hi) = contains rounding precision: #
22019
# ext = $0000xxxx #
22020
# sgl = $0004xxxx #
22021
# dbl = $0008xxxx #
22022
# d1(lo) = contains rounding mode: #
22023
# RN = $xxxx0000 #
22024
# RZ = $xxxx0001 #
22025
# RM = $xxxx0002 #
22026
# RP = $xxxx0003 #
22027
# d0{31:29} = contains the g,r,s bits (extended) #
22028
# #
22029
# OUTPUT ************************************************************** #
22030
# a0 = pointer to rounded result #
22031
# #
22032
# ALGORITHM *********************************************************** #
22033
# On return the value pointed to by a0 is correctly rounded, #
22034
# a0 is preserved and the g-r-s bits in d0 are cleared. #
22035
# The result is not typed - the tag field is invalid. The #
22036
# result is still in the internal extended format. #
22037
# #
22038
# The INEX bit of USER_FPSR will be set if the rounded result was #
22039
# inexact (i.e. if any of the g-r-s bits were set). #
22040
# #
22041
#########################################################################
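#
# Illustrative only (not part of this package): the C sketch below shows the
# round-up decision that the mode handlers below implement by adding 1 to the
# l-bit and, for RN ties, clearing it again when r = s = 0.  The name
# round_up_sketch() is hypothetical.
#
#	enum rnd_mode { RN, RZ, RM, RP };
#
#	static int round_up_sketch(enum rnd_mode mode, int negative,
#	                           int g, int r, int s, int lsb)
#	{
#	    if (!(g | r | s))                      /* exact: nothing to do      */
#	        return 0;
#	    switch (mode) {
#	    case RZ: return 0;                     /* always truncate           */
#	    case RP: return !negative;             /* round toward +infinity    */
#	    case RM: return negative;              /* round toward -infinity    */
#	    case RN: return g && (r || s || lsb);  /* nearest, ties go to even  */
#	    }
#	    return 0;
#	}
#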
22042
22043
global _round
22044
_round:
22045
#
22046
# ext_grs() looks at the rounding precision and sets the appropriate
22047
# G,R,S bits.
22048
# If (G,R,S == 0) then result is exact and round is done, else set
22049
# the inex flag in status reg and continue.
22050
#
22051
bsr.l ext_grs # extract G,R,S
22052
22053
tst.l %d0 # are G,R,S zero?
22054
beq.w truncate # yes; round is complete
22055
22056
or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22057
22058
#
22059
# Use rounding mode as an index into a jump table for these modes.
22060
# All of the following assumes grs != 0.
22061
#
22062
mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
22063
jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
22064
22065
tbl_mode:
22066
short rnd_near - tbl_mode
22067
short truncate - tbl_mode # RZ always truncates
22068
short rnd_mnus - tbl_mode
22069
short rnd_plus - tbl_mode
22070
22071
#################################################################
22072
# ROUND PLUS INFINITY #
22073
# #
22074
# If sign of fp number = 0 (positive), then add 1 to l. #
22075
#################################################################
22076
rnd_plus:
22077
tst.b FTEMP_SGN(%a0) # check for sign
22078
bmi.w truncate # if negative then truncate
22079
22080
mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22081
swap %d1 # set up d1 for round prec.
22082
22083
cmpi.b %d1, &s_mode # is prec = sgl?
22084
beq.w add_sgl # yes
22085
bgt.w add_dbl # no; it's dbl
22086
bra.w add_ext # no; it's ext
22087
22088
#################################################################
22089
# ROUND MINUS INFINITY #
22090
# #
22091
# If sign of fp number = 1 (negative), then add 1 to l. #
22092
#################################################################
22093
rnd_mnus:
22094
tst.b FTEMP_SGN(%a0) # check for sign
22095
bpl.w truncate # if positive then truncate
22096
22097
mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22098
swap %d1 # set up d1 for round prec.
22099
22100
cmpi.b %d1, &s_mode # is prec = sgl?
22101
beq.w add_sgl # yes
22102
bgt.w add_dbl # no; it's dbl
22103
bra.w add_ext # no; it's ext
22104
22105
#################################################################
22106
# ROUND NEAREST #
22107
# #
22108
# If (g=1), then add 1 to l and if (r=s=0), then clear l #
22109
# Note that this will round to even in case of a tie. #
22110
#################################################################
22111
rnd_near:
22112
asl.l &0x1, %d0 # shift g-bit to c-bit
22113
bcc.w truncate # if (g=0) then truncate
22114
22115
swap %d1 # set up d1 for round prec.
22116
22117
cmpi.b %d1, &s_mode # is prec = sgl?
22118
beq.w add_sgl # yes
22119
bgt.w add_dbl # no; it's dbl
22120
bra.w add_ext # no; it's ext
22121
22122
# *** LOCAL EQUATES ***
22123
set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
22124
set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
22125
22126
#########################
22127
# ADD SINGLE #
22128
#########################
22129
add_sgl:
22130
add.l &ad_1_sgl, FTEMP_HI(%a0)
22131
bcc.b scc_clr # no mantissa overflow
22132
roxr.w FTEMP_HI(%a0) # shift v-bit back in
22133
roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
22134
add.w &0x1, FTEMP_EX(%a0) # and incr exponent
22135
scc_clr:
22136
tst.l %d0 # test for rs = 0
22137
bne.b sgl_done
22138
and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22139
sgl_done:
22140
and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22141
clr.l FTEMP_LO(%a0) # clear d2
22142
rts
22143
22144
#########################
22145
# ADD EXTENDED #
22146
#########################
22147
add_ext:
22148
addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
22149
bcc.b xcc_clr # test for carry out
22150
addq.l &1,FTEMP_HI(%a0) # propagate carry
22151
bcc.b xcc_clr
22152
roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22153
roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22154
roxr.w FTEMP_LO(%a0)
22155
roxr.w FTEMP_LO+2(%a0)
22156
add.w &0x1,FTEMP_EX(%a0) # and inc exp
22157
xcc_clr:
22158
tst.l %d0 # test rs = 0
22159
bne.b add_ext_done
22160
and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
22161
add_ext_done:
22162
rts
22163
22164
#########################
22165
# ADD DOUBLE #
22166
#########################
22167
add_dbl:
22168
add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22169
bcc.b dcc_clr # no carry
22170
addq.l &0x1, FTEMP_HI(%a0) # propagate carry
22171
bcc.b dcc_clr # no carry
22172
22173
roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22174
roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22175
roxr.w FTEMP_LO(%a0)
22176
roxr.w FTEMP_LO+2(%a0)
22177
addq.w &0x1, FTEMP_EX(%a0) # incr exponent
22178
dcc_clr:
22179
tst.l %d0 # test for rs = 0
22180
bne.b dbl_done
22181
and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22182
22183
dbl_done:
22184
and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22185
rts
22186
22187
###########################
22188
# Truncate all other bits #
22189
###########################
22190
truncate:
22191
swap %d1 # select rnd prec
22192
22193
cmpi.b %d1, &s_mode # is prec sgl?
22194
beq.w sgl_done # yes
22195
bgt.b dbl_done # no; it's dbl
22196
rts # no; it's ext
22197
22198
22199
#
22200
# ext_grs(): extract guard, round and sticky bits according to
22201
# rounding precision.
22202
#
22203
# INPUT
22204
# d0 = extended precision g,r,s (in d0{31:29})
22205
# d1 = {PREC,ROUND}
22206
# OUTPUT
22207
# d0{31:29} = guard, round, sticky
22208
#
22209
# ext_grs() extracts the guard/round/sticky bits according to the
22210
# selected rounding precision. It is called by the round subroutine
22211
# only. All registers except d0 are kept intact. d0 becomes an
22212
# updated guard,round,sticky in d0{31:29}
22213
#
22214
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22215
# prior to usage, and needs to restore d1 to original. this
22216
# routine is tightly tied to the round routine and not meant to
22217
# uphold standard subroutine calling practices.
22218
#
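#
# Illustrative only (not part of this package): ext_grs_sketch() below shows,
# in C, which mantissa bits become guard/round/sticky for single and double
# rounding precision; the bit positions match the bfextu fields used below.
# The name is hypothetical.
#
#	#include <stdint.h>
#
#	static uint32_t ext_grs_sketch(uint32_t hi, uint32_t lo,
#	                               uint32_t old_grs, int dbl)
#	{
#	    uint32_t g, r, s;
#
#	    if (!dbl) {                      /* sgl: l-bit is bit 8 of hi(man)  */
#	        g = (hi >> 7) & 1;
#	        r = (hi >> 6) & 1;
#	        s = ((hi & 0x3f) | lo | old_grs) != 0;
#	    } else {                         /* dbl: l-bit is bit 11 of lo(man) */
#	        g = (lo >> 10) & 1;
#	        r = (lo >> 9) & 1;
#	        s = ((lo & 0x1ff) | old_grs) != 0;
#	    }
#	    return (g << 31) | (r << 30) | (s << 29);
#	}
#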
22219
22220
ext_grs:
22221
swap %d1 # have d1.w point to round precision
22222
tst.b %d1 # is rnd prec = extended?
22223
bne.b ext_grs_not_ext # no; go handle sgl or dbl
22224
22225
#
22226
# %d0 actually already holds g,r,s since _round() had it before calling
22227
# this function. so, as long as we don't disturb it, we are "returning" it.
22228
#
22229
ext_grs_ext:
22230
swap %d1 # yes; return to correct positions
22231
rts
22232
22233
ext_grs_not_ext:
22234
movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
22235
22236
cmpi.b %d1, &s_mode # is rnd prec = sgl?
22237
bne.b ext_grs_dbl # no; go handle dbl
22238
22239
#
22240
# sgl:
22241
# 96 64 40 32 0
22242
# -----------------------------------------------------
22243
# | EXP |XXXXXXX| |xx | |grs|
22244
# -----------------------------------------------------
22245
# <--(24)--->nn\ /
22246
# ee ---------------------
22247
# ww |
22248
# v
22249
# gr new sticky
22250
#
22251
ext_grs_sgl:
22252
bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22253
mov.l &30, %d2 # of the sgl prec. limits
22254
lsl.l %d2, %d3 # shift g-r bits to MSB of d3
22255
mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
22256
and.l &0x0000003f, %d2 # s bit is the or of all other
22257
bne.b ext_grs_st_stky # bits to the right of g-r
22258
tst.l FTEMP_LO(%a0) # test lower mantissa
22259
bne.b ext_grs_st_stky # if any are set, set sticky
22260
tst.l %d0 # test original g,r,s
22261
bne.b ext_grs_st_stky # if any are set, set sticky
22262
bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
22263
22264
#
22265
# dbl:
22266
# 96 64 32 11 0
22267
# -----------------------------------------------------
22268
# | EXP |XXXXXXX| | |xx |grs|
22269
# -----------------------------------------------------
22270
# nn\ /
22271
# ee -------
22272
# ww |
22273
# v
22274
# gr new sticky
22275
#
22276
ext_grs_dbl:
22277
bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22278
mov.l &30, %d2 # of the dbl prec. limits
22279
lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
22280
mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
22281
and.l &0x000001ff, %d2 # s bit is the or-ing of all
22282
bne.b ext_grs_st_stky # other bits to the right of g-r
22283
tst.l %d0 # test word original g,r,s
22284
bne.b ext_grs_st_stky # if any are set, set sticky
22285
bra.b ext_grs_end_sd # if clear, exit
22286
22287
ext_grs_st_stky:
22288
bset &rnd_stky_bit, %d3 # set sticky bit
22289
ext_grs_end_sd:
22290
mov.l %d3, %d0 # return grs to d0
22291
22292
movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
22293
22294
swap %d1 # restore d1 to original
22295
rts
22296
22297
#########################################################################
22298
# norm(): normalize the mantissa of an extended precision input. the #
22299
# input operand should not be normalized already. #
22300
# #
22301
# XDEF **************************************************************** #
22302
# norm() #
22303
# #
22304
# XREF **************************************************************** #
22305
# none #
22306
# #
22307
# INPUT *************************************************************** #
22308
# a0 = pointer fp extended precision operand to normalize #
22309
# #
22310
# OUTPUT ************************************************************** #
22311
# d0 = number of bit positions the mantissa was shifted #
22312
# a0 = the input operand's mantissa is normalized; the exponent #
22313
# is unchanged. #
22314
# #
22315
#########################################################################
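#
# Illustrative only (not part of this package): norm_sketch() below shows in C
# what norm() does with bfffo and lsl: shift the 64-bit mantissa left until
# bit 63 is set and return the shift count (the caller adjusts the exponent).
# The name is hypothetical.
#
#	#include <stdint.h>
#
#	static int norm_sketch(uint32_t *hi, uint32_t *lo)
#	{
#	    uint64_t man = ((uint64_t)*hi << 32) | *lo;
#	    int shift = 0;
#
#	    while (man && !(man & (1ULL << 63))) {  /* find the leading one */
#	        man <<= 1;
#	        shift++;
#	    }
#	    *hi = (uint32_t)(man >> 32);
#	    *lo = (uint32_t)man;
#	    return shift;
#	}
#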
22316
global norm
22317
norm:
22318
mov.l %d2, -(%sp) # create some temp regs
22319
mov.l %d3, -(%sp)
22320
22321
mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
22322
mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
22323
22324
bfffo %d0{&0:&32}, %d2 # how many places to shift?
22325
beq.b norm_lo # hi(man) is all zeroes!
22326
22327
norm_hi:
22328
lsl.l %d2, %d0 # left shift hi(man)
22329
bfextu %d1{&0:%d2}, %d3 # extract lo bits
22330
22331
or.l %d3, %d0 # create hi(man)
22332
lsl.l %d2, %d1 # create lo(man)
22333
22334
mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22335
mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
22336
22337
mov.l %d2, %d0 # return shift amount
22338
22339
mov.l (%sp)+, %d3 # restore temp regs
22340
mov.l (%sp)+, %d2
22341
22342
rts
22343
22344
norm_lo:
22345
bfffo %d1{&0:&32}, %d2 # how many places to shift?
22346
lsl.l %d2, %d1 # shift lo(man)
22347
add.l &32, %d2 # add 32 to shft amount
22348
22349
mov.l %d1, FTEMP_HI(%a0) # store hi(man)
22350
clr.l FTEMP_LO(%a0) # lo(man) is now zero
22351
22352
mov.l %d2, %d0 # return shift amount
22353
22354
mov.l (%sp)+, %d3 # restore temp regs
22355
mov.l (%sp)+, %d2
22356
22357
rts
22358
22359
#########################################################################
22360
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
22361
# - returns corresponding optype tag #
22362
# #
22363
# XDEF **************************************************************** #
22364
# unnorm_fix() #
22365
# #
22366
# XREF **************************************************************** #
22367
# norm() - normalize the mantissa #
22368
# #
22369
# INPUT *************************************************************** #
22370
# a0 = pointer to unnormalized extended precision number #
22371
# #
22372
# OUTPUT ************************************************************** #
22373
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
22374
# a0 = input operand has been converted to a norm, denorm, or #
22375
# zero; both the exponent and mantissa are changed. #
22376
# #
22377
#########################################################################
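#
# Illustrative only (not part of this package): the C sketch below shows the
# decision unnorm_fix() makes from the leading-zero count of the mantissa and
# the biased exponent (sign stripped).  unnorm_fix_sketch() is hypothetical.
#
#	#include <stdint.h>
#
#	static const char *unnorm_fix_sketch(uint64_t man, unsigned exp)
#	{
#	    unsigned lz = 0;
#
#	    if (man == 0)
#	        return "ZERO";              /* whole mantissa is zero           */
#	    while (!(man & (1ULL << 63))) { /* count leading zeroes             */
#	        man <<= 1;
#	        lz++;
#	    }
#	    if (lz > exp)
#	        return "DENORM";            /* shift only by exp; exp becomes 0 */
#	    return "NORM";                  /* normalize fully; exp stays >= 0  */
#	}
#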
22378
22379
global unnorm_fix
22380
unnorm_fix:
22381
bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22382
bne.b unnorm_shift # hi(man) is not all zeroes
22383
22384
#
22385
# hi(man) is all zeroes so see if any bits in lo(man) are set
22386
#
22387
unnorm_chk_lo:
22388
bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22389
beq.w unnorm_zero # yes
22390
22391
add.w &32, %d0 # no; fix shift distance
22392
22393
#
22394
# d0 = # shifts needed for complete normalization
22395
#
22396
unnorm_shift:
22397
clr.l %d1 # clear top word
22398
mov.w FTEMP_EX(%a0), %d1 # extract exponent
22399
and.w &0x7fff, %d1 # strip off sgn
22400
22401
cmp.w %d0, %d1 # will denorm push exp < 0?
22402
bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
22403
22404
#
22405
# exponent would not go < 0. Therefore, number stays normalized
22406
#
22407
sub.w %d0, %d1 # shift exponent value
22408
mov.w FTEMP_EX(%a0), %d0 # load old exponent
22409
and.w &0x8000, %d0 # save old sign
22410
or.w %d0, %d1 # {sgn,new exp}
22411
mov.w %d1, FTEMP_EX(%a0) # insert new exponent
22412
22413
bsr.l norm # normalize UNNORM
22414
22415
mov.b &NORM, %d0 # return new optype tag
22416
rts
22417
22418
#
22419
# exponent would go < 0, so only denormalize until exp = 0
22420
#
22421
unnorm_nrm_zero:
22422
cmp.b %d1, &32 # is exp <= 32?
22423
bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
22424
22425
bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22426
mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
22427
22428
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22429
lsl.l %d1, %d0 # extract new lo(man)
22430
mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
22431
22432
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22433
22434
mov.b &DENORM, %d0 # return new optype tag
22435
rts
22436
22437
#
22438
# only mantissa bits set are in lo(man)
22439
#
22440
unnorm_nrm_zero_lrg:
22441
sub.w &32, %d1 # adjust shft amt by 32
22442
22443
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22444
lsl.l %d1, %d0 # left shift lo(man)
22445
22446
mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22447
clr.l FTEMP_LO(%a0) # lo(man) = 0
22448
22449
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22450
22451
mov.b &DENORM, %d0 # return new optype tag
22452
rts
22453
22454
#
22455
# whole mantissa is zero so this UNNORM is actually a zero
22456
#
22457
unnorm_zero:
22458
and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
22459
22460
mov.b &ZERO, %d0 # fix optype tag
22461
rts
22462
22463
#########################################################################
22464
# XDEF **************************************************************** #
22465
# set_tag_x(): return the optype of the input ext fp number #
22466
# #
22467
# XREF **************************************************************** #
22468
# None #
22469
# #
22470
# INPUT *************************************************************** #
22471
# a0 = pointer to extended precision operand #
22472
# #
22473
# OUTPUT ************************************************************** #
22474
# d0 = value of type tag #
22475
# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
22476
# #
22477
# ALGORITHM *********************************************************** #
22478
# Simply test the exponent, j-bit, and mantissa values to #
22479
# determine the type of operand. #
22480
# If it's an unnormalized zero, alter the operand and force it #
22481
# to be a normal zero. #
22482
# #
22483
#########################################################################
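#
# Illustrative only (not part of this package): tag_x_sketch() below walks the
# same decision tree in C; sgn_exp is the sign/exponent word and man the full
# 64-bit mantissa (j-bit in bit 63).  The name is hypothetical.
#
#	#include <stdint.h>
#
#	enum optype { NORM, ZERO, INF, QNAN, SNAN, DENORM, UNNORM };
#
#	static enum optype tag_x_sketch(uint16_t sgn_exp, uint64_t man)
#	{
#	    uint16_t exp = sgn_exp & 0x7fff;
#
#	    if (exp == 0x7fff) {             /* max exponent: INF or NAN        */
#	        if ((man << 1) == 0)         /* msb of mantissa is a don't care */
#	            return INF;
#	        return (man & (1ULL << 62)) ? QNAN : SNAN;
#	    }
#	    if (man & (1ULL << 63))          /* j-bit set                       */
#	        return NORM;
#	    if (exp != 0)                    /* j-bit clear, exponent nonzero   */
#	        return man ? UNNORM : ZERO;  /* unnormalized zero -> ZERO       */
#	    return man ? DENORM : ZERO;
#	}
#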
22484
22485
global set_tag_x
22486
set_tag_x:
22487
mov.w FTEMP_EX(%a0), %d0 # extract exponent
22488
andi.w &0x7fff, %d0 # strip off sign
22489
cmpi.w %d0, &0x7fff # is (EXP == MAX)?
22490
beq.b inf_or_nan_x
22491
not_inf_or_nan_x:
22492
btst &0x7,FTEMP_HI(%a0)
22493
beq.b not_norm_x
22494
is_norm_x:
22495
mov.b &NORM, %d0
22496
rts
22497
not_norm_x:
22498
tst.w %d0 # is exponent = 0?
22499
bne.b is_unnorm_x
22500
not_unnorm_x:
22501
tst.l FTEMP_HI(%a0)
22502
bne.b is_denorm_x
22503
tst.l FTEMP_LO(%a0)
22504
bne.b is_denorm_x
22505
is_zero_x:
22506
mov.b &ZERO, %d0
22507
rts
22508
is_denorm_x:
22509
mov.b &DENORM, %d0
22510
rts
22511
# we must now distinguish "unnormalized zeroes", which we
22512
# must convert to zero.
22513
is_unnorm_x:
22514
tst.l FTEMP_HI(%a0)
22515
bne.b is_unnorm_reg_x
22516
tst.l FTEMP_LO(%a0)
22517
bne.b is_unnorm_reg_x
22518
# it's an "unnormalized zero". let's convert it to an actual zero...
22519
andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
22520
mov.b &ZERO, %d0
22521
rts
22522
is_unnorm_reg_x:
22523
mov.b &UNNORM, %d0
22524
rts
22525
inf_or_nan_x:
22526
tst.l FTEMP_LO(%a0)
22527
bne.b is_nan_x
22528
mov.l FTEMP_HI(%a0), %d0
22529
and.l &0x7fffffff, %d0 # msb is a don't care!
22530
bne.b is_nan_x
22531
is_inf_x:
22532
mov.b &INF, %d0
22533
rts
22534
is_nan_x:
22535
btst &0x6, FTEMP_HI(%a0)
22536
beq.b is_snan_x
22537
mov.b &QNAN, %d0
22538
rts
22539
is_snan_x:
22540
mov.b &SNAN, %d0
22541
rts
22542
22543
#########################################################################
22544
# XDEF **************************************************************** #
22545
# set_tag_d(): return the optype of the input dbl fp number #
22546
# #
22547
# XREF **************************************************************** #
22548
# None #
22549
# #
22550
# INPUT *************************************************************** #
22551
# a0 = points to double precision operand #
22552
# #
22553
# OUTPUT ************************************************************** #
22554
# d0 = value of type tag #
22555
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22556
# #
22557
# ALGORITHM *********************************************************** #
22558
# Simply test the exponent, j-bit, and mantissa values to #
22559
# determine the type of operand. #
22560
# #
22561
#########################################################################
22562
22563
global set_tag_d
22564
set_tag_d:
22565
mov.l FTEMP(%a0), %d0
22566
mov.l %d0, %d1
22567
22568
andi.l &0x7ff00000, %d0
22569
beq.b zero_or_denorm_d
22570
22571
cmpi.l %d0, &0x7ff00000
22572
beq.b inf_or_nan_d
22573
22574
is_norm_d:
22575
mov.b &NORM, %d0
22576
rts
22577
zero_or_denorm_d:
22578
and.l &0x000fffff, %d1
22579
bne is_denorm_d
22580
tst.l 4+FTEMP(%a0)
22581
bne is_denorm_d
22582
is_zero_d:
22583
mov.b &ZERO, %d0
22584
rts
22585
is_denorm_d:
22586
mov.b &DENORM, %d0
22587
rts
22588
inf_or_nan_d:
22589
and.l &0x000fffff, %d1
22590
bne is_nan_d
22591
tst.l 4+FTEMP(%a0)
22592
bne is_nan_d
22593
is_inf_d:
22594
mov.b &INF, %d0
22595
rts
22596
is_nan_d:
22597
btst &19, %d1
22598
bne is_qnan_d
22599
is_snan_d:
22600
mov.b &SNAN, %d0
22601
rts
22602
is_qnan_d:
22603
mov.b &QNAN, %d0
22604
rts
22605
22606
#########################################################################
22607
# XDEF **************************************************************** #
22608
# set_tag_s(): return the optype of the input sgl fp number #
22609
# #
22610
# XREF **************************************************************** #
22611
# None #
22612
# #
22613
# INPUT *************************************************************** #
22614
# a0 = pointer to single precision operand #
22615
# #
22616
# OUTPUT ************************************************************** #
22617
# d0 = value of type tag #
22618
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22619
# #
22620
# ALGORITHM *********************************************************** #
22621
# Simply test the exponent, j-bit, and mantissa values to #
22622
# determine the type of operand. #
22623
# #
22624
#########################################################################
22625
22626
global set_tag_s
22627
set_tag_s:
22628
mov.l FTEMP(%a0), %d0
22629
mov.l %d0, %d1
22630
22631
andi.l &0x7f800000, %d0
22632
beq.b zero_or_denorm_s
22633
22634
cmpi.l %d0, &0x7f800000
22635
beq.b inf_or_nan_s
22636
22637
is_norm_s:
22638
mov.b &NORM, %d0
22639
rts
22640
zero_or_denorm_s:
22641
and.l &0x007fffff, %d1
22642
bne is_denorm_s
22643
is_zero_s:
22644
mov.b &ZERO, %d0
22645
rts
22646
is_denorm_s:
22647
mov.b &DENORM, %d0
22648
rts
22649
inf_or_nan_s:
22650
and.l &0x007fffff, %d1
22651
bne is_nan_s
22652
is_inf_s:
22653
mov.b &INF, %d0
22654
rts
22655
is_nan_s:
22656
btst &22, %d1
22657
bne is_qnan_s
22658
is_snan_s:
22659
mov.b &SNAN, %d0
22660
rts
22661
is_qnan_s:
22662
mov.b &QNAN, %d0
22663
rts
22664
22665
#########################################################################
22666
# XDEF **************************************************************** #
22667
# unf_res(): routine to produce default underflow result of a #
22668
# scaled extended precision number; this is used by #
22669
# fadd/fdiv/fmul/etc. emulation routines. #
22670
# unf_res4(): same as above but for fsglmul/fsgldiv which use #
22671
# single round prec and extended prec mode. #
22672
# #
22673
# XREF **************************************************************** #
22674
# _denorm() - denormalize according to scale factor #
22675
# _round() - round denormalized number according to rnd prec #
22676
# #
22677
# INPUT *************************************************************** #
22678
# a0 = pointer to extended precision operand #
22679
# d0 = scale factor #
22680
# d1 = rounding precision/mode #
22681
# #
22682
# OUTPUT ************************************************************** #
22683
# a0 = pointer to default underflow result in extended precision #
22684
# d0.b = result FPSR_cc which caller may or may not want to save #
22685
# #
22686
# ALGORITHM *********************************************************** #
22687
# Convert the input operand to "internal format" which means the #
22688
# exponent is extended to 16 bits and the sign is stored in the unused #
22689
# portion of the extended precision operand. Denormalize the number #
22690
# according to the scale factor passed in d0. Then, round the #
22691
# denormalized result. #
22692
# Set the FPSR_exc bits as appropriate but return the cc bits in #
22693
# d0 in case the caller doesn't want to save them (as is the case for #
22694
# fmove out). #
22695
# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
22696
# precision and the rounding mode to single. #
22697
# #
22698
#########################################################################
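#
# Illustrative only (not part of this package): the C sketch below shows the
# "internal format" step unf_res() performs before calling _denorm(): widen
# the biased exponent to 16 bits, park the sign in a separate byte, and
# subtract the scale factor.  All names here are hypothetical.
#
#	#include <stdint.h>
#
#	struct internal_x {
#	    int16_t  exp;     /* 16-bit exponent; may go negative after scaling */
#	    uint8_t  sgn;     /* 0xff if the operand is negative, else 0x00     */
#	    uint32_t man_hi;
#	    uint32_t man_lo;
#	};
#
#	static void to_internal_sketch(struct internal_x *ix, uint16_t sgn_exp,
#	                               uint32_t hi, uint32_t lo, int16_t scale)
#	{
#	    ix->sgn    = (sgn_exp & 0x8000) ? 0xff : 0x00;
#	    ix->exp    = (int16_t)((sgn_exp & 0x7fff) - scale);
#	    ix->man_hi = hi;
#	    ix->man_lo = lo;
#	}
#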
22699
global unf_res
22700
unf_res:
22701
mov.l %d1, -(%sp) # save rnd prec,mode on stack
22702
22703
btst &0x7, FTEMP_EX(%a0) # make "internal" format
22704
sne FTEMP_SGN(%a0)
22705
22706
mov.w FTEMP_EX(%a0), %d1 # extract exponent
22707
and.w &0x7fff, %d1
22708
sub.w %d0, %d1
22709
mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
22710
22711
mov.l %a0, -(%sp) # save operand ptr during calls
22712
22713
mov.l 0x4(%sp),%d0 # pass rnd prec.
22714
andi.w &0x00c0,%d0
22715
lsr.w &0x4,%d0
22716
bsr.l _denorm # denorm result
22717
22718
mov.l (%sp),%a0
22719
mov.w 0x6(%sp),%d1 # load prec:mode into %d1
22720
andi.w &0xc0,%d1 # extract rnd prec
22721
lsr.w &0x4,%d1
22722
swap %d1
22723
mov.w 0x6(%sp),%d1
22724
andi.w &0x30,%d1
22725
lsr.w &0x4,%d1
22726
bsr.l _round # round the denorm
22727
22728
mov.l (%sp)+, %a0
22729
22730
# result is now rounded properly. convert back to normal format
22731
bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
22732
tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22733
beq.b unf_res_chkifzero # no; result is positive
22734
bset &0x7, FTEMP_EX(%a0) # set result sgn
22735
clr.b FTEMP_SGN(%a0) # clear temp sign
22736
22737
# the number may have become zero after rounding. set ccodes accordingly.
22738
unf_res_chkifzero:
22739
clr.l %d0
22740
tst.l FTEMP_HI(%a0) # is value now a zero?
22741
bne.b unf_res_cont # no
22742
tst.l FTEMP_LO(%a0)
22743
bne.b unf_res_cont # no
22744
# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
22745
bset &z_bit, %d0 # yes; set zero ccode bit
22746
22747
unf_res_cont:
22748
22749
#
22750
# can inex1 also be set along with unfl and inex2???
22751
#
22752
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22753
#
22754
btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22755
beq.b unf_res_end # no
22756
bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22757
22758
unf_res_end:
22759
add.l &0x4, %sp # clear stack
22760
rts
22761
22762
# unf_res() for fsglmul() and fsgldiv().
22763
global unf_res4
22764
unf_res4:
22765
mov.l %d1,-(%sp) # save rnd prec,mode on stack
22766
22767
btst &0x7,FTEMP_EX(%a0) # make "internal" format
22768
sne FTEMP_SGN(%a0)
22769
22770
mov.w FTEMP_EX(%a0),%d1 # extract exponent
22771
and.w &0x7fff,%d1
22772
sub.w %d0,%d1
22773
mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
22774
22775
mov.l %a0,-(%sp) # save operand ptr during calls
22776
22777
clr.l %d0 # force rnd prec = ext
22778
bsr.l _denorm # denorm result
22779
22780
mov.l (%sp),%a0
22781
mov.w &s_mode,%d1 # force rnd prec = sgl
22782
swap %d1
22783
mov.w 0x6(%sp),%d1 # load rnd mode
22784
andi.w &0x30,%d1 # extract rnd prec
22785
lsr.w &0x4,%d1
22786
bsr.l _round # round the denorm
22787
22788
mov.l (%sp)+,%a0
22789
22790
# result is now rounded properly. convert back to normal format
22791
bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
22792
tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22793
beq.b unf_res4_chkifzero # no; result is positive
22794
bset &0x7,FTEMP_EX(%a0) # set result sgn
22795
clr.b FTEMP_SGN(%a0) # clear temp sign
22796
22797
# the number may have become zero after rounding. set ccodes accordingly.
22798
unf_res4_chkifzero:
22799
clr.l %d0
22800
tst.l FTEMP_HI(%a0) # is value now a zero?
22801
bne.b unf_res4_cont # no
22802
tst.l FTEMP_LO(%a0)
22803
bne.b unf_res4_cont # no
22804
# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
22805
bset &z_bit,%d0 # yes; set zero ccode bit
22806
22807
unf_res4_cont:
22808
22809
#
22810
# can inex1 also be set along with unfl and inex2???
22811
#
22812
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22813
#
22814
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22815
beq.b unf_res4_end # no
22816
bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22817
22818
unf_res4_end:
22819
add.l &0x4,%sp # clear stack
22820
rts
22821
22822
#########################################################################
22823
# XDEF **************************************************************** #
22824
# ovf_res(): routine to produce the default overflow result of #
22825
# an overflowing number. #
22826
# ovf_res2(): same as above but the rnd mode/prec are passed #
22827
# differently. #
22828
# #
22829
# XREF **************************************************************** #
22830
# none #
22831
# #
22832
# INPUT *************************************************************** #
22833
# d1.b = '-1' => (-); '0' => (+) #
22834
# ovf_res(): #
22835
# d0 = rnd mode/prec #
22836
# ovf_res2(): #
22837
# hi(d0) = rnd prec #
22838
# lo(d0) = rnd mode #
22839
# #
22840
# OUTPUT ************************************************************** #
22841
# a0 = points to extended precision result #
22842
# d0.b = condition code bits #
22843
# #
22844
# ALGORITHM *********************************************************** #
22845
# The default overflow result can be determined by the sign of #
22846
# the result and the rounding mode/prec in effect. These bits are #
22847
# concatenated together to create an index into the default result #
22848
# table. A pointer to the correct result is returned in a0. The #
22849
# resulting condition codes are returned in d0 in case the caller #
22850
# doesn't want FPSR_cc altered (as is the case for fmove out). #
22851
# #
22852
#########################################################################
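#
# Illustrative only (not part of this package): the index built by ovf_res()
# is simply {sign, rounding precision, rounding mode} packed into one byte;
# the sketch below shows the layout in C.  ovf_index_sketch() is hypothetical.
#
#	/* prec: 0 = ext, 1 = sgl, 2 = dbl; mode: 0 = RN, 1 = RZ, 2 = RM, 3 = RP */
#	static unsigned ovf_index_sketch(int negative, unsigned prec, unsigned mode)
#	{
#	    unsigned idx = (negative ? 0x10 : 0) | (prec << 2) | mode;
#
#	    return idx;       /* byte offset into tbl_ovfl_cc is idx;            */
#	                      /* offset into tbl_ovfl_result is idx * 16 bytes   */
#	}
#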
22853
22854
global ovf_res
22855
ovf_res:
22856
andi.w &0x10,%d1 # keep result sign
22857
lsr.b &0x4,%d0 # shift prec/mode
22858
or.b %d0,%d1 # concat the two
22859
mov.w %d1,%d0 # make a copy
22860
lsl.b &0x1,%d1 # multiply d1 by 2
22861
bra.b ovf_res_load
22862
22863
global ovf_res2
22864
ovf_res2:
22865
and.w &0x10, %d1 # keep result sign
22866
or.b %d0, %d1 # insert rnd mode
22867
swap %d0
22868
or.b %d0, %d1 # insert rnd prec
22869
mov.w %d1, %d0 # make a copy
22870
lsl.b &0x1, %d1 # shift left by 1
22871
22872
#
22873
# use the rounding mode, precision, and result sign as an index into the
22874
# two tables below to fetch the default result and the result ccodes.
22875
#
22876
ovf_res_load:
22877
mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22878
lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22879
22880
rts
22881
22882
tbl_ovfl_cc:
22883
byte 0x2, 0x0, 0x0, 0x2
22884
byte 0x2, 0x0, 0x0, 0x2
22885
byte 0x2, 0x0, 0x0, 0x2
22886
byte 0x0, 0x0, 0x0, 0x0
22887
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22888
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22889
byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22890
22891
tbl_ovfl_result:
22892
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22893
long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22894
long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22895
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22896
22897
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22898
long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22899
long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22900
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22901
22902
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22903
long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22904
long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22905
long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906
22907
long 0x00000000,0x00000000,0x00000000,0x00000000
22908
long 0x00000000,0x00000000,0x00000000,0x00000000
22909
long 0x00000000,0x00000000,0x00000000,0x00000000
22910
long 0x00000000,0x00000000,0x00000000,0x00000000
22911
22912
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22913
long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22914
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22915
long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22916
22917
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22918
long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22919
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22920
long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22921
22922
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22923
long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22924
long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22925
long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22926
22927
#########################################################################
22928
# XDEF **************************************************************** #
22929
# get_packed(): fetch a packed operand from memory and then #
22930
# convert it to a floating-point binary number. #
22931
# #
22932
# XREF **************************************************************** #
22933
# _dcalc_ea() - calculate the correct <ea> #
22934
# _mem_read() - fetch the packed operand from memory #
22935
# facc_in_x() - the fetch failed so jump to special exit code #
22936
# decbin() - convert packed to binary extended precision #
22937
# #
22938
# INPUT *************************************************************** #
22939
# None #
22940
# #
22941
# OUTPUT ************************************************************** #
22942
# If no failure on _mem_read(): #
22943
# FP_SRC(a6) = packed operand now as a binary FP number #
22944
# #
22945
# ALGORITHM *********************************************************** #
22946
# Get the correct <ea> which is the value on the exception stack #
22947
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
22948
# Then, fetch the operand from memory. If the fetch fails, exit #
22949
# through facc_in_x(). #
22950
# If the packed operand is a ZERO, NAN, or INF, convert it to #
22951
# its binary representation here. Else, call decbin() which will #
22952
# convert the packed value to an extended precision binary value. #
22953
# #
22954
#########################################################################
22955
22956
# the stacked <ea> for packed is correct except for -(An).
22957
# the base reg must be updated for both -(An) and (An)+.
22958
global get_packed
22959
get_packed:
22960
mov.l &0xc,%d0 # packed is 12 bytes
22961
bsr.l _dcalc_ea # fetch <ea>; correct An
22962
22963
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
22964
mov.l &0xc,%d0 # pass: 12 bytes
22965
bsr.l _dmem_read # read packed operand
22966
22967
tst.l %d1 # did dfetch fail?
22968
bne.l facc_in_x # yes
22969
22970
# The packed operand is an INF or a NAN if the exponent field is all ones.
22971
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
22972
cmpi.w %d0,&0x7fff # INF or NAN?
22973
bne.b gp_try_zero # no
22974
rts # operand is an INF or NAN
22975
22976
# The packed operand is a zero if the mantissa is all zero, else it's
22977
# a normal packed op.
22978
gp_try_zero:
22979
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
22980
andi.b &0x0f,%d0 # clear all but last nybble
22981
bne.b gp_not_spec # not a zero
22982
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
22983
bne.b gp_not_spec # not a zero
22984
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
22985
bne.b gp_not_spec # not a zero
22986
rts # operand is a ZERO
22987
gp_not_spec:
22988
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
22989
bsr.l decbin # convert to extended
22990
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
22991
rts
22992
22993
#########################################################################
22994
# decbin(): Converts normalized packed bcd value pointed to by register #
22995
# a0 to extended-precision value in fp0. #
22996
# #
22997
# INPUT *************************************************************** #
22998
# a0 = pointer to normalized packed bcd value #
22999
# #
23000
# OUTPUT ************************************************************** #
23001
# fp0 = exact fp representation of the packed bcd value. #
23002
# #
23003
# ALGORITHM *********************************************************** #
23004
# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
23005
# and NaN operands are dispatched without entering this routine) #
23006
# value in 68881/882 format at location (a0). #
23007
# #
23008
# A1. Convert the bcd exponent to binary by successive adds and #
23009
# muls. Set the sign according to SE. Subtract 16 to compensate #
23010
# for the mantissa which is to be interpreted as 17 integer #
23011
# digits, rather than 1 integer and 16 fraction digits. #
23012
# Note: this operation can never overflow. #
23013
# #
23014
# A2. Convert the bcd mantissa to binary by successive #
23015
# adds and muls in FP0. Set the sign according to SM. #
23016
# The mantissa digits will be converted with the decimal point #
23017
# assumed following the least-significant digit. #
23018
# Note: this operation can never overflow. #
23019
# #
23020
# A3. Count the number of leading/trailing zeros in the #
23021
# bcd string. If SE is positive, count the leading zeros; #
23022
# if negative, count the trailing zeros. Set the adjusted #
23023
# exponent equal to the exponent from A1 and the zero count #
23024
# added if SM = 1 and subtracted if SM = 0. Scale the #
23025
# mantissa the equivalent of forcing in the bcd value: #
23026
# #
23027
# SM = 0 a non-zero digit in the integer position #
23028
# SM = 1 a non-zero digit in Mant0, lsd of the fraction #
23029
# #
23030
# this will ensure that any value, regardless of its #
23031
# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
23032
# consistently. #
23033
# #
23034
# A4. Calculate the factor 10^exp in FP1 using a table of #
23035
# 10^(2^n) values. To reduce the error in forming factors #
23036
# greater than 10^27, a directed rounding scheme is used with #
23037
# tables rounded to RN, RM, and RP, according to the table #
23038
# in the comments of the pwrten section. #
23039
# #
23040
# A5. Form the final binary number by scaling the mantissa by #
23041
# the exponent factor. This is done by multiplying the #
23042
# mantissa in FP0 by the factor in FP1 if the adjusted #
23043
# exponent sign is positive, and dividing FP0 by FP1 if #
23044
# it is negative. #
23045
# #
23046
# Clean up and return. Check if the final mul or div was inexact. #
23047
# If so, set INEX1 in USER_FPSR. #
23048
# #
23049
#########################################################################
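#
# Illustrative only (not part of this package): decbin_sketch() below restates
# steps A1, A2 and A5 in C on an operand already split into its BCD digits;
# the zero strip/append step (A3) and the directed rounding of the power-of-
# ten factors (A4) are omitted.  All names here are hypothetical.
#
#	static double decbin_sketch(int se, int sm, const int edig[3],
#	                            const int mdig[17])
#	{
#	    long exp = 0;
#	    double man = 0.0;
#	    int i;
#
#	    for (i = 0; i < 3; i++)       /* A1: BCD exponent -> binary         */
#	        exp = exp * 10 + edig[i];
#	    if (se)
#	        exp = -exp;
#	    exp -= 16;                    /* mantissa read as 17 integer digits */
#
#	    for (i = 0; i < 17; i++)      /* A2: BCD mantissa -> binary         */
#	        man = man * 10.0 + mdig[i];
#	    if (sm)
#	        man = -man;
#
#	    while (exp > 0) { man *= 10.0; exp--; }   /* A5: scale by 10^exp    */
#	    while (exp < 0) { man /= 10.0; exp++; }
#	    return man;
#	}
#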
23050
23051
#
23052
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053
# to nearest, minus, and plus, respectively. The tables include
23054
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
23055
# is required until the power is greater than 27, however, all
23056
# tables include the first 5 for ease of indexing.
23057
#
23058
RTABLE:
23059
byte 0,0,0,0
23060
byte 2,3,2,3
23061
byte 2,3,3,2
23062
byte 3,2,2,3
23063
23064
set FNIBS,7
23065
set FSTRT,0
23066
23067
set ESTRT,4
23068
set EDIGITS,2
23069
23070
global decbin
23071
decbin:
23072
mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073
mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074
mov.l 0x8(%a0),FP_SCR0_LO(%a6)
23075
23076
lea FP_SCR0(%a6),%a0
23077
23078
movm.l &0x3c00,-(%sp) # save d2-d5
23079
fmovm.x &0x1,-(%sp) # save fp1
23080
#
23081
# Calculate exponent:
23082
# 1. Copy bcd value in memory for use as a working copy.
23083
# 2. Calculate absolute value of exponent in d1 by mul and add.
23084
# 3. Correct for exponent sign.
23085
# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23086
# (i.e., all digits assumed left of the decimal point.)
23087
#
23088
# Register usage:
23089
#
23090
# calc_e:
23091
# (*) d0: temp digit storage
23092
# (*) d1: accumulator for binary exponent
23093
# (*) d2: digit count
23094
# (*) d3: offset pointer
23095
# ( ) d4: first word of bcd
23096
# ( ) a0: pointer to working bcd value
23097
# ( ) a6: pointer to original bcd value
23098
# (*) FP_SCR1: working copy of original bcd value
23099
# (*) L_SCR1: copy of original exponent word
23100
#
23101
calc_e:
23102
mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
23103
mov.l &ESTRT,%d3 # counter to pick up digits
23104
mov.l (%a0),%d4 # get first word of bcd
23105
clr.l %d1 # zero d1 for accumulator
23106
e_gd:
23107
mulu.l &0xa,%d1 # mul partial product by one digit place
23108
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
23109
add.l %d0,%d1 # d1 = d1 + d0
23110
addq.b &4,%d3 # advance d3 to the next digit
23111
dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
23112
btst &30,%d4 # get SE
23113
beq.b e_pos # don't negate if pos
23114
neg.l %d1 # negate before subtracting
23115
e_pos:
23116
sub.l &16,%d1 # sub to compensate for shift of mant
23117
bge.b e_save # if still pos, do not neg
23118
neg.l %d1 # now negative, make pos and set SE
23119
or.l &0x40000000,%d4 # set SE in d4,
23120
or.l &0x40000000,(%a0) # and in working bcd
23121
e_save:
23122
mov.l %d1,-(%sp) # save exp on stack
23123
#
23124
#
23125
# Calculate mantissa:
23126
# 1. Calculate absolute value of mantissa in fp0 by mul and add.
23127
# 2. Correct for mantissa sign.
23128
# (i.e., all digits assumed left of the decimal point.)
23129
#
23130
# Register usage:
23131
#
23132
# calc_m:
23133
# (*) d0: temp digit storage
23134
# (*) d1: lword counter
23135
# (*) d2: digit count
23136
# (*) d3: offset pointer
23137
# ( ) d4: words 2 and 3 of bcd
23138
# ( ) a0: pointer to working bcd value
23139
# ( ) a6: pointer to original bcd value
23140
# (*) fp0: mantissa accumulator
23141
# ( ) FP_SCR1: working copy of original bcd value
23142
# ( ) L_SCR1: copy of original exponent word
23143
#
23144
calc_m:
23145
mov.l &1,%d1 # word counter, init to 1
23146
fmov.s &0x00000000,%fp0 # accumulator
23147
#
23148
#
23149
# Since the packed number has a long word between the first & second parts,
23150
# get the integer digit then skip down & get the rest of the
23151
# mantissa. We will unroll the loop once.
23152
#
23153
bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
23154
fadd.b %d0,%fp0 # add digit to sum in fp0
23155
#
23156
#
23157
# Get the rest of the mantissa.
23158
#
23159
loadlw:
23160
mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
23161
mov.l &FSTRT,%d3 # counter to pick up digits
23162
mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
23163
md2b:
23164
fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
23165
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
23166
fadd.b %d0,%fp0 # fp0 = fp0 + digit
23167
#
23168
#
23169
# If all the digits (8) in that long word have been converted (d2=0),
23170
# then inc d1 (=2) to point to the next long word and reset d3 to 0
23171
# to initialize the digit offset, and set d2 to 7 for the digit count;
23172
# else continue with this long word.
23173
#
23174
addq.b &4,%d3 # advance d3 to the next digit
23175
dbf.w %d2,md2b # check for last digit in this lw
23176
nextlw:
23177
addq.l &1,%d1 # inc lw pointer in mantissa
23178
cmp.l %d1,&2 # test for last lw
23179
ble.b loadlw # if not, get last one
23180
#
23181
# Check the sign of the mant and make the value in fp0 the same sign.
23182
#
23183
m_sign:
23184
btst &31,(%a0) # test sign of the mantissa
23185
beq.b ap_st_z # if clear, go to append/strip zeros
23186
fneg.x %fp0 # if set, negate fp0
23187
#
23188
# Append/strip zeros:
23189
#
23190
# For adjusted exponents which have an absolute value greater than 27*,
23191
# this routine calculates the amount needed to normalize the mantissa
23192
# for the adjusted exponent. That number is subtracted from the exp
23193
# if the exp was positive, and added if it was negative. The purpose
23194
# of this is to reduce the value of the exponent and the possibility
23195
# of error in calculation of pwrten.
23196
#
23197
# 1. Branch on the sign of the adjusted exponent.
23198
# 2p.(positive exp)
23199
# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
23200
# 3. Add one for each zero encountered until a non-zero digit.
23201
# 4. Subtract the count from the exp.
23202
# 5. Check if the exp has crossed zero in #3 above; make the exp abs
23203
# and set SE.
23204
# 6. Multiply the mantissa by 10**count.
23205
# 2n.(negative exp)
23206
# 2. Check the digits in lwords 3 and 2 in descending order.
23207
# 3. Add one for each zero encountered until a non-zero digit.
23208
# 4. Add the count to the exp.
23209
# 5. Check if the exp has crossed zero in #3 above; clear SE.
23210
# 6. Divide the mantissa by 10**count.
23211
#
23212
# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
23213
# any adjustment due to append/strip zeros will drive the resultant
23214
# exponent towards zero. Since all pwrten constants with a power
23215
# of 27 or less are exact, there is no need to use this routine to
23216
# attempt to lessen the resultant exponent.
23217
#
23218
# Register usage:
23219
#
23220
# ap_st_z:
23221
# (*) d0: temp digit storage
23222
# (*) d1: zero count
23223
# (*) d2: digit count
23224
# (*) d3: offset pointer
23225
# ( ) d4: first word of bcd
23226
# (*) d5: lword counter
23227
# ( ) a0: pointer to working bcd value
23228
# ( ) FP_SCR1: working copy of original bcd value
23229
# ( ) L_SCR1: copy of original exponent word
23230
#
23231
#
23232
# First check the absolute value of the exponent to see if this
23233
# routine is necessary. If so, then check the sign of the exponent
23234
# and do append (+) or strip (-) zeros accordingly.
23235
# This section handles a positive adjusted exponent.
23236
#
23237
ap_st_z:
23238
mov.l (%sp),%d1 # load expA for range test
23239
cmp.l %d1,&27 # compare expA with 27
23240
ble.w pwrten # if abs(expA) <28, skip ap/st zeros
23241
btst &30,(%a0) # check sign of exp
23242
bne.b ap_st_n # if neg, go to neg side
23243
clr.l %d1 # zero count reg
23244
mov.l (%a0),%d4 # load lword 1 to d4
23245
bfextu %d4{&28:&4},%d0 # get M16 in d0
23246
bne.b ap_p_fx # if M16 is non-zero, go fix exp
23247
addq.l &1,%d1 # inc zero count
23248
mov.l &1,%d5 # init lword counter
23249
mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
23250
bne.b ap_p_cl # if lw 2 is non-zero, go check its digits
23251
addq.l &8,%d1 # and inc count by 8
23252
addq.l &1,%d5 # inc lword counter
23253
mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
23254
ap_p_cl:
23255
clr.l %d3 # init offset reg
23256
mov.l &7,%d2 # init digit counter
23257
ap_p_gd:
23258
bfextu %d4{%d3:&4},%d0 # get digit
23259
bne.b ap_p_fx # if non-zero, go to fix exp
23260
addq.l &4,%d3 # point to next digit
23261
addq.l &1,%d1 # inc digit counter
23262
dbf.w %d2,ap_p_gd # get next digit
23263
ap_p_fx:
23264
mov.l %d1,%d0 # copy counter to d2
23265
mov.l (%sp),%d1 # get adjusted exp from memory
23266
sub.l %d0,%d1 # subtract count from exp
23267
bge.b ap_p_fm # if still pos, go to pwrten
23268
neg.l %d1 # now its neg; get abs
23269
mov.l (%a0),%d4 # load lword 1 to d4
23270
or.l &0x40000000,%d4 # and set SE in d4
23271
or.l &0x40000000,(%a0) # and in memory
23272
#
23273
# Calculate the mantissa multiplier to compensate for the striping of
23274
# zeros from the mantissa.
23275
#
23276
ap_p_fm:
23277
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23278
clr.l %d3 # init table index
23279
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23280
mov.l &3,%d2 # init d2 to count bits in counter
23281
ap_p_el:
23282
asr.l &1,%d0 # shift lsb into carry
23283
bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
23284
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23285
ap_p_en:
23286
add.l &12,%d3 # inc d3 to next rtable entry
23287
tst.l %d0 # check if d0 is zero
23288
bne.b ap_p_el # if not, get next bit
23289
fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
23290
bra.b pwrten # go calc pwrten
23291
#
23292
# This section handles a negative adjusted exponent.
23293
#
23294
ap_st_n:
23295
clr.l %d1 # clr counter
23296
mov.l &2,%d5 # set up d5 to point to lword 3
23297
mov.l (%a0,%d5.L*4),%d4 # get lword 3
23298
bne.b ap_n_cl # if not zero, check digits
23299
sub.l &1,%d5 # dec d5 to point to lword 2
23300
addq.l &8,%d1 # inc counter by 8
23301
mov.l (%a0,%d5.L*4),%d4 # get lword 2
23302
ap_n_cl:
23303
mov.l &28,%d3 # point to last digit
23304
mov.l &7,%d2 # init digit counter
23305
ap_n_gd:
23306
bfextu %d4{%d3:&4},%d0 # get digit
23307
bne.b ap_n_fx # if non-zero, go to exp fix
23308
subq.l &4,%d3 # point to previous digit
23309
addq.l &1,%d1 # inc digit counter
23310
dbf.w %d2,ap_n_gd # get next digit
23311
ap_n_fx:
23312
mov.l %d1,%d0 # copy counter to d0
23313
mov.l (%sp),%d1 # get adjusted exp from memory
23314
sub.l %d0,%d1 # subtract count from exp
23315
bgt.b ap_n_fm # if still pos, go fix mantissa
23316
neg.l %d1 # take abs of exp and clr SE
23317
mov.l (%a0),%d4 # load lword 1 to d4
23318
and.l &0xbfffffff,%d4 # and clr SE in d4
23319
and.l &0xbfffffff,(%a0) # and in memory
23320
#
23321
# Calculate the mantissa multiplier to compensate for the appending of
23322
# zeros to the mantissa.
23323
#
23324
ap_n_fm:
23325
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23326
clr.l %d3 # init table index
23327
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23328
mov.l &3,%d2 # init d2 to count bits in counter
23329
ap_n_el:
23330
asr.l &1,%d0 # shift lsb into carry
23331
bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
23332
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23333
ap_n_en:
23334
add.l &12,%d3 # inc d3 to next rtable entry
23335
tst.l %d0 # check if d0 is zero
23336
bne.b ap_n_el # if not, get next bit
23337
fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
23338
#
23339
#
23340
# Calculate power-of-ten factor from adjusted and shifted exponent.
23341
#
23342
# Register usage:
23343
#
23344
# pwrten:
23345
# (*) d0: temp
23346
# ( ) d1: exponent
23347
# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23348
# (*) d3: FPCR work copy
23349
# ( ) d4: first word of bcd
23350
# (*) a1: RTABLE pointer
23351
# calc_p:
23352
# (*) d0: temp
23353
# ( ) d1: exponent
23354
# (*) d3: PWRTxx table index
23355
# ( ) a0: pointer to working copy of bcd
23356
# (*) a1: PWRTxx pointer
23357
# (*) fp1: power-of-ten accumulator
23358
#
23359
# Pwrten calculates the exponent factor in the selected rounding mode
23360
# according to the following table:
23361
#
23362
# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
23363
#
23364
# ANY ANY RN RN
23365
#
23366
# + + RP RP
23367
# - + RP RM
23368
# + - RP RM
23369
# - - RP RP
23370
#
23371
# + + RM RM
23372
# - + RM RP
23373
# + - RM RP
23374
# - - RM RM
23375
#
23376
# + + RZ RM
23377
# - + RZ RM
23378
# + - RZ RP
23379
# - - RZ RP
23380
#
23381
#
23382
pwrten:
23383
mov.l USER_FPCR(%a6),%d3 # get user's FPCR
23384
bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
23385
mov.l (%a0),%d4 # reload 1st bcd word to d4
23386
asl.l &2,%d2 # format d2 to be
23387
bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
23388
add.l %d0,%d2 # in d2 as index into RTABLE
23389
lea.l RTABLE(%pc),%a1 # load rtable base
23390
mov.b (%a1,%d2),%d0 # load new rounding bits from table
23391
clr.l %d3 # clear d3 to force no exc and extended
23392
bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
23393
fmov.l %d3,%fpcr # write new FPCR
23394
asr.l &1,%d0 # write correct PTENxx table
23395
bcc.b not_rp # to a1
23396
lea.l PTENRP(%pc),%a1 # it is RP
23397
bra.b calc_p # go to init section
23398
not_rp:
23399
asr.l &1,%d0 # keep checking
23400
bcc.b not_rm
23401
lea.l PTENRM(%pc),%a1 # it is RM
23402
bra.b calc_p # go to init section
23403
not_rm:
23404
lea.l PTENRN(%pc),%a1 # it is RN
23405
calc_p:
23406
mov.l %d1,%d0 # copy exp to d0;use d0
23407
bpl.b no_neg # if exp is negative,
23408
neg.l %d0 # invert it
23409
or.l &0x40000000,(%a0) # and set SE bit
23410
no_neg:
23411
clr.l %d3 # table index
23412
fmov.s &0x3f800000,%fp1 # init fp1 to 1
23413
e_loop:
23414
asr.l &1,%d0 # shift next bit into carry
23415
bcc.b e_next # if zero, skip the mul
23416
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23417
e_next:
23418
add.l &12,%d3 # inc d3 to next rtable entry
23419
tst.l %d0 # check if d0 is zero
23420
bne.b e_loop # not zero, continue shifting
23421
#
23422
#
23423
# Check the sign of the adjusted exp and make the value in fp0 the
23424
# same sign. If the exp was pos then multiply fp1*fp0;
23425
# else divide fp0/fp1.
23426
#
23427
# Register Usage:
23428
# norm:
23429
# ( ) a0: pointer to working bcd value
23430
# (*) fp0: mantissa accumulator
23431
# ( ) fp1: scaling factor - 10**(abs(exp))
23432
#
23433
pnorm:
23434
btst &30,(%a0) # test the sign of the exponent
23435
beq.b mul # if clear, go to multiply
23436
div:
23437
fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
23438
bra.b end_dec
23439
mul:
23440
fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
23441
#
23442
#
23443
# Clean up and return with result in fp0.
23444
#
23445
# If the final mul/div in decbin incurred an inex exception,
23446
# it will be inex2, but will be reported as inex1 by get_op.
23447
#
23448
end_dec:
23449
fmov.l %fpsr,%d0 # get status register
23450
bclr &inex2_bit+8,%d0 # test for inex2 and clear it
23451
beq.b no_exc # skip this if no exc
23452
ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23453
no_exc:
23454
add.l &0x4,%sp # clear 1 lw param
23455
fmovm.x (%sp)+,&0x40 # restore fp1
23456
movm.l (%sp)+,&0x3c # restore d2-d5
23457
fmov.l &0x0,%fpcr
23458
fmov.l &0x0,%fpsr
23459
rts
23460
23461
#########################################################################
23462
# bindec(): Converts an input in extended precision format to bcd format#
23463
# #
23464
# INPUT *************************************************************** #
23465
# a0 = pointer to the input extended precision value in memory. #
23466
# the input may be either normalized, unnormalized, or #
23467
# denormalized. #
23468
# d0 = contains the k-factor sign-extended to 32-bits. #
23469
# #
23470
# OUTPUT ************************************************************** #
23471
# FP_SCR0(a6) = bcd format result on the stack. #
23472
# #
23473
# ALGORITHM *********************************************************** #
23474
# #
23475
# A1. Set RM and size ext; Set SIGMA = sign of input. #
23476
# The k-factor is saved for use in d7. Clear the #
23477
# BINDEC_FLG for separating normalized/denormalized #
23478
# input. If input is unnormalized or denormalized, #
23479
# normalize it. #
23480
# #
23481
# A2. Set X = abs(input). #
23482
# #
23483
# A3. Compute ILOG. #
23484
# ILOG is the log base 10 of the input value. It is #
23485
# approximated by adding e + 0.f when the original #
23486
# value is viewed as 2^^e * 1.f in extended precision. #
23487
# This value is stored in d6. #
23488
# #
23489
# A4. Clr INEX bit. #
23490
# The operation in A3 above may have set INEX2. #
23491
# #
23492
# A5. Set ICTR = 0; #
23493
# ICTR is a flag used in A13. It must be set before the #
23494
# loop entry A6. #
23495
# #
23496
# A6. Calculate LEN. #
23497
# LEN is the number of digits to be displayed. The #
23498
# k-factor can dictate either the total number of digits, #
23499
# if it is a positive number, or the number of digits #
23500
# after the decimal point which are to be included as #
23501
# significant. See the 68882 manual for examples. #
23502
# If LEN is computed to be greater than 17, set OPERR in #
23503
# USER_FPSR. LEN is stored in d4. #
23504
# #
23505
# A7. Calculate SCALE. #
23506
# SCALE is equal to 10^ISCALE, where ISCALE is the number #
23507
# of decimal places needed to insure LEN integer digits #
23508
# in the output before conversion to bcd. LAMBDA is the #
23509
# sign of ISCALE, used in A9. Fp1 contains #
23510
# 10^^(abs(ISCALE)) using a rounding mode which is a #
23511
# function of the original rounding mode and the signs #
23512
# of ISCALE and X. A table is given in the code. #
23513
# #
23514
# A8. Clr INEX; Force RZ. #
23515
# The operation in A3 above may have set INEX2. #
23516
# RZ mode is forced for the scaling operation to insure #
23517
# only one rounding error. The grs bits are collected in #
23518
# the INEX flag for use in A10. #
23519
# #
23520
# A9. Scale X -> Y. #
23521
# The mantissa is scaled to the desired number of #
23522
# significant digits. The excess digits are collected #
23523
# in INEX2. #
23524
# #
23525
# A10. Or in INEX. #
23526
# If INEX is set, round error occurred. This is #
23527
# compensated for by 'or-ing' in the INEX2 flag to #
23528
# the lsb of Y. #
23529
# #
23530
# A11. Restore original FPCR; set size ext. #
23531
# Perform FINT operation in the user's rounding mode. #
23532
# Keep the size to extended. #
23533
# #
23534
# A12. Calculate YINT = FINT(Y) according to user's rounding #
23535
# mode. The FPSP routine sintd0 is used. The output #
23536
# is in fp0. #
23537
# #
23538
# A13. Check for LEN digits. #
23539
# If the int operation results in more than LEN digits, #
23540
# or less than LEN -1 digits, adjust ILOG and repeat from #
23541
# A6. This test occurs only on the first pass. If the #
23542
# result is exactly 10^LEN, decrement ILOG and divide #
23543
# the mantissa by 10. #
23544
# #
23545
# A14. Convert the mantissa to bcd. #
23546
# The binstr routine is used to convert the LEN digit #
23547
# mantissa to bcd in memory. The input to binstr is #
23548
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
23549
# such that the decimal point is to the left of bit 63. #
23550
# The bcd digits are stored in the correct position in #
23551
# the final string area in memory. #
23552
# #
23553
# A15. Convert the exponent to bcd. #
23554
# As in A14 above, the exp is converted to bcd and the #
23555
# digits are stored in the final string. #
23556
# Test the length of the final exponent string. If the #
23557
# length is 4, set operr. #
23558
# #
23559
# A16. Write sign bits to final string. #
23560
# #
23561
#########################################################################
23562
23563
set BINDEC_FLG, EXC_TEMP # DENORM flag
23564
23565
# Constants in extended precision
23566
PLOG2:
23567
long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23568
PLOG2UP1:
23569
long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23570
23571
# Constants in single precision
23572
FONE:
23573
long 0x3F800000,0x00000000,0x00000000,0x00000000
23574
FTWO:
23575
long 0x40000000,0x00000000,0x00000000,0x00000000
23576
FTEN:
23577
long 0x41200000,0x00000000,0x00000000,0x00000000
23578
F4933:
23579
long 0x459A2800,0x00000000,0x00000000,0x00000000
23580
23581
RBDTBL:
23582
byte 0,0,0,0
23583
byte 3,3,2,2
23584
byte 3,2,2,3
23585
byte 2,3,3,2
23586
23587
# Implementation Notes:
23588
#
23589
# The registers are used as follows:
23590
#
23591
# d0: scratch; LEN input to binstr
23592
# d1: scratch
23593
# d2: upper 32-bits of mantissa for binstr
23594
# d3: scratch;lower 32-bits of mantissa for binstr
23595
# d4: LEN
23596
# d5: LAMBDA/ICTR
23597
# d6: ILOG
23598
# d7: k-factor
23599
# a0: ptr for original operand/final result
23600
# a1: scratch pointer
23601
# a2: pointer to FP_X; abs(original value) in ext
23602
# fp0: scratch
23603
# fp1: scratch
23604
# fp2: scratch
23605
# F_SCR1:
23606
# F_SCR2:
23607
# L_SCR1:
23608
# L_SCR2:
23609
23610
global bindec
23611
bindec:
23612
movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
23613
fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
23614
23615
# A1. Set RM and size ext. Set SIGMA = sign input;
23616
# The k-factor is saved for use in d7. Clear BINDEC_FLG for
23617
# separating normalized/denormalized input. If the input
23618
# is a denormalized number, set the BINDEC_FLG memory word
23619
# to signal denorm. If the input is unnormalized, normalize
23620
# the input and test for denormalized result.
23621
#
23622
fmov.l &rm_mode*0x10,%fpcr # set RM and ext
23623
mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
23624
mov.l %d0,%d7 # move k-factor to d7
23625
23626
clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
23627
cmpi.b STAG(%a6),&DENORM # is input a DENORM?
23628
bne.w A2_str # no; input is a NORM
23629
23630
#
23631
# Normalize the denorm
23632
#
23633
un_de_norm:
23634
mov.w (%a0),%d0
23635
and.w &0x7fff,%d0 # strip sign of normalized exp
23636
mov.l 4(%a0),%d1
23637
mov.l 8(%a0),%d2
23638
norm_loop:
23639
sub.w &1,%d0
23640
lsl.l &1,%d2
23641
roxl.l &1,%d1
23642
tst.l %d1
23643
bge.b norm_loop
23644
#
23645
# Test if the normalized input is denormalized
23646
#
23647
tst.w %d0
23648
bgt.b pos_exp # if greater than zero, it is a norm
23649
st BINDEC_FLG(%a6) # set flag for denorm
23650
pos_exp:
23651
and.w &0x7fff,%d0 # strip sign of normalized exp
23652
mov.w %d0,(%a0)
23653
mov.l %d1,4(%a0)
23654
mov.l %d2,8(%a0)
23655
23656
# A2. Set X = abs(input).
23657
#
23658
A2_str:
23659
mov.l (%a0),FP_SCR1(%a6) # move input to work space
23660
mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
23661
mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
23662
and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
23663
23664
# A3. Compute ILOG.
23665
# ILOG is the log base 10 of the input value. It is approx-
23666
# imated by adding e + 0.f when the original value is viewed
23667
# as 2^^e * 1.f in extended precision. This value is stored
23668
# in d6.
23669
#
23670
# Register usage:
23671
# Input/Output
23672
# d0: k-factor/exponent
23673
# d2: x/x
23674
# d3: x/x
23675
# d4: x/x
23676
# d5: x/x
23677
# d6: x/ILOG
23678
# d7: k-factor/Unchanged
23679
# a0: ptr for original operand/final result
23680
# a1: x/x
23681
# a2: x/x
23682
# fp0: x/float(ILOG)
23683
# fp1: x/x
23684
# fp2: x/x
23685
# F_SCR1:x/x
23686
# F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23687
# L_SCR1:x/x
23688
# L_SCR2:first word of X packed/Unchanged
23689
23690
tst.b BINDEC_FLG(%a6) # check for denorm
23691
beq.b A3_cont # if clr, continue with norm
23692
mov.l &-4933,%d6 # force ILOG = -4933
23693
bra.b A4_str
23694
A3_cont:
23695
mov.w FP_SCR1(%a6),%d0 # move exp to d0
23696
mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
23697
fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
23698
sub.w &0x3fff,%d0 # strip off bias
23699
fadd.w %d0,%fp0 # add in exp
23700
fsub.s FONE(%pc),%fp0 # subtract off 1.0
23701
fbge.w pos_res # if pos, branch
23702
fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
23703
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23704
bra.b A4_str # go move out ILOG
23705
pos_res:
23706
fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
23707
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23708
23709
23710
# A4. Clr INEX bit.
23711
# The operation in A3 above may have set INEX2.
23712
23713
A4_str:
23714
fmov.l &0,%fpsr # zero all of fpsr - nothing needed
23715
23716
23717
# A5. Set ICTR = 0;
23718
# ICTR is a flag used in A13. It must be set before the
23719
# loop entry A6. The lower word of d5 is used for ICTR.
23720
23721
clr.w %d5 # clear ICTR
23722
23723
# A6. Calculate LEN.
23724
# LEN is the number of digits to be displayed. The k-factor
23725
# can dictate either the total number of digits, if it is
23726
# a positive number, or the number of digits after the
23727
# original decimal point which are to be included as
23728
# significant. See the 68882 manual for examples.
23729
# If LEN is computed to be greater than 17, set OPERR in
23730
# USER_FPSR. LEN is stored in d4.
23731
#
23732
# Register usage:
23733
# Input/Output
23734
# d0: exponent/Unchanged
23735
# d2: x/x/scratch
23736
# d3: x/x
23737
# d4: exc picture/LEN
23738
# d5: ICTR/Unchanged
23739
# d6: ILOG/Unchanged
23740
# d7: k-factor/Unchanged
23741
# a0: ptr for original operand/final result
23742
# a1: x/x
23743
# a2: x/x
23744
# fp0: float(ILOG)/Unchanged
23745
# fp1: x/x
23746
# fp2: x/x
23747
# F_SCR1:x/x
23748
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23749
# L_SCR1:x/x
23750
# L_SCR2:first word of X packed/Unchanged
23751
23752
A6_str:
23753
tst.l %d7 # branch on sign of k
23754
ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
23755
mov.l %d7,%d4 # if k > 0, LEN = k
23756
bra.b len_ck # skip to LEN check
23757
k_neg:
23758
mov.l %d6,%d4 # first load ILOG to d4
23759
sub.l %d7,%d4 # subtract off k
23760
addq.l &1,%d4 # add in the 1
23761
len_ck:
23762
tst.l %d4 # LEN check: branch on sign of LEN
23763
ble.b LEN_ng # if neg, set LEN = 1
23764
cmp.l %d4,&17 # test if LEN > 17
23765
ble.b A7_str # if not, forget it
23766
mov.l &17,%d4 # set max LEN = 17
23767
tst.l %d7 # if negative, never set OPERR
23768
ble.b A7_str # if positive, continue
23769
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
23770
bra.b A7_str # finished here
23771
LEN_ng:
23772
mov.l &1,%d4 # min LEN is 1
23773
23774
23775
# A7. Calculate SCALE.
23776
# SCALE is equal to 10^ISCALE, where ISCALE is the number
23777
# of decimal places needed to insure LEN integer digits
23778
# in the output before conversion to bcd. LAMBDA is the sign
23779
# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
23780
# the rounding mode as given in the following table (see
23781
# Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782
# of opposite sign in bindec.sa from Coonen).
23783
#
23784
# Initial USE
23785
# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
23786
# ----------------------------------------------
23787
# RN 00 0 0 00/0 RN
23788
# RN 00 0 1 00/0 RN
23789
# RN 00 1 0 00/0 RN
23790
# RN 00 1 1 00/0 RN
23791
# RZ 01 0 0 11/3 RP
23792
# RZ 01 0 1 11/3 RP
23793
# RZ 01 1 0 10/2 RM
23794
# RZ 01 1 1 10/2 RM
23795
# RM 10 0 0 11/3 RP
23796
# RM 10 0 1 10/2 RM
23797
# RM 10 1 0 10/2 RM
23798
# RM 10 1 1 11/3 RP
23799
# RP 11 0 0 10/2 RM
23800
# RP 11 0 1 11/3 RP
23801
# RP 11 1 0 11/3 RP
23802
# RP 11 1 1 10/2 RM
23803
#
23804
# Register usage:
23805
# Input/Output
23806
# d0: exponent/scratch - final is 0
23807
# d2: x/0 or 24 for A9
23808
# d3: x/scratch - offset ptr into PTENRM array
23809
# d4: LEN/Unchanged
23810
# d5: 0/ICTR:LAMBDA
23811
# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812
# d7: k-factor/Unchanged
23813
# a0: ptr for original operand/final result
23814
# a1: x/ptr to PTENRM array
23815
# a2: x/x
23816
# fp0: float(ILOG)/Unchanged
23817
# fp1: x/10^ISCALE
23818
# fp2: x/x
23819
# F_SCR1:x/x
23820
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23821
# L_SCR1:x/x
23822
# L_SCR2:first word of X packed/Unchanged
23823
23824
A7_str:
23825
tst.l %d7 # test sign of k
23826
bgt.b k_pos # if pos and > 0, skip this
23827
cmp.l %d7,%d6 # test k - ILOG
23828
blt.b k_pos # if ILOG >= k, skip this
23829
mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
23830
k_pos:
23831
mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
23832
addq.l &1,%d0 # add the 1
23833
sub.l %d4,%d0 # sub off LEN
23834
swap %d5 # use upper word of d5 for LAMBDA
23835
clr.w %d5 # set it zero initially
23836
clr.w %d2 # set up d2 for very small case
23837
tst.l %d0 # test sign of ISCALE
23838
bge.b iscale # if pos, skip next inst
23839
addq.w &1,%d5 # if neg, set LAMBDA true
23840
cmp.l %d0,&0xffffecd4 # test iscale <= -4908
23841
bgt.b no_inf # if false, skip rest
23842
add.l &24,%d0 # add in 24 to iscale
23843
mov.l &24,%d2 # put 24 in d2 for A9
23844
no_inf:
23845
neg.l %d0 # and take abs of ISCALE
23846
iscale:
23847
fmov.s FONE(%pc),%fp1 # init fp1 to 1
23848
bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
23849
lsl.w &1,%d1 # put them in bits 2:1
23850
add.w %d5,%d1 # add in LAMBDA
23851
lsl.w &1,%d1 # put them in bits 3:1
23852
tst.l L_SCR2(%a6) # test sign of original x
23853
bge.b x_pos # if pos, don't set bit 0
23854
addq.l &1,%d1 # if neg, set bit 0
23855
x_pos:
23856
lea.l RBDTBL(%pc),%a2 # load rbdtbl base
23857
mov.b (%a2,%d1),%d3 # load d3 with new rmode
23858
lsl.l &4,%d3 # put bits in proper position
23859
fmov.l %d3,%fpcr # load bits into fpu
23860
lsr.l &4,%d3 # put bits in proper position
23861
tst.b %d3 # decode new rmode for pten table
23862
bne.b not_rn # if zero, it is RN
23863
lea.l PTENRN(%pc),%a1 # load a1 with RN table base
23864
bra.b rmode # exit decode
23865
not_rn:
23866
lsr.b &1,%d3 # get lsb in carry
23867
bcc.b not_rp2 # if carry clear, it is RM
23868
lea.l PTENRP(%pc),%a1 # load a1 with RP table base
23869
bra.b rmode # exit decode
23870
not_rp2:
23871
lea.l PTENRM(%pc),%a1 # load a1 with RM table base
23872
rmode:
23873
clr.l %d3 # clr table index
23874
e_loop2:
23875
lsr.l &1,%d0 # shift next bit into carry
23876
bcc.b e_next2 # if zero, skip the mul
23877
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23878
e_next2:
23879
add.l &12,%d3 # inc d3 to next pwrten table entry
23880
tst.l %d0 # test if ISCALE is zero
23881
bne.b e_loop2 # if not, loop
23882
23883
# A8. Clr INEX; Force RZ.
23884
# The operation in A3 above may have set INEX2.
23885
# RZ mode is forced for the scaling operation to insure
23886
# only one rounding error. The grs bits are collected in
23887
# the INEX flag for use in A10.
23888
#
23889
# Register usage:
23890
# Input/Output
23891
23892
fmov.l &0,%fpsr # clr INEX
23893
fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
23894
23895
# A9. Scale X -> Y.
23896
# The mantissa is scaled to the desired number of significant
23897
# digits. The excess digits are collected in INEX2. If mul,
23898
# Check d2 for excess 10 exponential value. If not zero,
23899
# the iscale value would have caused the pwrten calculation
23900
# to overflow. Only a negative iscale can cause this, so
23901
# multiply by 10^(d2), which is now only allowed to be 24,
23902
# with a multiply by 10^8 and 10^16, which is exact since
23903
# 10^24 is exact. If the input was denormalized, we must
23904
# create a busy stack frame with the mul command and the
23905
# two operands, and allow the fpu to complete the multiply.
23906
#
23907
# Register usage:
23908
# Input/Output
23909
# d0: FPCR with RZ mode/Unchanged
23910
# d2: 0 or 24/unchanged
23911
# d3: x/x
23912
# d4: LEN/Unchanged
23913
# d5: ICTR:LAMBDA
23914
# d6: ILOG/Unchanged
23915
# d7: k-factor/Unchanged
23916
# a0: ptr for original operand/final result
23917
# a1: ptr to PTENRM array/Unchanged
23918
# a2: x/x
23919
# fp0: float(ILOG)/X adjusted for SCALE (Y)
23920
# fp1: 10^ISCALE/Unchanged
23921
# fp2: x/x
23922
# F_SCR1:x/x
23923
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
23924
# L_SCR1:x/x
23925
# L_SCR2:first word of X packed/Unchanged
23926
23927
A9_str:
23928
fmov.x (%a0),%fp0 # load X from memory
23929
fabs.x %fp0 # use abs(X)
23930
tst.w %d5 # LAMBDA is in lower word of d5
23931
bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
23932
fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
23933
bra.w A10_st # branch to A10
23934
23935
sc_mul:
23936
tst.b BINDEC_FLG(%a6) # check for denorm
23937
beq.w A9_norm # if norm, continue with mul
23938
23939
# for DENORM, we must calculate:
23940
# fp0 = input_op * 10^ISCALE * 10^24
23941
# since the input operand is a DENORM, we can't multiply it directly.
23942
# so, we do the multiplication of the exponents and mantissas separately.
23943
# in this way, we avoid underflow on intermediate stages of the
23944
# multiplication and guarantee a result without exception.
23945
fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
23946
23947
mov.w (%sp),%d3 # grab exponent
23948
andi.w &0x7fff,%d3 # clear sign
23949
ori.w &0x8000,(%a0) # make DENORM exp negative
23950
add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
23951
subi.w &0x3fff,%d3 # subtract BIAS
23952
add.w 36(%a1),%d3
23953
subi.w &0x3fff,%d3 # subtract BIAS
23954
add.w 48(%a1),%d3
23955
subi.w &0x3fff,%d3 # subtract BIAS
23956
23957
bmi.w sc_mul_err # is result is DENORM, punt!!!
23958
23959
andi.w &0x8000,(%sp) # keep sign
23960
or.w %d3,(%sp) # insert new exponent
23961
andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
23962
mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
23963
mov.l 0x4(%a0),-(%sp)
23964
mov.l &0x3fff0000,-(%sp) # force exp to zero
23965
fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
23966
fmul.x (%sp)+,%fp0
23967
23968
# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23969
# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23970
mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
23971
mov.l 36+4(%a1),-(%sp)
23972
mov.l &0x3fff0000,-(%sp) # force exp to zero
23973
mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
23974
mov.l 48+4(%a1),-(%sp)
23975
mov.l &0x3fff0000,-(%sp)# force exp to zero
23976
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
23977
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
23978
bra.b A10_st
23979
23980
sc_mul_err:
23981
bra.b sc_mul_err
23982
23983
A9_norm:
23984
tst.w %d2 # test for small exp case
23985
beq.b A9_con # if zero, continue as normal
23986
fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23987
fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23988
A9_con:
23989
fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
23990
23991
# A10. Or in INEX.
23992
# If INEX is set, round error occurred. This is compensated
23993
# for by 'or-ing' in the INEX2 flag to the lsb of Y.
23994
#
23995
# Register usage:
23996
# Input/Output
23997
# d0: FPCR with RZ mode/FPSR with INEX2 isolated
23998
# d2: x/x
23999
# d3: x/x
24000
# d4: LEN/Unchanged
24001
# d5: ICTR:LAMBDA
24002
# d6: ILOG/Unchanged
24003
# d7: k-factor/Unchanged
24004
# a0: ptr for original operand/final result
24005
# a1: ptr to PTENxx array/Unchanged
24006
# a2: x/ptr to FP_SCR1(a6)
24007
# fp0: Y/Y with lsb adjusted
24008
# fp1: 10^ISCALE/Unchanged
24009
# fp2: x/x
24010
24011
A10_st:
24012
fmov.l %fpsr,%d0 # get FPSR
24013
fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
24014
lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
24015
btst &9,%d0 # check if INEX2 set
24016
beq.b A11_st # if clear, skip rest
24017
or.l &1,8(%a2) # or in 1 to lsb of mantissa
24018
fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
24019
24020
24021
# A11. Restore original FPCR; set size ext.
24022
# Perform FINT operation in the user's rounding mode. Keep
24023
# the size to extended. The sintdo entry point in the sint
24024
# routine expects the FPCR value to be in USER_FPCR for
24025
# mode and precision. The original FPCR is saved in L_SCR1.
24026
24027
A11_st:
24028
mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
24029
and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
24030
# ;block exceptions
24031
24032
24033
# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24034
# The FPSP routine sintd0 is used. The output is in fp0.
24035
#
24036
# Register usage:
24037
# Input/Output
24038
# d0: FPSR with AINEX cleared/FPCR with size set to ext
24039
# d2: x/x/scratch
24040
# d3: x/x
24041
# d4: LEN/Unchanged
24042
# d5: ICTR:LAMBDA/Unchanged
24043
# d6: ILOG/Unchanged
24044
# d7: k-factor/Unchanged
24045
# a0: ptr for original operand/src ptr for sintdo
24046
# a1: ptr to PTENxx array/Unchanged
24047
# a2: ptr to FP_SCR1(a6)/Unchanged
24048
# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24049
# fp0: Y/YINT
24050
# fp1: 10^ISCALE/Unchanged
24051
# fp2: x/x
24052
# F_SCR1:x/x
24053
# F_SCR2:Y adjusted for inex/Y with original exponent
24054
# L_SCR1:x/original USER_FPCR
24055
# L_SCR2:first word of X packed/Unchanged
24056
24057
A12_st:
24058
movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
24059
mov.l L_SCR1(%a6),-(%sp)
24060
mov.l L_SCR2(%a6),-(%sp)
24061
24062
lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
24063
fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
24064
tst.l L_SCR2(%a6) # test sign of original operand
24065
bge.b do_fint12 # if pos, use Y
24066
or.l &0x80000000,(%a0) # if neg, use -Y
24067
do_fint12:
24068
mov.l USER_FPSR(%a6),-(%sp)
24069
# bsr sintdo # sint routine returns int in fp0
24070
24071
fmov.l USER_FPCR(%a6),%fpcr
24072
fmov.l &0x0,%fpsr # clear the AEXC bits!!!
24073
## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
24074
## andi.l &0x00000030,%d0
24075
## fmov.l %d0,%fpcr
24076
fint.x FP_SCR1(%a6),%fp0 # do fint()
24077
fmov.l %fpsr,%d0
24078
or.w %d0,FPSR_EXCEPT(%a6)
24079
## fmov.l &0x0,%fpcr
24080
## fmov.l %fpsr,%d0 # don't keep ccodes
24081
## or.w %d0,FPSR_EXCEPT(%a6)
24082
24083
mov.b (%sp),USER_FPSR(%a6)
24084
add.l &4,%sp
24085
24086
mov.l (%sp)+,L_SCR2(%a6)
24087
mov.l (%sp)+,L_SCR1(%a6)
24088
movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
24089
24090
mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
24091
mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
24092
24093
# A13. Check for LEN digits.
24094
# If the int operation results in more than LEN digits,
24095
# or less than LEN -1 digits, adjust ILOG and repeat from
24096
# A6. This test occurs only on the first pass. If the
24097
# result is exactly 10^LEN, decrement ILOG and divide
24098
# the mantissa by 10. The calculation of 10^LEN cannot
24099
# be inexact, since all powers of ten up to 10^27 are exact
24100
# in extended precision, so the use of a previous power-of-ten
24101
# table will introduce no error.
24102
#
24103
#
24104
# Register usage:
24105
# Input/Output
24106
# d0: FPCR with size set to ext/scratch final = 0
24107
# d2: x/x
24108
# d3: x/scratch final = x
24109
# d4: LEN/LEN adjusted
24110
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24111
# d6: ILOG/ILOG adjusted
24112
# d7: k-factor/Unchanged
24113
# a0: pointer into memory for packed bcd string formation
24114
# a1: ptr to PTENxx array/Unchanged
24115
# a2: ptr to FP_SCR1(a6)/Unchanged
24116
# fp0: int portion of Y/abs(YINT) adjusted
24117
# fp1: 10^ISCALE/Unchanged
24118
# fp2: x/10^LEN
24119
# F_SCR1:x/x
24120
# F_SCR2:Y with original exponent/Unchanged
24121
# L_SCR1:original USER_FPCR/Unchanged
24122
# L_SCR2:first word of X packed/Unchanged
24123
24124
A13_st:
24125
swap %d5 # put ICTR in lower word of d5
24126
tst.w %d5 # check if ICTR = 0
24127
bne not_zr # if non-zero, go to second test
24128
#
24129
# Compute 10^(LEN-1)
24130
#
24131
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24132
mov.l %d4,%d0 # put LEN in d0
24133
subq.l &1,%d0 # d0 = LEN -1
24134
clr.l %d3 # clr table index
24135
l_loop:
24136
lsr.l &1,%d0 # shift next bit into carry
24137
bcc.b l_next # if zero, skip the mul
24138
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24139
l_next:
24140
add.l &12,%d3 # inc d3 to next pwrten table entry
24141
tst.l %d0 # test if LEN is zero
24142
bne.b l_loop # if not, loop
24143
#
24144
# 10^LEN-1 is computed for this test and A14. If the input was
24145
# denormalized, check only the case in which YINT > 10^LEN.
24146
#
24147
tst.b BINDEC_FLG(%a6) # check if input was norm
24148
beq.b A13_con # if norm, continue with checking
24149
fabs.x %fp0 # take abs of YINT
24150
bra test_2
24151
#
24152
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24153
#
24154
A13_con:
24155
fabs.x %fp0 # take abs of YINT
24156
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
24157
fbge.w test_2 # if greater, do next test
24158
subq.l &1,%d6 # subtract 1 from ILOG
24159
mov.w &1,%d5 # set ICTR
24160
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24161
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24162
bra.w A6_str # return to A6 and recompute YINT
24163
test_2:
24164
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24165
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
24166
fblt.w A14_st # if less, all is ok, go to A14
24167
fbgt.w fix_ex # if greater, fix and redo
24168
fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
24169
addq.l &1,%d6 # and inc ILOG
24170
bra.b A14_st # and continue elsewhere
24171
fix_ex:
24172
addq.l &1,%d6 # increment ILOG by 1
24173
mov.w &1,%d5 # set ICTR
24174
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24175
bra.w A6_str # return to A6 and recompute YINT
24176
#
24177
# Since ICTR <> 0, we have already been through one adjustment,
24178
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24179
# 10^LEN is again computed using whatever table is in a1 since the
24180
# value calculated cannot be inexact.
24181
#
24182
not_zr:
24183
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24184
mov.l %d4,%d0 # put LEN in d0
24185
clr.l %d3 # clr table index
24186
z_loop:
24187
lsr.l &1,%d0 # shift next bit into carry
24188
bcc.b z_next # if zero, skip the mul
24189
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24190
z_next:
24191
add.l &12,%d3 # inc d3 to next pwrten table entry
24192
tst.l %d0 # test if LEN is zero
24193
bne.b z_loop # if not, loop
24194
fabs.x %fp0 # get abs(YINT)
24195
fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
24196
fbneq.w A14_st # if not, skip this
24197
fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
24198
addq.l &1,%d6 # and inc ILOG by 1
24199
addq.l &1,%d4 # and inc LEN
24200
fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN
24201
24202
# A14. Convert the mantissa to bcd.
24203
# The binstr routine is used to convert the LEN digit
24204
# mantissa to bcd in memory. The input to binstr is
24205
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206
# such that the decimal point is to the left of bit 63.
24207
# The bcd digits are stored in the correct position in
24208
# the final string area in memory.
24209
#
24210
#
24211
# Register usage:
24212
# Input/Output
24213
# d0: x/LEN call to binstr - final is 0
24214
# d1: x/0
24215
# d2: x/ms 32-bits of mant of abs(YINT)
24216
# d3: x/ls 32-bits of mant of abs(YINT)
24217
# d4: LEN/Unchanged
24218
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24219
# d6: ILOG
24220
# d7: k-factor/Unchanged
24221
# a0: pointer into memory for packed bcd string formation
24222
# /ptr to first mantissa byte in result string
24223
# a1: ptr to PTENxx array/Unchanged
24224
# a2: ptr to FP_SCR1(a6)/Unchanged
24225
# fp0: int portion of Y/abs(YINT) adjusted
24226
# fp1: 10^ISCALE/Unchanged
24227
# fp2: 10^LEN/Unchanged
24228
# F_SCR1:x/Work area for final result
24229
# F_SCR2:Y with original exponent/Unchanged
24230
# L_SCR1:original USER_FPCR/Unchanged
24231
# L_SCR2:first word of X packed/Unchanged
24232
24233
A14_st:
24234
fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
24235
fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
24236
lea.l FP_SCR0(%a6),%a0
24237
fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
24238
mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
24239
mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
24240
clr.l 4(%a0) # zero word 2 of FP_RES
24241
clr.l 8(%a0) # zero word 3 of FP_RES
24242
mov.l (%a0),%d0 # move exponent to d0
24243
swap %d0 # put exponent in lower word
24244
beq.b no_sft # if zero, don't shift
24245
sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
24246
tst.l %d0 # check if > 1
24247
bgt.b no_sft # if so, don't shift
24248
neg.l %d0 # make exp positive
24249
m_loop:
24250
lsr.l &1,%d2 # shift d2:d3 right, add 0s
24251
roxr.l &1,%d3 # the number of places
24252
dbf.w %d0,m_loop # given in d0
24253
no_sft:
24254
tst.l %d2 # check for mantissa of zero
24255
bne.b no_zr # if not, go on
24256
tst.l %d3 # continue zero check
24257
beq.b zer_m # if zero, go directly to binstr
24258
no_zr:
24259
clr.l %d1 # put zero in d1 for addx
24260
add.l &0x00000080,%d3 # inc at bit 7
24261
addx.l %d1,%d2 # continue inc
24262
and.l &0xffffff80,%d3 # strip off lsb not used by 882
24263
zer_m:
24264
mov.l %d4,%d0 # put LEN in d0 for binstr call
24265
addq.l &3,%a0 # a0 points to M16 byte in result
24266
bsr binstr # call binstr to convert mant
24267
24268
24269
# A15. Convert the exponent to bcd.
24270
# As in A14 above, the exp is converted to bcd and the
24271
# digits are stored in the final string.
24272
#
24273
# Digits are stored in L_SCR1(a6) on return from BINDEC as:
24274
#
24275
# 32 16 15 0
24276
# -----------------------------------------
24277
# | 0 | e3 | e2 | e1 | e4 | X | X | X |
24278
# -----------------------------------------
24279
#
24280
# And are moved into their proper places in FP_SCR0. If digit e4
24281
# is non-zero, OPERR is signaled. In all cases, all 4 digits are
24282
# written as specified in the 881/882 manual for packed decimal.
24283
#
24284
# Register usage:
24285
# Input/Output
24286
# d0: x/LEN call to binstr - final is 0
24287
# d1: x/scratch (0);shift count for final exponent packing
24288
# d2: x/ms 32-bits of exp fraction/scratch
24289
# d3: x/ls 32-bits of exp fraction
24290
# d4: LEN/Unchanged
24291
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24292
# d6: ILOG
24293
# d7: k-factor/Unchanged
24294
# a0: ptr to result string/ptr to L_SCR1(a6)
24295
# a1: ptr to PTENxx array/Unchanged
24296
# a2: ptr to FP_SCR1(a6)/Unchanged
24297
# fp0: abs(YINT) adjusted/float(ILOG)
24298
# fp1: 10^ISCALE/Unchanged
24299
# fp2: 10^LEN/Unchanged
24300
# F_SCR1:Work area for final result/BCD result
24301
# F_SCR2:Y with original exponent/ILOG/10^4
24302
# L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303
# L_SCR2:first word of X packed/Unchanged
24304
24305
A15_st:
24306
tst.b BINDEC_FLG(%a6) # check for denorm
24307
beq.b not_denorm
24308
ftest.x %fp0 # test for zero
24309
fbeq.w den_zero # if zero, use k-factor or 4933
24310
fmov.l %d6,%fp0 # float ILOG
24311
fabs.x %fp0 # get abs of ILOG
24312
bra.b convrt
24313
den_zero:
24314
tst.l %d7 # check sign of the k-factor
24315
blt.b use_ilog # if negative, use ILOG
24316
fmov.s F4933(%pc),%fp0 # force exponent to 4933
24317
bra.b convrt # do it
24318
use_ilog:
24319
fmov.l %d6,%fp0 # float ILOG
24320
fabs.x %fp0 # get abs of ILOG
24321
bra.b convrt
24322
not_denorm:
24323
ftest.x %fp0 # test for zero
24324
fbneq.w not_zero # if zero, force exponent
24325
fmov.s FONE(%pc),%fp0 # force exponent to 1
24326
bra.b convrt # do it
24327
not_zero:
24328
fmov.l %d6,%fp0 # float ILOG
24329
fabs.x %fp0 # get abs of ILOG
24330
convrt:
24331
fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
24332
fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
24333
mov.l 4(%a2),%d2 # move word 2 to d2
24334
mov.l 8(%a2),%d3 # move word 3 to d3
24335
mov.w (%a2),%d0 # move exp to d0
24336
beq.b x_loop_fin # if zero, skip the shift
24337
sub.w &0x3ffd,%d0 # subtract off bias
24338
neg.w %d0 # make exp positive
24339
x_loop:
24340
lsr.l &1,%d2 # shift d2:d3 right
24341
roxr.l &1,%d3 # the number of places
24342
dbf.w %d0,x_loop # given in d0
24343
x_loop_fin:
24344
clr.l %d1 # put zero in d1 for addx
24345
add.l &0x00000080,%d3 # inc at bit 6
24346
addx.l %d1,%d2 # continue inc
24347
and.l &0xffffff80,%d3 # strip off lsb not used by 882
24348
mov.l &4,%d0 # put 4 in d0 for binstr call
24349
lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24350
bsr binstr # call binstr to convert exp
24351
mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24352
mov.l &12,%d1 # use d1 for shift count
24353
lsr.l %d1,%d0 # shift d0 right by 12
24354
bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
24355
lsr.l %d1,%d0 # shift d0 right by 12
24356
bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
24357
tst.b %d0 # check if e4 is zero
24358
beq.b A16_st # if zero, skip rest
24359
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
24360
24361
24362
# A16. Write sign bits to final string.
24363
# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24364
#
24365
# Register usage:
24366
# Input/Output
24367
# d0: x/scratch - final is x
24368
# d2: x/x
24369
# d3: x/x
24370
# d4: LEN/Unchanged
24371
# d5: ICTR:LAMBDA/LAMBDA:ICTR
24372
# d6: ILOG/ILOG adjusted
24373
# d7: k-factor/Unchanged
24374
# a0: ptr to L_SCR1(a6)/Unchanged
24375
# a1: ptr to PTENxx array/Unchanged
24376
# a2: ptr to FP_SCR1(a6)/Unchanged
24377
# fp0: float(ILOG)/Unchanged
24378
# fp1: 10^ISCALE/Unchanged
24379
# fp2: 10^LEN/Unchanged
24380
# F_SCR1:BCD result with correct signs
24381
# F_SCR2:ILOG/10^4
24382
# L_SCR1:Exponent digits on return from binstr
24383
# L_SCR2:first word of X packed/Unchanged
24384
24385
A16_st:
24386
clr.l %d0 # clr d0 for collection of signs
24387
and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
24388
tst.l L_SCR2(%a6) # check sign of original mantissa
24389
bge.b mant_p # if pos, don't set SM
24390
mov.l &2,%d0 # move 2 in to d0 for SM
24391
mant_p:
24392
tst.l %d6 # check sign of ILOG
24393
bge.b wr_sgn # if pos, don't set SE
24394
addq.l &1,%d0 # set bit 0 in d0 for SE
24395
wr_sgn:
24396
bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
24397
24398
# Clean up and restore all registers used.
24399
24400
fmov.l &0,%fpsr # clear possible inex2/ainex bits
24401
fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
24402
movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
24403
rts
24404
24405
global PTENRN
24406
PTENRN:
24407
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24408
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24409
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24410
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24411
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24412
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24413
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24414
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24415
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24416
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24417
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24418
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24419
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24420
24421
global PTENRP
24422
PTENRP:
24423
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24424
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24425
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24426
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24427
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24428
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24429
long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
24430
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24431
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24432
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24433
long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
24434
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24435
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24436
24437
global PTENRM
24438
PTENRM:
24439
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24440
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24441
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24442
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24443
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24444
long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
24445
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24446
long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
24447
long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
24448
long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
24449
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24450
long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
24451
long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
24452
24453
#########################################################################
24454
# binstr(): Converts a 64-bit binary integer to bcd. #
24455
# #
24456
# INPUT *************************************************************** #
24457
# d2:d3 = 64-bit binary integer #
24458
# d0 = desired length (LEN) #
24459
# a0 = pointer to start in memory for bcd characters #
24460
# (This pointer must point to byte 4 of the first #
24461
# lword of the packed decimal memory string.) #
24462
# #
24463
# OUTPUT ************************************************************** #
24464
# a0 = pointer to LEN bcd digits representing the 64-bit integer. #
24465
# #
24466
# ALGORITHM *********************************************************** #
24467
# The 64-bit binary is assumed to have a decimal point before #
24468
# bit 63. The fraction is multiplied by 10 using a mul by 2 #
24469
# shift and a mul by 8 shift. The bits shifted out of the #
24470
# msb form a decimal digit. This process is iterated until #
24471
# LEN digits are formed. #
24472
# #
24473
# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
24474
# digit formed will be assumed the least significant. This is #
24475
# to force the first byte formed to have a 0 in the upper 4 bits. #
24476
# #
24477
# A2. Beginning of the loop: #
24478
# Copy the fraction in d2:d3 to d4:d5. #
24479
# #
24480
# A3. Multiply the fraction in d2:d3 by 8 using bit-field #
24481
# extracts and shifts. The three msbs from d2 will go into d1. #
24482
# #
24483
# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
24484
# will be collected by the carry. #
24485
# #
24486
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
24487
# into d2:d3. D1 will contain the bcd digit formed. #
24488
# #
24489
# A6. Test d7. If zero, the digit formed is the ms digit. If non- #
24490
# zero, it is the ls digit. Put the digit in its place in the #
24491
# upper word of d0. If it is the ls digit, write the word #
24492
# from d0 to memory. #
24493
# #
24494
# A7. Decrement d6 (LEN counter) and repeat the loop until zero. #
24495
# #
24496
#########################################################################
24497
24498
# Implementation Notes:
24499
#
24500
# The registers are used as follows:
24501
#
24502
# d0: LEN counter
24503
# d1: temp used to form the digit
24504
# d2: upper 32-bits of fraction for mul by 8
24505
# d3: lower 32-bits of fraction for mul by 8
24506
# d4: upper 32-bits of fraction for mul by 2
24507
# d5: lower 32-bits of fraction for mul by 2
24508
# d6: temp for bit-field extracts
24509
# d7: byte digit formation word;digit count {0,1}
24510
# a0: pointer into memory for packed bcd string formation
24511
#
24512
24513
global binstr
24514
binstr:
24515
movm.l &0xff00,-(%sp) # {%d0-%d7}
24516
24517
#
24518
# A1: Init d7
24519
#
24520
mov.l &1,%d7 # init d7 for second digit
24521
subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
24522
#
24523
# A2. Copy d2:d3 to d4:d5. Start loop.
24524
#
24525
loop:
24526
mov.l %d2,%d4 # copy the fraction before muls
24527
mov.l %d3,%d5 # to d4:d5
24528
#
24529
# A3. Multiply d2:d3 by 8; extract msbs into d1.
24530
#
24531
bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
24532
asl.l &3,%d2 # shift d2 left by 3 places
24533
bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
24534
asl.l &3,%d3 # shift d3 left by 3 places
24535
or.l %d6,%d2 # or in msbs from d3 into d2
24536
#
24537
# A4. Multiply d4:d5 by 2; add carry out to d1.
24538
#
24539
asl.l &1,%d5 # mul d5 by 2
24540
roxl.l &1,%d4 # mul d4 by 2
24541
swap %d6 # put 0 in d6 lower word
24542
addx.w %d6,%d1 # add in extend from mul by 2
24543
#
24544
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
24545
#
24546
add.l %d5,%d3 # add lower 32 bits
24547
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548
addx.l %d4,%d2 # add with extend upper 32 bits
24549
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550
addx.w %d6,%d1 # add in extend from add to d1
24551
swap %d6 # with d6 = 0; put 0 in upper word
24552
#
24553
# A6. Test d7 and branch.
24554
#
24555
tst.w %d7 # if zero, store digit & to loop
24556
beq.b first_d # if non-zero, form byte & write
24557
sec_d:
24558
swap %d7 # bring first digit to word d7b
24559
asl.w &4,%d7 # first digit in upper 4 bits d7b
24560
add.w %d1,%d7 # add in ls digit to d7b
24561
mov.b %d7,(%a0)+ # store d7b byte in memory
24562
swap %d7 # put LEN counter in word d7a
24563
clr.w %d7 # set d7a to signal no digits done
24564
dbf.w %d0,loop # do loop some more!
24565
bra.b end_bstr # finished, so exit
24566
first_d:
24567
swap %d7 # put digit word in d7b
24568
mov.w %d1,%d7 # put new digit in d7b
24569
swap %d7 # put LEN counter in word d7a
24570
addq.w &1,%d7 # set d7a to signal first digit done
24571
dbf.w %d0,loop # do loop some more!
24572
swap %d7 # put last digit in string
24573
lsl.w &4,%d7 # move it to upper 4 bits
24574
mov.b %d7,(%a0)+ # store it in memory string
24575
#
24576
# Clean up and return with result in fp0.
24577
#
24578
end_bstr:
24579
movm.l (%sp)+,&0xff # {%d0-%d7}
24580
rts
24581
24582
#########################################################################
24583
# XDEF **************************************************************** #
24584
# facc_in_b(): dmem_read_byte failed #
24585
# facc_in_w(): dmem_read_word failed #
24586
# facc_in_l(): dmem_read_long failed #
24587
# facc_in_d(): dmem_read of dbl prec failed #
24588
# facc_in_x(): dmem_read of ext prec failed #
24589
# #
24590
# facc_out_b(): dmem_write_byte failed #
24591
# facc_out_w(): dmem_write_word failed #
24592
# facc_out_l(): dmem_write_long failed #
24593
# facc_out_d(): dmem_write of dbl prec failed #
24594
# facc_out_x(): dmem_write of ext prec failed #
24595
# #
24596
# XREF **************************************************************** #
24597
# _real_access() - exit through access error handler #
24598
# #
24599
# INPUT *************************************************************** #
24600
# None #
24601
# #
24602
# OUTPUT ************************************************************** #
24603
# None #
24604
# #
24605
# ALGORITHM *********************************************************** #
24606
# Flow jumps here when an FP data fetch call gets an error #
24607
# result. This means the operating system wants an access error frame #
24608
# made out of the current exception stack frame. #
24609
# So, we first call restore() which makes sure that any updated #
24610
# -(an)+ register gets returned to its pre-exception value and then #
24611
# we change the stack to an access error stack frame. #
24612
# #
24613
#########################################################################
24614
24615
facc_in_b:
24616
movq.l &0x1,%d0 # one byte
24617
bsr.w restore # fix An
24618
24619
mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
24620
bra.w facc_finish
24621
24622
facc_in_w:
24623
movq.l &0x2,%d0 # two bytes
24624
bsr.w restore # fix An
24625
24626
mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
24627
bra.b facc_finish
24628
24629
facc_in_l:
24630
movq.l &0x4,%d0 # four bytes
24631
bsr.w restore # fix An
24632
24633
mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
24634
bra.b facc_finish
24635
24636
facc_in_d:
24637
movq.l &0x8,%d0 # eight bytes
24638
bsr.w restore # fix An
24639
24640
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24641
bra.b facc_finish
24642
24643
facc_in_x:
24644
movq.l &0xc,%d0 # twelve bytes
24645
bsr.w restore # fix An
24646
24647
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24648
bra.b facc_finish
24649
24650
################################################################
24651
24652
facc_out_b:
24653
movq.l &0x1,%d0 # one byte
24654
bsr.w restore # restore An
24655
24656
mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
24657
bra.b facc_finish
24658
24659
facc_out_w:
24660
movq.l &0x2,%d0 # two bytes
24661
bsr.w restore # restore An
24662
24663
mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
24664
bra.b facc_finish
24665
24666
facc_out_l:
24667
movq.l &0x4,%d0 # four bytes
24668
bsr.w restore # restore An
24669
24670
mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
24671
bra.b facc_finish
24672
24673
facc_out_d:
24674
movq.l &0x8,%d0 # eight bytes
24675
bsr.w restore # restore An
24676
24677
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24678
bra.b facc_finish
24679
24680
facc_out_x:
24681
mov.l &0xc,%d0 # twelve bytes
24682
bsr.w restore # restore An
24683
24684
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24685
24686
# here's where we actually create the access error frame from the
24687
# current exception stack frame.
24688
facc_finish:
24689
mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24690
24691
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
24692
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
24694
24695
unlk %a6
24696
24697
mov.l (%sp),-(%sp) # store SR, hi(PC)
24698
mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
24699
mov.l 0xc(%sp),0x8(%sp) # store EA
24700
mov.l &0x00000001,0xc(%sp) # store FSLW
24701
mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
24702
mov.w &0x4008,0x6(%sp) # store voff
24703
24704
btst &0x5,(%sp) # supervisor or user mode?
24705
beq.b facc_out2 # user
24706
bset &0x2,0xd(%sp) # set supervisor TM bit
24707
24708
facc_out2:
24709
bra.l _real_access
24710
24711
##################################################################
24712
24713
# if the effective addressing mode was predecrement or postincrement,
24714
# the emulation has already changed its value to the correct post-
24715
# instruction value. but since we're exiting to the access error
24716
# handler, then AN must be returned to its pre-instruction value.
24717
# we do that here.
24718
restore:
24719
mov.b EXC_OPWORD+0x1(%a6),%d1
24720
andi.b &0x38,%d1 # extract opmode
24721
cmpi.b %d1,&0x18 # postinc?
24722
beq.w rest_inc
24723
cmpi.b %d1,&0x20 # predec?
24724
beq.w rest_dec
24725
rts
24726
24727
rest_inc:
24728
mov.b EXC_OPWORD+0x1(%a6),%d1
24729
andi.w &0x0007,%d1 # fetch An
24730
24731
mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
24732
jmp (tbl_rest_inc.b,%pc,%d1.w*1)
24733
24734
tbl_rest_inc:
24735
short ri_a0 - tbl_rest_inc
24736
short ri_a1 - tbl_rest_inc
24737
short ri_a2 - tbl_rest_inc
24738
short ri_a3 - tbl_rest_inc
24739
short ri_a4 - tbl_rest_inc
24740
short ri_a5 - tbl_rest_inc
24741
short ri_a6 - tbl_rest_inc
24742
short ri_a7 - tbl_rest_inc
24743
24744
ri_a0:
24745
sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
24746
rts
24747
ri_a1:
24748
sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
24749
rts
24750
ri_a2:
24751
sub.l %d0,%a2 # fix a2
24752
rts
24753
ri_a3:
24754
sub.l %d0,%a3 # fix a3
24755
rts
24756
ri_a4:
24757
sub.l %d0,%a4 # fix a4
24758
rts
24759
ri_a5:
24760
sub.l %d0,%a5 # fix a5
24761
rts
24762
ri_a6:
24763
sub.l %d0,(%a6) # fix stacked a6
24764
rts
24765
# if it's a fmove out instruction, we don't have to fix a7
24766
# because we hadn't changed it yet. if it's an opclass two
24767
# instruction (data moved in) and the exception was in supervisor
24768
# mode, then also also wasn't updated. if it was user mode, then
24769
# restore the correct a7 which is in the USP currently.
24770
ri_a7:
24771
cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
24772
bne.b ri_a7_done # out
24773
24774
btst &0x5,EXC_SR(%a6) # user or supervisor?
24775
bne.b ri_a7_done # supervisor
24776
movc %usp,%a0 # restore USP
24777
sub.l %d0,%a0
24778
movc %a0,%usp
24779
ri_a7_done:
24780
rts
24781
24782
# need to invert adjustment value if the <ea> was predec
24783
rest_dec:
24784
neg.l %d0
24785
bra.b rest_inc
24786
24787