~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP2M68000 Hi-Performance Microprocessor Division3M68060 Software Package4Production Release P1.00 -- October 10, 199456M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.78THE SOFTWARE is provided on an "AS IS" basis and without warranty.9To the maximum extent permitted by applicable law,10MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,11INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE12and any warranty against infringement with regard to the SOFTWARE13(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.1415To the maximum extent permitted by applicable law,16IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER17(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,18BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)19ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.20Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.2122You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE23so long as this entire notice is retained without alteration in any modified and/or24redistributed versions, and that such modified versions are clearly identified as such.25No licenses are granted by implication, estoppel or otherwise under any patents26or trademarks of Motorola, Inc.27~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~28#29# freal.s:30# This file is appended to the top of the 060FPSP package31# and contains the entry points into the package. The user, in32# effect, branches to one of the branch table entries located33# after _060FPSP_TABLE.34# Also, subroutine stubs exist in this file (_fpsp_done for35# example) that are referenced by the FPSP package itself in order36# to call a given routine. 
# The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#

# Byte offsets of the callout entries. Every stub below reads the longword
# stored at (_060FPSP_TABLE - 0x80 + _off_xxx) and uses that value as a
# displacement from (_060FPSP_TABLE - 0x80) to reach the callout routine.
set	_off_bsun,	0x00
set	_off_snan,	0x04
set	_off_operr,	0x08
set	_off_ovfl,	0x0c
set	_off_unfl,	0x10
set	_off_dz,	0x14
set	_off_inex,	0x18
set	_off_fline,	0x1c
set	_off_fpu_dis,	0x20
set	_off_trap,	0x24
set	_off_trace,	0x28
set	_off_access,	0x2c
set	_off_done,	0x30

# offsets of the memory-access callouts (instruction/data read/write)
set	_off_imr,	0x40
set	_off_dmr,	0x44
set	_off_dmw,	0x48
set	_off_irw,	0x4c
set	_off_irl,	0x50
set	_off_drb,	0x54
set	_off_drw,	0x58
set	_off_drl,	0x5c
set	_off_dwb,	0x60
set	_off_dww,	0x64
set	_off_dwl,	0x68

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Each entry is a bra.l followed by a zero word. With a 6-byte bra.l an
# entry occupies 8 bytes, so entry N sits at _060FPSP_TABLE + 8*N.
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

# NOTE(review): presumably pads the 9 entries above (72 bytes) out to
# 0x80 bytes -- confirm against the assembler's "space" directive.
	space		56

###############################################################
# CALLOUT STUBS
# Every stub below is the same five-instruction sequence; only the
# _off_xxx entry differs:
#	mov.l	%d0,-(%sp)		save the caller's d0 on the stack
#	mov.l	(..+_off_xxx,%pc),%d0	d0 = displacement held in the entry
#	pea.l	(..,%pc,%d0)		push (_060FPSP_TABLE-0x80) + d0
#	mov.l	0x4(%sp),%d0		reload the saved d0
#	rtd	&0x4			pop the pushed address into the PC and
#					discard the 4-byte d0 save slot
# The net effect is a jump to the callout routine with d0 and the stack
# pointer exactly as they were on entry to the stub.
###############################################################
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset

set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			#
frame pointer303304set EXC_AREGS, -68 # offset of all address regs305set EXC_DREGS, -100 # offset of all data regs306set EXC_FPREGS, -36 # offset of all fp regs307308set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7309set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7310set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6311set EXC_A5, EXC_AREGS+(5*4)312set EXC_A4, EXC_AREGS+(4*4)313set EXC_A3, EXC_AREGS+(3*4)314set EXC_A2, EXC_AREGS+(2*4)315set EXC_A1, EXC_AREGS+(1*4)316set EXC_A0, EXC_AREGS+(0*4)317set EXC_D7, EXC_DREGS+(7*4)318set EXC_D6, EXC_DREGS+(6*4)319set EXC_D5, EXC_DREGS+(5*4)320set EXC_D4, EXC_DREGS+(4*4)321set EXC_D3, EXC_DREGS+(3*4)322set EXC_D2, EXC_DREGS+(2*4)323set EXC_D1, EXC_DREGS+(1*4)324set EXC_D0, EXC_DREGS+(0*4)325326set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0327set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1328set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)329330set FP_SCR1, LV+80 # fp scratch 1331set FP_SCR1_EX, FP_SCR1+0332set FP_SCR1_SGN, FP_SCR1+2333set FP_SCR1_HI, FP_SCR1+4334set FP_SCR1_LO, FP_SCR1+8335336set FP_SCR0, LV+68 # fp scratch 0337set FP_SCR0_EX, FP_SCR0+0338set FP_SCR0_SGN, FP_SCR0+2339set FP_SCR0_HI, FP_SCR0+4340set FP_SCR0_LO, FP_SCR0+8341342set FP_DST, LV+56 # fp destination operand343set FP_DST_EX, FP_DST+0344set FP_DST_SGN, FP_DST+2345set FP_DST_HI, FP_DST+4346set FP_DST_LO, FP_DST+8347348set FP_SRC, LV+44 # fp source operand349set FP_SRC_EX, FP_SRC+0350set FP_SRC_SGN, FP_SRC+2351set FP_SRC_HI, FP_SRC+4352set FP_SRC_LO, FP_SRC+8353354set USER_FPIAR, LV+40 # FP instr address register355356set USER_FPSR, LV+36 # FP status register357set FPSR_CC, USER_FPSR+0 # FPSR condition codes358set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte359set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte360set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte361362set USER_FPCR, LV+32 # FP control register363set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable364set FPCR_MODE, USER_FPCR+3 # FPCR rounding 
mode control365366set L_SCR3, LV+28 # integer scratch 3367set L_SCR2, LV+24 # integer scratch 2368set L_SCR1, LV+20 # integer scratch 1369370set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)371372set EXC_TEMP2, LV+24 # temporary space373set EXC_TEMP, LV+16 # temporary space374375set DTAG, LV+15 # destination operand type376set STAG, LV+14 # source operand type377378set SPCOND_FLG, LV+10 # flag: special case (see below)379380set EXC_CC, LV+8 # saved condition codes381set EXC_EXTWPTR, LV+4 # saved current PC (active)382set EXC_EXTWORD, LV+2 # saved extension word383set EXC_CMDREG, LV+2 # saved extension word384set EXC_OPWORD, LV+0 # saved operation word385386################################387388# Helpful macros389390set FTEMP, 0 # offsets within an391set FTEMP_EX, 0 # extended precision392set FTEMP_SGN, 2 # value saved in memory.393set FTEMP_HI, 4394set FTEMP_LO, 8395set FTEMP_GRS, 12396397set LOCAL, 0 # offsets within an398set LOCAL_EX, 0 # extended precision399set LOCAL_SGN, 2 # value saved in memory.400set LOCAL_HI, 4401set LOCAL_LO, 8402set LOCAL_GRS, 12403404set DST, 0 # offsets within an405set DST_EX, 0 # extended precision406set DST_HI, 4 # value saved in memory.407set DST_LO, 8408409set SRC, 0 # offsets within an410set SRC_EX, 0 # extended precision411set SRC_HI, 4 # value saved in memory.412set SRC_LO, 8413414set SGL_LO, 0x3f81 # min sgl prec exponent415set SGL_HI, 0x407e # max sgl prec exponent416set DBL_LO, 0x3c01 # min dbl prec exponent417set DBL_HI, 0x43fe # max dbl prec exponent418set EXT_LO, 0x0 # min ext prec exponent419set EXT_HI, 0x7ffe # max ext prec exponent420421set EXT_BIAS, 0x3fff # extended precision bias422set SGL_BIAS, 0x007f # single precision bias423set DBL_BIAS, 0x03ff # double precision bias424425set NORM, 0x00 # operand type for STAG/DTAG426set ZERO, 0x01 # operand type for STAG/DTAG427set INF, 0x02 # operand type for STAG/DTAG428set QNAN, 0x03 # operand type for STAG/DTAG429set DENORM, 0x04 # operand type for 
STAG/DTAG430set SNAN, 0x05 # operand type for STAG/DTAG431set UNNORM, 0x06 # operand type for STAG/DTAG432433##################434# FPSR/FPCR bits #435##################436set neg_bit, 0x3 # negative result437set z_bit, 0x2 # zero result438set inf_bit, 0x1 # infinite result439set nan_bit, 0x0 # NAN result440441set q_sn_bit, 0x7 # sign bit of quotient byte442443set bsun_bit, 7 # branch on unordered444set snan_bit, 6 # signalling NAN445set operr_bit, 5 # operand error446set ovfl_bit, 4 # overflow447set unfl_bit, 3 # underflow448set dz_bit, 2 # divide by zero449set inex2_bit, 1 # inexact result 2450set inex1_bit, 0 # inexact result 1451452set aiop_bit, 7 # accrued inexact operation bit453set aovfl_bit, 6 # accrued overflow bit454set aunfl_bit, 5 # accrued underflow bit455set adz_bit, 4 # accrued dz bit456set ainex_bit, 3 # accrued inexact bit457458#############################459# FPSR individual bit masks #460#############################461set neg_mask, 0x08000000 # negative bit mask (lw)462set inf_mask, 0x02000000 # infinity bit mask (lw)463set z_mask, 0x04000000 # zero bit mask (lw)464set nan_mask, 0x01000000 # nan bit mask (lw)465466set neg_bmask, 0x08 # negative bit mask (byte)467set inf_bmask, 0x02 # infinity bit mask (byte)468set z_bmask, 0x04 # zero bit mask (byte)469set nan_bmask, 0x01 # nan bit mask (byte)470471set bsun_mask, 0x00008000 # bsun exception mask472set snan_mask, 0x00004000 # snan exception mask473set operr_mask, 0x00002000 # operr exception mask474set ovfl_mask, 0x00001000 # overflow exception mask475set unfl_mask, 0x00000800 # underflow exception mask476set dz_mask, 0x00000400 # dz exception mask477set inex2_mask, 0x00000200 # inex2 exception mask478set inex1_mask, 0x00000100 # inex1 exception mask479480set aiop_mask, 0x00000080 # accrued illegal operation481set aovfl_mask, 0x00000040 # accrued overflow482set aunfl_mask, 0x00000020 # accrued underflow483set adz_mask, 0x00000010 # accrued divide by zero484set ainex_mask, 0x00000008 # accrued 
inexact485486######################################487# FPSR combinations used in the FPSP #488######################################489set dzinf_mask, inf_mask+dz_mask+adz_mask490set opnan_mask, nan_mask+operr_mask+aiop_mask491set nzi_mask, 0x01ffffff #clears N, Z, and I492set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask493set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask494set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask495set inx1a_mask, inex1_mask+ainex_mask496set inx2a_mask, inex2_mask+ainex_mask497set snaniop_mask, nan_mask+snan_mask+aiop_mask498set snaniop2_mask, snan_mask+aiop_mask499set naniop_mask, nan_mask+aiop_mask500set neginf_mask, neg_mask+inf_mask501set infaiop_mask, inf_mask+aiop_mask502set negz_mask, neg_mask+z_mask503set opaop_mask, operr_mask+aiop_mask504set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask505set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask506507#########508# misc. #509#########510set rnd_stky_bit, 29 # stky bit pos in longword511512set sign_bit, 0x7 # sign bit513set signan_bit, 0x6 # signalling nan bit514515set sgl_thresh, 0x3f81 # minimum sgl exponent516set dbl_thresh, 0x3c01 # minimum dbl exponent517518set x_mode, 0x0 # extended precision519set s_mode, 0x4 # single precision520set d_mode, 0x8 # double precision521522set rn_mode, 0x0 # round-to-nearest523set rz_mode, 0x1 # round-to-zero524set rm_mode, 0x2 # round-tp-minus-infinity525set rp_mode, 0x3 # round-to-plus-infinity526527set mantissalen, 64 # length of mantissa in bits528529set BYTE, 1 # len(byte) == 1 byte530set WORD, 2 # len(word) == 2 bytes531set LONG, 4 # len(longword) == 2 bytes532533set BSUN_VEC, 0xc0 # bsun vector offset534set INEX_VEC, 0xc4 # inexact vector offset535set DZ_VEC, 0xc8 # dz vector offset536set UNFL_VEC, 0xcc # unfl vector offset537set OPERR_VEC, 0xd0 # operr vector offset538set OVFL_VEC, 0xd4 # ovfl vector offset539set SNAN_VEC, 0xd8 # snan vector offset540541###########################542# SPecial CONDition FLaGs 
###########################
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# This handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()		#
# denoting that no exceptional conditions exist within the machine.	#
# If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit		#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where		#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# mask 0x00ff01ff clears the condition code byte and every exception status
# bit except inex1; the quotient byte and the accrued byte are kept.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # keep quotient,inex1,accrued

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# d1 (extension, 0-0x7f) indexes tbl_unsupp; the entry fetched is an offset
# from tbl_unsupp to the emulation routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (same value as INEX_VEC)
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

# mask 0xffff00ff clears only the exception status byte; the condition
# codes, quotient byte and accrued byte are kept.
	and.l		&0xffff00ff,USER_FPSR(%a6) # clear exception status byte

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# (%sp) is now the high byte of the stacked status register
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	fout() - emulate an opclass 3 instruction
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)
#	_real_unfl() - "callout" for Underflow exception enabled code
#	_real_inex() - "callout" for Inexact exception enabled code
#	_real_trace() - "callout" for Trace exception code
#
# INPUT *************************************************************** #
#	- The system stack contains the FP Unfl exception stack frame
#	- The fsave frame contains the source operand
#
# OUTPUT ************************************************************** #
#	Underflow Exception enabled:
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#	Underflow Exception disabled:
#	- The system stack is unchanged
#	- The "exception present" flag in the fsave frame is cleared
#
# ALGORITHM *********************************************************** #
#	On the 060, if an FP underflow is present as the result of any
# instruction, the 060 will take an underflow exception whether the
# exception is enabled or disabled in the FPCR. For the disabled case,
# this handler emulates the instruction to determine what the correct
# default result should be for the operation. This default result is
# then stored in either the FP regfile, data regfile, or memory.
# Finally, the handler exits through the "callout" _fpsp_done()
# denoting that no exceptional conditions exist within the machine.
# If the exception is enabled, then this handler must create the
# exceptional operand and place it in the fsave state frame, and store
# the default result (only if the instruction is opclass 3). For
# exceptions enabled, this handler must exit through the "callout"
# _real_unfl() so that the operating system enabled underflow handler
# can handle this case.
#	Two other conditions exist. First, if underflow was disabled
# but the inexact exception was enabled and the result was inexact,
# this handler must exit through the "callout" _real_inex().
#	Also, in the case of an opclass three instruction where
# underflow was disabled and the trace exception was enabled, this
# handler must exit through the "callout" _real_trace().
#
#########################################################################

	global	_fpsp_unfl
_fpsp_unfl:

#$#	sub.l	&24,%sp			# make room for src/dst

	link.w	%a6,&-LOCAL_SIZE	# init stack frame

	fsave	FP_SRC(%a6)		# grab the "busy" frame

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l	USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long		# fetch the instruction words
	mov.l	%d0,EXC_OPWORD(%a6)

##############################################################################

	btst	&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w	funfl_out


	lea	FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l	fix_skewed_ops		# fix src op

	lea	FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l	set_tag_x		# tag the operand type
	mov.b	%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst	&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b	funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
	btst	&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b	funfl_extract		# yes

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l	load_fpn2		# load dst into FP_DST

	lea	FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l	set_tag_x		# tag the operand type
	cmpi.b	%d0,&UNNORM		# is operand an UNNORM?
	bne.b	funfl_op2_done		# no
	bsr.l	unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
	mov.b	%d0,DTAG(%a6)		# save dst optype tag

# emulate the instruction: d0 = rnd prec/mode, d1 = extension bits used
# to index tbl_unsupp, a0/a1 = ptrs to FP_SRC/FP_DST.
funfl_extract:

#$#	mov.l	FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l	FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l	FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l	FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l	FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l	FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b	1+EXC_CMDREG(%a6),%d1
	andi.w	&0x007f,%d1		# extract extension

	andi.l	&0x00ff01ff,USER_FPSR(%a6)

	fmov.l	&0x0,%fpcr		# zero current control regs
	fmov.l	&0x0,%fpsr

	lea	FP_SRC(%a6),%a0
	lea	FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l	(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr	(tbl_unsupp.l,%pc,%d1.l*1)

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l	store_fpreg

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
	btst	&unfl_bit,FPCR_ENABLE(%a6)
	bne.b	funfl_unfl_on

funfl_chkinex:
	btst	&inex2_bit,FPCR_ENABLE(%a6)
	bne.b	funfl_inex_on

funfl_exit:
	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6
#$#	add.l	&24,%sp
	bra.l	_fpsp_done

# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst	&unfl_bit,FPSR_EXCEPT(%a6)
	beq.w	funfl_chkinex

funfl_unfl_on2:
	fmovm.x	&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w	&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)	# do this after fmovm,other f<op>s!

	unlk	%a6

	bra.l	_real_unfl

# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst	&inex2_bit,FPSR_EXCEPT(%a6)
	beq.w	funfl_exit

funfl_inex_on2:

	fmovm.x	&0x40,FP_SRC(%a6)	# save EXOP to stack

	mov.b	&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w	&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)	# do this after fmovm,other f<op>s!

	unlk	%a6

	bra.l	_real_inex

#######################################################################
# funfl_out: fmove out (opclass 3) path, entered from _fpsp_unfl when
# bit 5 of EXC_CMDREG is set. calls fout() and then leaves through
# funfl_unfl_on2, funfl_inex_on2, _fpsp_done() or _real_trace().
funfl_out:


#$#	mov.l	FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l	FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l	FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b	&NORM,STAG(%a6)		# set src optype tag

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l	&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l	&0x0,%fpcr		# zero current control regs
	fmov.l	&0x0,%fpsr

	lea	FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l	fout

	btst	&unfl_bit,FPCR_ENABLE(%a6)
	bne.w	funfl_unfl_on2

	btst	&inex2_bit,FPCR_ENABLE(%a6)
	bne.w	funfl_inex_on2

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6
#$#	add.l	&24,%sp

	btst	&0x7,(%sp)		# is trace on?
	beq.l	_fpsp_done		# no

	fmov.l	%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w	&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l	_real_trace

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented
#		        Data Type" exception.
#
#	This handler should be the first code executed upon taking the
#	FP Unimplemented Data Type exception in an operating system.
#
# XREF **************************************************************** #
#	_imem_read_{word,long}() - read instruction word/longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	load_fpn1() - load src operand from FP regfile
#	fout() - emulate an opclass 3 instruction
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	_real_inex() - "callout" to operating system inexact handler
#	_fpsp_done() - "callout" for exit; work all done
#	_real_trace() - "callout" for Trace enabled exception
#	funimp_skew() - adjust fsave src ops to "incorrect" value
#	_real_snan() - "callout" for SNAN exception
#	_real_operr() - "callout" for OPERR exception
#	_real_ovfl() - "callout" for OVFL exception
#	_real_unfl() - "callout" for UNFL exception
#	get_packed() - fetch packed operand from memory
#
# INPUT *************************************************************** #
#	- The system stack contains the "Unimp Data Type" stk frame
#	- The fsave frame contains the src op (for UNNORM/DENORM)
#
# OUTPUT ************************************************************** #
#	If Inexact exception (opclass 3):
#	- The system stack is changed to an Inexact exception stk frame
#	If SNAN exception (opclass 3):
#	- The system stack is changed to an SNAN exception stk frame
#	If OPERR exception (opclass 3):
#	- The system stack is changed to an OPERR exception stk frame
#	If OVFL exception (opclass 3):
#	- The system stack is changed to an OVFL exception stk frame
#	If UNFL exception (opclass 3):
#	- The system stack is changed to an UNFL exception stack frame
#	If Trace exception enabled:
#	- The system stack is changed to a Trace exception stack frame
#	Else: (normal case)
#	- Correct result has been stored as appropriate
#
# ALGORITHM *********************************************************** #
#	Two main instruction types can enter here: (1) DENORM or UNNORM
# unimplemented data types. These can be either opclass 0,2 or 3
# instructions, and (2) PACKED unimplemented data format instructions
# also of opclasses 0,2, or 3.
#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src
# operand from the fsave state frame and the dst operand (if dyadic)
# from the FP register file. The instruction is then emulated by
# choosing an emulation routine from a table of routines indexed by
# instruction type. Once the instruction has been emulated and result
# saved, then we check to see if any enabled exceptions resulted from
# instruction emulation. If none, then we exit through the "callout"
# _fpsp_done(). If there is an enabled FP exception, then we insert
# this exception into the FPU in the fsave state frame and then exit
# through _fpsp_done().
#	PACKED opclass 0 and 2 is similar in how the instruction is
# emulated and exceptions handled. The differences occur in how the
# handler loads the packed op (by calling get_packed() routine) and
# by the fact that a Trace exception could be pending for PACKED ops.
# If a Trace exception is pending, then the current exception stack
# frame is changed to a Trace exception stack frame and an exit is
# made through _real_trace().
#	For UNNORM/DENORM opclass 3, the actual move out to memory is
# performed by calling the routine fout(). If no exception should occur
# as the result of emulation, then an exit either occurs through
# _fpsp_done() or through _real_trace() if a Trace exception is pending
# (a Trace stack frame must be created here, too). If an FP exception
# should occur, then we must create an exception stack frame of that
# type and jump to either _real_snan(), _real_operr(), _real_inex(),
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3
# emulation is performed in a similar manner.
#
#########################################################################

#
# (1) DENORM and UNNORM (unimplemented) data types:
#
#                               post-instruction
#                               *****************
#                               *      EA       *
#        pre-instruction        *               *
#       *****************       *****************
#       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
#       *****************       *****************
#       *     Next      *       *     Next      *
#       *      PC       *       *      PC       *
#       *****************       *****************
#       *      SR       *       *      SR       *
#       *****************       *****************
#
# (2) PACKED format (unsupported) opclasses two and three:
#       *****************
#       *      EA       *
#       *               *
#       *****************
#       * 0x2 *  0x0dc  *
#       *****************
#       *     Next      *
#       *      PC       *
#       *****************
#       *      SR       *
#       *****************
#
	global	_fpsp_unsupp
_fpsp_unsupp:

	link.w	%a6,&-LOCAL_SIZE	# init stack frame

	fsave	FP_SRC(%a6)		# save fp state

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst	&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b	fu_s
fu_u:
	mov.l	%usp,%a0		# fetch user stack pointer
	mov.l	%a0,EXC_A7(%a6)		# save on stack
	bra.b	fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea	0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l	%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l	USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long		# fetch the instruction words
	mov.l	%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b	SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst	&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w	fu_out			# yes

# Separate packed opclass two instructions.
	bfextu	EXC_CMDREG(%a6){&0:&6},%d0
	cmpi.b	%d0,&0x13
	beq.w	fu_in_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l	&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l	&0x0,%fpcr		# zero current control regs
	fmov.l	&0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea	FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l	fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type.
# if the input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea	FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l	set_tag_x		# tag the operand type
	cmpi.b	%d0,&UNNORM		# is operand an UNNORM?
	bne.b	fu_op2			# no
	bsr.l	unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2:
	mov.b	%d0,STAG(%a6)		# save src optype tag

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst	&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b	fu_extract		# monadic
	cmpi.b	1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b	fu_extract		# yes, so it's monadic, too

	bsr.l	load_fpn2		# load dst into FP_DST

	lea	FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l	set_tag_x		# tag the operand type
	cmpi.b	%d0,&UNNORM		# is operand an UNNORM?
	bne.b	fu_op2_done		# no
	bsr.l	unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
	mov.b	%d0,DTAG(%a6)		# save dst optype tag

# emulate the instruction: d0 = rnd mode/prec, d1 = 7-bit extension used
# to index tbl_unsupp, a0/a1 = ptrs to FP_SRC/FP_DST.
fu_extract:
	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu	1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea	FP_SRC(%a6),%a0
	lea	FP_DST(%a6),%a1

	mov.l	(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr	(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b	FPCR_ENABLE(%a6),%d0	# fetch exceptions set
	bne.b	fu_in_ena		# some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
	mov.b	1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b	&0x38,%d0		# extract bits 3-5
	cmpi.b	%d0,&0x38		# is instr fcmp or ftst?
	beq.b	fu_in_exit		# yes

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l	store_fpreg		# store the result

fu_in_exit:

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	bra.l	_fpsp_done

fu_in_ena:
	and.b	FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo	%d0{&24:&8},%d0		# find highest priority exception
	bne.b	fu_in_exc		# there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
# NOTE(review): the code below does not branch to _real_inex() itself. it
# goes to fu_in_exc_ovfl, which restores an fsave frame carrying status
# 0xe005 (tbl_except entry 3) and exits through _fpsp_done().
#
	btst	&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b	fu_in_cont		# no

fu_in_ovflchk:
	btst	&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b	fu_in_cont		# no
	bra.w	fu_in_exc_ovfl		# go insert overflow frame

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc:
	subi.l	&24,%d0			# fix offset to be 0-7
	cmpi.b	%d0,&0x6		# is exception INEX? (6)
	bne.b	fu_in_exc_exit		# no

# the enabled exception was inexact
	btst	&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w	fu_in_exc_unfl		# yes
	btst	&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w	fu_in_exc_ovfl		# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
# d0 = index (0-7) into tbl_except.
fu_in_exc_exit:
	mov.l	%d0,-(%sp)		# save d0
	bsr.l	funimp_skew		# skew sgl or dbl inputs
	mov.l	(%sp)+,%d0		# restore d0

	mov.w	(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)	# restore src op

	unlk	%a6

	bra.l	_fpsp_done

# fsave status words, indexed by the exception number computed in fu_in_exc
# (or forced by fu_in_exc_unfl/fu_in_exc_ovfl below).
tbl_except:
	short	0xe000,0xe006,0xe004,0xe005
	short	0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
	mov.w	&0x4,%d0		# select tbl_except entry 4 (0xe003)
	bra.b	fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w	&0x03,%d0		# select tbl_except entry 3 (0xe005)
	bra.b	fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# in:	a0 = ptr to the operand (accessed via LOCAL_EX/LOCAL_HI/LOCAL_LO)
#	a6 = exception frame (EXC_CMDREG is read to get opclass/src fmt)
# out:	operand at (a0) adjusted in place; d0 is used as scratch.
#	norm() is called for denorms and is relied upon to preserve a0.
	global	fix_skewed_ops
fix_skewed_ops:
	bfextu	EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b	%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b	fso_sgl			# yes
	cmpi.b	%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b	fso_dbl			# yes
	rts				# no

fso_sgl:
	mov.w	LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w	&0x7fff,%d0		# strip sign
	cmpi.w	%d0,&0x3f80		# is |exp| == $3f80?
	beq.b	fso_sgl_dnrm_zero	# yes
	cmpi.w	%d0,&0x407f		# no; is |exp| == $407f?
	beq.b	fso_infnan		# yes
	rts				# no

fso_sgl_dnrm_zero:
	andi.l	&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b	fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l	norm			# normalize mantissa
	neg.w	%d0			# -shft amt
	addi.w	&0x3f81,%d0		# adjust new exponent
	andi.w	&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w	%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w	&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b	&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w	&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

fso_dbl:
	mov.w	LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w	&0x7fff,%d0		# strip sign
	cmpi.w	%d0,&0x3c00		# is |exp| == $3c00?
	beq.b	fso_dbl_dnrm_zero	# yes
	cmpi.w	%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b	fso_infnan		# yes
	rts				# no

fso_dbl_dnrm_zero:
	andi.l	&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b	fso_dbl_dnrm		# it's a skewed denorm
	tst.l	LOCAL_LO(%a0)		# is it a zero?
	beq.b	fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l	norm			# normalize mantissa
	neg.w	%d0			# -shft amt
	addi.w	&0x3c01,%d0		# adjust new exponent
	andi.w	&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w	%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC.
# Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
	bfextu	EXC_CMDREG(%a6){&3:&3},%d0
	cmpi.b	%d0,&0x3
	beq.w	fu_out_pack
	cmpi.b	%d0,&0x7
	beq.w	fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l	&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l	&0x0,%fpcr		# zero current control regs
	fmov.l	&0x0,%fpsr

# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
	mov.w	FP_SRC_EX(%a6),%d0	# get exponent
	andi.w	&0x7fff,%d0		# strip sign
	beq.b	fu_out_denorm		# it's a DENORM

	lea	FP_SRC(%a6),%a0
	bsr.l	unnorm_fix		# yes; fix it

	mov.b	%d0,STAG(%a6)

	bra.b	fu_out_cont
fu_out_denorm:
	mov.b	&DENORM,STAG(%a6)
fu_out_cont:

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l	(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l	fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: none
#	OPERR	: fmove.{b,w,l} out of large UNNORM
#	OVFL	: fmove.{s,d}
#	UNFL	: fmove.{s,d,x}
#	DZ	: none
#	INEX2	: all
#	INEX1	: none (packed doesn't travel through here)

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b	FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w	fu_out_ena		# some are enabled

fu_out_done:

	mov.l	EXC_A6(%a6),(%a6)	# in case a6 changed

# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
	btst	&0x5,EXC_SR(%a6)
	bne.b	fu_out_done_s

	mov.l	EXC_A7(%a6),%a0		# restore a7
	mov.l	%a0,%usp

fu_out_done_cont:
	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	btst	&0x7,(%sp)		# is trace on?
	bne.b	fu_out_trace		# yes

	bra.l	_fpsp_done

# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so, the exception stack frame is shifted down by
# 0xc bytes below and the result is copied to where the frame used to start.
fu_out_done_s:
	cmpi.b	SPCOND_FLG(%a6),&mda7_flg
	bne.b	fu_out_done_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
	fmovm.x	&0x80,FP_SRC(%a6)	# put answer on stack

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l	(%a6),%a6		# restore frame pointer

# shift the SR/PC part of the exception frame down by 0xc bytes
	mov.l	LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l	LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l	LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l	LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l	LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l	&LOCAL_SIZE-0x8,%sp

	btst	&0x7,(%sp)		# is trace on?
	bne.b	fu_out_trace		# yes

	bra.l	_fpsp_done

fu_out_ena:
	and.b	FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo	%d0{&24:&8},%d0		# find highest priority exception
	bne.b	fu_out_exc		# there is at least one set

# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
	btst	&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w	fu_out_done		# no

fu_out_ovflchk:
	btst	&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w	fu_out_done		# no
	bra.w	fu_inex			# yes

#
# The fp move out that took the "Unimplemented Data Type" exception was
# being traced.
Since the stack frames are similar, get the "current" PC1633# from FPIAR and put it in the trace stack frame then jump to _real_trace().1634#1635# UNSUPP FRAME TRACE FRAME1636# ***************** *****************1637# * EA * * Current *1638# * * * PC *1639# ***************** *****************1640# * 0x3 * 0x0dc * * 0x2 * 0x024 *1641# ***************** *****************1642# * Next * * Next *1643# * PC * * PC *1644# ***************** *****************1645# * SR * * SR *1646# ***************** *****************1647#1648fu_out_trace:1649mov.w &0x2024,0x6(%sp)1650fmov.l %fpiar,0x8(%sp)1651bra.l _real_trace16521653# an exception occurred and that exception was enabled.1654fu_out_exc:1655subi.l &24,%d0 # fix offset to be 0-816561657# we don't mess with the existing fsave frame. just re-insert it and1658# jump to the "_real_{}()" handler...1659mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d01660jmp (tbl_fu_out.b,%pc,%d0.w*1)16611662swbeg &0x81663tbl_fu_out:1664short tbl_fu_out - tbl_fu_out # BSUN can't happen1665short tbl_fu_out - tbl_fu_out # SNAN can't happen1666short fu_operr - tbl_fu_out # OPERR1667short fu_ovfl - tbl_fu_out # OVFL1668short fu_unfl - tbl_fu_out # UNFL1669short tbl_fu_out - tbl_fu_out # DZ can't happen1670short fu_inex - tbl_fu_out # INEX21671short tbl_fu_out - tbl_fu_out # INEX1 won't make it here16721673# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just1674# frestore it.1675fu_snan:1676fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11677fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1678movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a116791680mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd81681mov.w &0xe006,2+FP_SRC(%a6)16821683frestore FP_SRC(%a6)16841685unlk %a6168616871688bra.l _real_snan16891690fu_operr:1691fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11692fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1693movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a116941695mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 
0xd01696mov.w &0xe004,2+FP_SRC(%a6)16971698frestore FP_SRC(%a6)16991700unlk %a6170117021703bra.l _real_operr17041705fu_ovfl:1706fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack17071708fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11709fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1710movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a117111712mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd41713mov.w &0xe005,2+FP_SRC(%a6)17141715frestore FP_SRC(%a6) # restore EXOP17161717unlk %a617181719bra.l _real_ovfl17201721# underflow can happen for extended precision. extended precision opclass1722# three instruction exceptions don't update the stack pointer. so, if the1723# exception occurred from user mode, then simply update a7 and exit normally.1724# if the exception occurred from supervisor mode, check if1725fu_unfl:1726mov.l EXC_A6(%a6),(%a6) # restore a617271728btst &0x5,EXC_SR(%a6)1729bne.w fu_unfl_s17301731mov.l EXC_A7(%a6),%a0 # restore a7 whether we need1732mov.l %a0,%usp # to or not...17331734fu_unfl_cont:1735fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack17361737fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp11738fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs1739movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a117401741mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc1742mov.w &0xe003,2+FP_SRC(%a6)17431744frestore FP_SRC(%a6) # restore EXOP17451746unlk %a617471748bra.l _real_unfl17491750fu_unfl_s:1751cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?1752bne.b fu_unfl_cont17531754# the extended precision result is still in fp0. but, we need to save it1755# somewhere on the stack until we can copy it to its final resting place1756# (where the exc frame is currently). 
# make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# copy SR, PC and EA of the exception frame 12 bytes (0xc) lower on the stack.
# the local frame has not been released yet, so everything is addressed
# relative to sp with a LOCAL_SIZE bias.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# drop locals; sp -> shifted frame

	bra.l		_real_unfl

# fmove in and out enter here.
# same exit sequence as the other exception exits, with the word at EXC_VOFF
# set to 0x30c4 and fsave status 0xe001; leaves through _real_inex.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex

#########################################################################
#########################################################################
#
# fu_in_pack: packed-source path.
# fetches the packed source with get_packed, tags it, loads and tags the
# destination register for dyadic operations, dispatches to the emulation
# routine through tbl_unsupp, then decides between a plain exit and
# re-creating an enabled exception.
#
fu_in_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# call the emulation routine: d0 = rnd mode/prec, a0 -> src, a1 -> dst.
# d1 = 7-bit extension field, used to index tbl_unsupp; the table entry is
# then used as an offset from tbl_unsupp for the jsr.
fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled

fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
# the upper longword is moved first so it is read before being overwritten.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# d0 = enabled exceptions on entry. after the bfffo, d0 holds the bit offset
# (24 and up) of the highest priority exception that is both enabled and set.
fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled.
# Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#		branch to _real_inex() (even if the result was exact!);
#	} else {
#		save the result in the proper fp reg (unless the op is fcmp or ftst);
#		return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
# on entry d0 is expected to hold a bfffo bit offset in the range 24-31;
# subtracting 24 turns it into an index (0-7) into tbl_except_p.
fu_in_exc_p:
	subi.l		&24,%d0			# fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
	blt.b		fu_in_exc_exit_p	# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl_p	# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl_p	# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
fu_in_exc_exit_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exc_exit_s_p	# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exc_exit_cont_p:
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # fsave status for exc d0

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done

# fsave status words, indexed by the exception index (0-7) left in d0.
# index 3 is the overflow entry (0xe005) and index 4 the underflow entry
# (0xe003); both of the last two entries (6 and 7) map to 0xe001.
tbl_except_p:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

# enabled inexact together with a disabled overflow: force table index 3.
fu_in_exc_ovfl_p:
	mov.w		&0x3,%d0
	bra.w		fu_in_exc_exit_p

# enabled inexact together with a disabled underflow: force table index 4.
fu_in_exc_unfl_p:
	mov.w		&0x4,%d0
	bra.w		fu_in_exc_exit_p

# supervisor mode exit. if the <ea> mode was not (a7)+ (mia7_bit clear in
# SPCOND_FLG) the normal exit is used; otherwise the exception state is
# restored here and the stack frame is shifted "up" by 12 bytes.
fu_in_exc_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6)
	beq.b		fu_in_exc_exit_cont_p

	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6			# unravel stack frame

# shift stack frame "up". who cares about <ea> field.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced.
# Make the "current" PC the FPIAR and put it in the
# trace stack frame then jump to _real_trace().
#
#		  UNSUPP FRAME		   TRACE FRAME
#		*****************	*****************
#		*      EA	*	*    Current	*
#		*		*	*      PC	*
#		*****************	*****************
#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
#		*****************	*****************
#		*     Next	*	*     Next	*
#		*      PC	*	*      PC	*
#		*****************	*****************
#		*      SR	*	*      SR	*
#		*****************	*****************
fu_trace_p:
	mov.w		&0x2024,0x6(%sp)	# format/vector word: 0x2 / 0x024
	fmov.l		%fpiar,0x8(%sp)		# FPIAR replaces the EA longword

	bra.l		_real_trace

#########################################################
#########################################################
#
# fu_out_pack: packed "fmove out" path.
# loads the source register with load_fpn1, tags it, calls fout to perform
# the move, then either exits or re-creates an enabled SNAN/OPERR/INEX
# exception.
#
fu_out_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		load_fpn1

# unlike other opclass 3, unimplemented data type exceptions, packed must be
# able to detect all operand types.
	lea		FP_SRC(%a6),%a0
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
#	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled

fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# move SR and PC 12 bytes down. the <ea> longword is not copied on this path.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)
	bne.w		fu_trace_p

	bra.l		_fpsp_done

# d0 = enabled exceptions on entry; after the bfffo, d0 = bit offset of the
# highest priority exception that is both enabled and set.
fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 still holds the raw bfffo offset: 0x1a selects the OPERR path, anything
# greater the INEX path, anything smaller falls through to SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a
	bgt.w		fu_inex_p2
	beq.w		fu_operr_p

fu_snan_p:
	btst		&0x5,EXC_SR(%a6)
	bne.b		fu_snan_s_p

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_snan

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan

fu_operr_p:
	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_operr_p_s

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_operr

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes.
# then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_operr

# enabled inexact on packed move out. user mode only needs the user a7
# updated before joining fu_inex; supervisor mode is checked for -(a7) below.
fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_inex_s_p2

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_inex

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex

#########################################################################

#
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
#
# funimp_skew works in place on FP_SRC(%a6). the 3-bit source specifier is
# taken from EXC_EXTWORD: 1 selects the single path, 5 the double path, and
# any other value returns without touching the operand.
# uses d0 and d1; the double path also loads a0 and calls dnrm_lp.
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts

# single: only exponents in the range 1..0x3f80 are adjusted. the mantissa
# is shifted right by (0x3f81 - exponent), the top mantissa bit is set and
# the exponent is replaced by 0x3f80 with the sign preserved.
funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not
	cmpi.w		%d0,&0x3f80
	bgt.b		funimp_skew_sgl_not
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
	rts

# double: only exponents in the range 1..0x3c00 are adjusted. the sign is
# parked in byte 2 of FP_SRC while dnrm_lp denormalizes the operand against
# threshold 0x3c01; afterwards the exponent becomes 0x3c00 (plus sign).
funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not
	cmpi.w		%d0,&0x3c00
	bgt.b		funimp_skew_dbl_not

	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
#
# _mem_write2: 12-byte write helper.
# user mode (bit 5 of EXC_SR clear): branch to _dmem_write with the
# registers untouched.
# supervisor mode: copy the three longwords at (a0) into FP_DST and return
# with d1 cleared.
#
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)
	beq.l		_dmem_write
	mov.l		0x0(%a0),FP_DST_EX(%a6)
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#			effective address" exception.			#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.
#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
#	decbin() - convert packed data to FP binary data		#
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
#	_real_access() - "callout" for access error exception		#
#	_imem_read() - read extended immediate operand from memory	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
#	fmovm_ctrl() - emulate fmovm control instruction		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimplemented <ea>" stk frame	#
#									#
# OUTPUT **************************************************************	#
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If FPU disabled:						#
#	- The system stack is changed to an FPU disabled stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- None (correct result has been stored as appropriate)		#
#									#
# ALGORITHM ***********************************************************	#
#	This exception handles 3 types of operations:			#
# (1) FP Instructions using extended precision or packed immediate	#
#     addressing mode.							#
# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
#									#
#	For immediate data operations, the data is read in w/ a		#
# _imem_read() "callout", converted to FP binary (if packed), and used	#
# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
# emulation, then the result is stored to the destination register and	#
# the handler exits through _fpsp_done(). If an enabled exc has been	#
# signalled as a result of emulation, then an fsave state frame		#
# corresponding to the FP exception type must be entered into the 060	#
# FPU before exiting. In either the enabled or disabled cases, we	#
# must also check if a Trace exception is pending, in which case, we	#
# must create a Trace exception stack frame from the current exception	#
# stack frame. If no Trace is pending, we simply exit through		#
# _fpsp_done().								#
#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
# decode and emulate the instruction. No FP exceptions can be pending	#
# as a result of this operation emulation. A Trace exception can be	#
# pending, though, which means the current stack frame must be changed	#
# to a Trace stack frame and an exit made through _real_trace().	#
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
# was executed from supervisor mode, this handler must store the FP	#
# register file values to the system stack by itself since		#
# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
# Again, a Trace exception may be pending and an exit made through	#
# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
#									#
#	Before any of the above is attempted, it must be checked to	#
# see if the FPU is disabled.
# Since the "Unimp <ea>" exception is taken				#
# before the "FPU disabled" exception, but the "FPU disabled" exception	#
# has higher priority, we check the disabled bit in the PCR. If set,	#
# then we must create an 8 word "FPU disabled" exception stack frame	#
# from the current 4 word exception stack frame. This includes		#
# reproducing the effective address of the instruction to put on the	#
# new stack frame.							#
#									#
#	In the process of all emulation work, if a _imem_read()		#
# "callout" returns a failing result indicating an access error, then	#
# we must create an access error stack frame from the current stack	#
# frame. This information includes a faulting address and a fault-	#
# status-longword. These are created within this handler.		#
#									#
#########################################################################

	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes

#
# here, we will have:
#	fabs	fdabs	fsabs		facos		fmod
#	fadd	fdadd	fsadd		fasin		frem
#	fcmp				fatan		fscale
#	fdiv	fddiv	fsdiv		fatanh		fsin
#	fint				fcos		fsincos
#	fintrz				fcosh		fsinh
#	fmove	fdmove	fsmove		fetox		ftan
#	fmul	fdmul	fsmul		fetoxm1		ftanh
#	fneg	fdneg	fsneg		fgetexp		ftentox
#	fsgldiv				fgetman		ftwotox
#	fsglmul				flog10
#	fsqrt				flog2
#	fsub	fdsub	fssub		flogn
#	ftst				flognp1
# which can all use f<op>.{x,p}
# so, now it's immediate data extended precision AND PACKED FORMAT!
#
# d0 still holds the fetched opword/extension word here; bit 0xa of it
# selects between the extended and the packed immediate paths.
iea_op:
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	btst		&0xa,%d0		# is src fmt x or p?
	bne.b		iea_op_pack		# packed


	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read extended immediate

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

	bra.b		iea_op_setsrc

iea_op_pack:

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read packed operand

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	beq.b		iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b		iea_op_setsrc		# operand is a ZERO
iea_op_gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop

iea_op_setsrc:
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer

# FP_SRC now holds the src operand.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_getdst		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,STAG(%a6)		# set new optype tag
iea_op_getdst:
	clr.b		STORE_FLG(%a6)		# clear "store result" boolean

	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		iea_op_extract		# monadic
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
	bne.b		iea_op_spec		# yes

iea_op_loaddst:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		load_fpn2		# load dst operand

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_extract		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,DTAG(%a6)		# set new optype tag
	bra.b		iea_op_extract

# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
	beq.b		iea_op_extract		# yes
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
# store a result. then, only fcmp will branch back and pick up a dst operand.
	st		STORE_FLG(%a6)		# don't store a final result
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
	beq.b		iea_op_loaddst		# yes

# call the emulation routine: d0 = rnd mode/prec, a0 -> src, a1 -> dst.
# d1 = low 7 bits of the extension byte, used to index tbl_unsupp.
iea_op_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	fmov.l		&0x0,%fpcr
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all operations
#	OPERR	: all reg-reg or mem-reg operations that can normally operr
#	OVFL	: same as OPERR
#	UNFL	: same as OPERR
#	DZ	: same as OPERR
#	INEX2	: same as OPERR
#	INEX1	: all packed immediate operations
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.b		iea_op_ena		# some are enabled

# now, we save the result, unless, of course, the operation was ftst or fcmp.
# these don't save results.
iea_op_save:
	tst.b		STORE_FLG(%a6)		# does this op store a result?
	bne.b		iea_op_exit1		# exit with no frestore

iea_op_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		store_fpreg		# store the result

iea_op_exit1:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

# d0 = enabled exceptions on entry; after the bfffo, d0 = bit offset (24 and
# up) of the highest priority exception that is both enabled and set.
iea_op_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		iea_op_exc		# at least one was set

# no exception occurred. now, did a disabled, exact overflow occur with inexact
# enabled? if so, then we have to stuff an overflow frame into the FPU.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.b		iea_op_save

iea_op_ovfl:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.b		iea_op_store		# no
	bra.b		iea_op_exc_ovfl		# yes

# an enabled exception occurred. we have to insert the exception type back into
# the machine.
# NOTE(review): only index 6 is treated as inexact here, although
# tbl_iea_except also has an entry for index 7 and the packed-source handler
# (fu_in_exc_p) treats 6 and 7 alike -- confirm whether index 7 should also
# take the forced overflow/underflow path.
iea_op_exc:
	subi.l		&24,%d0			# fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		iea_op_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		iea_op_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		iea_op_exc_unfl		# yes

iea_op_exc_force:
	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		iea_op_exit2		# exit with frestore

# fsave status words, indexed by the exception index (0-7) in d0.
tbl_iea_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

iea_op_exc_ovfl:
	mov.w		&0xe005,2+FP_SRC(%a6)	# force overflow fsave status
	bra.b		iea_op_exit2

iea_op_exc_unfl:
	mov.w		&0xe003,2+FP_SRC(%a6)	# force underflow fsave status

# exit with an exception pending: same as iea_op_exit1, plus an frestore of
# the frame image in FP_SRC carrying the status word selected above.
iea_op_exit2:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore exceptional state

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.b		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two instruction that took an "Unimplemented Effective Address"
# exception was being traced.
# Make the "current" PC the FPIAR and put it in
# the trace stack frame then jump to _real_trace().
#
#	UNIMP EA FRAME		   TRACE FRAME
#	*****************	*****************
#	* 0x0 *  0x0f0	*	*    Current	*
#	*****************	*      PC	*
#	*    Current	*	*****************
#	*      PC	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*      SR	*	*     Next	*
#	*****************	*      PC	*
#				*****************
#				*      SR	*
#				*****************
iea_op_trace:
	mov.l		(%sp),-(%sp)		# shift stack frame "down"
	mov.w		0x8(%sp),0x4(%sp)
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR

	bra.l		_real_trace

#########################################################################
# fmovm emulation for the Unimp EA exception.  bit 14 of d0 selects between
# the control-register form (clear) and the data-register form (set).
# NOTE(review): d0 is loaded by the caller; presumably it holds the fetched
# opword/extension word -- confirm against the code before this chunk.
iea_fmovm:
	btst		&14,%d0			# ctrl or data reg
	beq.w		iea_fmovm_ctrl

iea_fmovm_data:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
	bne.b		iea_fmovm_data_s

# user mode: hand fmovm_dynamic() the user stack pointer via EXC_A7 and
# write back whatever value it leaves there.
iea_fmovm_data_u:
	mov.l		%usp,%a0
	mov.l		%a0,EXC_A7(%a6)		# store current a7
	bsr.l		fmovm_dynamic		# do dynamic fmovm
	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
	mov.l		%a0,%usp		# update usp
	bra.w		iea_fmovm_exit

# supervisor mode: EXC_A7 is set to the address just past the stacked
# vector offset word (0x2+EXC_VOFF), i.e. the end of the 4-word frame.
# SPCOND_FLG is cleared first and examined after the call to detect the
# -(a7) and (a7)+ special cases.
iea_fmovm_data_s:
	clr.b		SPCOND_FLG(%a6)
	lea		0x2+EXC_VOFF(%a6),%a0
	mov.l		%a0,EXC_A7(%a6)
	bsr.l		fmovm_dynamic		# do dynamic fmovm

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.w		iea_fmovm_data_predec
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
	bne.w		iea_fmovm_exit

# right now, d0 = the size.
# the data has been fetched from the supervisor stack, but we have not
# incremented the stack pointer by the appropriate number of bytes.
# do it here.
# the exception frame is rebuilt d0 bytes higher in memory; the address of
# the rebuilt frame is parked in EXC_SR(%a6) and loaded into sp by the
# "mov.l (%sp)+,%sp" that follows the unlk.
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_data_pi_trace	# yes

	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0

	lea		(EXC_SR,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)		# park new sp value

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the relocated frame
	bra.l		_fpsp_done

# same as above, but a 6-word trace frame (fmt 0x2, voff 0x024) is built
# instead, so the frame starts 4 bytes lower.
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)

	lea		(EXC_SR-0x4,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)		# park new sp value

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the relocated frame
	bra.l		_real_trace

# right now, d0 = size and d1 = the strg.
# (the code below stores d1 as "strg" and d0 as "size"; the size byte is
# later negated and used as the stack displacement, and the strg byte is
# scanned bit by bit to select the registers to store.)
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# traced: build a trace frame (fmt 0x2, voff 0x024) "size" bytes lower.
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# not traced: rebuild the 4-word frame (fmt 0x0, voff 0x0f0) "size" bytes
# lower.
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	pea		(0x4,%a6,%d0)		# create final sp

# walk the strg byte from bit 7 down to bit 0.  for every set bit, store
# one register (fmovm.x list mask 0x80, 0x40, ... 0x01) and advance the
# displacement in d0 by 12 bytes.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# reload the d1, d0 and a6 copies pushed above, then switch to the final sp
# that the pea left on top of the stack.
fm_end:
	mov.l		0x4(%sp),%d1
	mov.l		0x8(%sp),%d0
	mov.l		0xc(%sp),%a6
	mov.l		(%sp)+,%sp

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace

#########################################################################
iea_fmovm_ctrl:

	bsr.l		fmovm_ctrl		# load ctrl regs

# common exit for the fmovm paths that did not have to relocate the frame.
iea_fmovm_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_trace		# yes

	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6)	# set Next PC

	unlk		%a6			# unravel the frame

	bra.l		_fpsp_done		# exit to os

#
# The control reg instruction that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA.
# The "Next PC" is in EXC_EXTWPTR.
# After fixing the stack frame, jump to _real_trace().
#
#	UNIMP EA FRAME		   TRACE FRAME
#	*****************	*****************
#	* 0x0 *  0x0f0	*	*    Current	*
#	*****************	*      PC	*
#	*    Current	*	*****************
#	*      PC	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*      SR	*	*     Next	*
#	*****************	*      PC	*
#				*****************
#				*      SR	*
#				*****************
# this ain't a pretty solution, but it works:
# -restore a6 (not with unlk)
# -shift stack frame down over where old a6 used to be
# -add LOCAL_SIZE to stack pointer
#
# all frame accesses below are made relative to sp (EXC_xxx+LOCAL_SIZE)
# because a6 no longer points at the local frame after the first
# instruction.
iea_fmovm_trace:
	mov.l		(%a6),%a6		# restore frame pointer
	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR
	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# "Current PC"
	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# "Next PC"
	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp)	# stk fmt = 0x2; voff = 0x024
	add.l		&LOCAL_SIZE,%sp		# clear stack frame

	bra.l		_real_trace

#########################################################################
# The FPU is disabled and so we should really have taken the "Line
# F Emulator" exception. So, here we create an 8-word stack frame
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC".
# This is trivial for
# immediate operands but requires some extra work for fmovm dynamic
# which can use most addressing modes.
iea_disabled:
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

# the low word of d0 is the extension word; its sign bit (bit 15)
# distinguishes fmovm from the immediate-operand case.
	tst.w		%d0			# is instr fmovm?
	bmi.b		iea_dis_fmovm		# yes
# instruction is using an extended precision immediate operand. Therefore,
# the total instruction length is 16 bytes.
iea_dis_immed:
	mov.l		&0x10,%d0		# 16 bytes of instruction
	bra.b		iea_dis_cont
iea_dis_fmovm:
	btst		&0xe,%d0		# is instr fmovm ctrl
	bne.b		iea_dis_fmovm_data	# no
# the instruction is a fmovm.l with 2 or 3 registers.
# length is 0xc bytes, or 0x10 bytes when the 3-bit register field
# extracted below is 0x7 (all three control registers).
	bfextu		%d0{&19:&3},%d1
	mov.l		&0xc,%d0
	cmpi.b		%d1,&0x7		# move all regs?
	bne.b		iea_dis_cont
	addq.l		&0x4,%d0
	bra.b		iea_dis_cont
# the instruction is an fmovm.x dynamic which can use many addressing
# modes and thus can have several different total instruction lengths.
# call fmovm_calc_ea which will go through the ea calc process and,
# as a by-product, will tell us how long the instruction is.
iea_dis_fmovm_data:
	clr.l		%d0
	bsr.l		fmovm_calc_ea
	mov.l		EXC_EXTWPTR(%a6),%d0
	sub.l		EXC_PC(%a6),%d0		# length = ext word ptr - instr addr
# d0 = instruction length in bytes.  it is parked in the stacked vector
# offset word so that it survives the register restore below.
iea_dis_cont:
	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# here, we actually create the 8-word frame from the 4-word frame,
# with the "next PC" as additional info.
# the <ea> field is left undefined.
	subq.l		&0x8,%sp		# make room for new stack
	mov.l		%d0,-(%sp)		# save d0
	mov.w		0xc(%sp),0x4(%sp)	# move SR
	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
	clr.l		%d0
	mov.w		0x12(%sp),%d0		# fetch instr length parked in voff
	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
	add.l		%d0,0x6(%sp)		# make Next PC
	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
						# (stk fmt = 0x4; voff = 0x02c)
	mov.l		(%sp)+,%d0		# restore d0

	bra.l		_real_fpu_disabled

##########

# instruction-fetch access error exit: turn the 4-word frame into an
# 8-word access error frame and leave through _real_access().
# the FP register restore is skipped when bit 1 of the PCR is set.
# NOTE(review): bit 1 of the 68060 PCR is the FPU-disable bit according to
# the user's manual -- confirm.
iea_iacc:
	movc		%pcr,%d0
	btst		&0x1,%d0
	bne.b		iea_iacc_cont
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
iea_iacc_cont:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	subq.w		&0x8,%sp		# make stack frame bigger
	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
						# (stk fmt = 0x4; voff = 0x008)
	mov.l		0x2(%sp),0x8(%sp)	# store ea
	mov.l		&0x09428001,0xc(%sp)	# store fslw

# shared tail for iea_iacc and iea_dacc: (%sp) is the first byte of the
# stacked SR and 0xd(%sp) is the second byte of the stacked fslw.
iea_acc_done:
	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		iea_acc_done2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

iea_acc_done2:
	bra.l		_real_access

# data access error exit.  on entry a0 and d0 are written into the new
# frame: a0 into the slot that iea_iacc uses for the ea, and the low word
# of d0 into the upper word of the fslw slot (the lower word is set to
# 0x0001).
# NOTE(review): presumably a0 = faulting address and d0 = fslw bits
# supplied by the caller -- confirm against the callers.
iea_dacc:
	lea		-LOCAL_SIZE(%a6),%sp

	movc		%pcr,%d1
	btst		&0x1,%d1
	bne.b		iea_dacc_cont		# skip FP restore (see iea_iacc)
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar	# restore ctrl regs
iea_dacc_cont:
	mov.l		(%a6),%a6

	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)	# stk fmt = 0x4; voff = 0x008
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)

	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303	# restore d0-d1/a0-a1
	add.w		&LOCAL_SIZE-0x4,%sp

	bra.b		iea_acc_done

#########################################################################
# XDEF ****************************************************************
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.
#
#	This handler should be the first code executed upon taking the
#	FP Operand Error exception in an operating system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	_real_operr() - "callout" to operating system operr handler
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)
#
# INPUT ***************************************************************
#	- The system stack contains the FP Operr exception frame
#	- The fsave frame contains the source operand
#
# OUTPUT **************************************************************
#	No access error:
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#
# ALGORITHM ***********************************************************
#	In a system where the FP Operr exception is enabled, the goal
# is to get to the handler specified at _real_operr(). But, on the 060,
# for opclass zero and two instructions taking this exception, the
# input operand in the fsave frame may be incorrect for some cases
# and needs to be corrected. This handler calls fix_skewed_ops() to
# do just this and then exits through _real_operr().
#	For opclass 3 instructions, the 060 doesn't store the default
# operr result out to memory or data register file as it should.
# This code must emulate the move out before finally exiting through
# _real_operr(). The move out, if to memory, is performed using
# _mem_write() "callout" routines that may return a failing result.
# In this special case, the handler must exit through facc_out()
# which creates an access error stack frame from the current operr
# stack frame.
#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore everything saved on entry, frestore the fsave frame
# and leave through the _real_operr() callout.
foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
# the default result is built as a longword in L_SCR1:
#	- source is a NAN (exponent 0x7fff, non-zero mantissa): the upper
#	  longword of the source mantissa
#	- otherwise: 0x7fffffff for a positive source, 0x80000000 for a
#	  negative one
# the byte and word stores then use the most significant byte/word of
# that longword.
#
foperr_out:

	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1		# strip the sign bit
	cmpi.w		%d1,&0x7fff
	bne.b		foperr_out_not_qnan
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)
	bne.b		foperr_out_qnan		# lo mantissa != 0: NAN
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1		# ignore mantissa bit 63
	beq.b		foperr_out_not_qnan	# mantissa == 0: infinity
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1		# largest positive integer
	tst.b		FP_SRC_EX(%a6)		# is the source negative?
	bpl.b		foperr_out_not_qnan2	# no
	addq.l		&0x1,%d1		# yes; d1 = 0x80000000
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

# dispatch on the 3-bit destination format field.  d0 still holds the
# instruction longword fetched on entry.
foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

# entries are offsets from tbl_operr.  an offset of zero (the "shouldn't
# happen" / "won't enter here" entries) points at the table itself.
tbl_operr:
	short		foperr_out_l - tbl_operr	# long word integer
	short		tbl_operr    - tbl_operr	# sgl prec shouldn't happen
	short		tbl_operr    - tbl_operr	# ext prec shouldn't happen
	short		foperr_exit  - tbl_operr	# packed won't enter here
	short		foperr_out_w - tbl_operr	# word integer
	short		tbl_operr    - tbl_operr	# dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr	# byte integer
	short		tbl_operr    - tbl_operr	# packed won't enter here

# in the three store routines below, d1 = <ea> mode,reg byte of the opword;
# a value <= 7 means the destination is data register d1.
foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load default result (top byte)
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load default result (top word)
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF ****************************************************************
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.
#
#	This handler should be the first code executed upon taking the
#	FP Signalling NAN exception in an operating system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	_real_snan() - "callout" to operating system SNAN handler
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>
#
# INPUT ***************************************************************
#	- The system stack contains the FP SNAN exception frame
#	- The fsave frame contains the source operand
#
# OUTPUT **************************************************************
#	No access error:
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#
# ALGORITHM ***********************************************************
#	In a system where the FP SNAN exception is enabled, the goal
# is to get to the handler specified at _real_snan(). But, on the 060,
# for opclass zero and two instructions taking this exception, the
# input operand in the fsave frame may be incorrect for some cases
# and needs to be corrected. This handler calls fix_skewed_ops() to
# do just this and then exits through _real_snan().
#	For opclass 3 instructions, the 060 doesn't store the default
# SNAN result out to memory or data register file as it should.
# This code must emulate the move out before finally exiting through
# _real_snan(). The move out, if to memory, is performed using
# _mem_write() "callout" routines that may return a failing result.
# In this special case, the handler must exit through facc_out()
# which creates an access error stack frame from the current SNAN
# stack frame.
#	For the case of an extended precision opclass 3 instruction,
# if the effective addressing mode was -() or ()+, then the address
# register must get updated by calling _calc_ea_fout(). If the <ea>
# was -(a7) from supervisor mode, then the exception frame currently
# on the system stack must be carefully moved "down" to make room
# for the operand being moved.
#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore everything saved on entry, frestore the fsave frame
# and leave through the _real_snan() callout.
fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, single, double, and extended destination format
# operations are dispatched to a store routine below. in each case the
# default result is built from the upper bits of the source SNAN with
# the SNAN bit set, and is written to the destination data register or
# to the stacked effective address.
# packed format operations already were handled by fpsp_unsupp(), so
# nothing else needs to be done for them here.
#
fsnan_out:

	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

# entries are offsets from tbl_snan.
# NOTE(review): the two packed entries have offset zero, i.e. they point at
# the table itself rather than at fsnan_exit (compare tbl_operr, whose
# first packed entry goes to foperr_exit). this is harmless only if packed
# formats never get here -- confirm.
tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec
	short		fsnan_out_x - tbl_snan	# ext prec
	short		tbl_snan    - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan    - tbl_snan	# packed needs no help

# in the store routines below, d1 = <ea> mode,reg byte of the opword;
# a value <= 7 means the destination is data register d1.
fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single precision: sign from the source, exponent field and SNAN bit from
# the constant 0x7fc00000, remaining bits from the upper mantissa longword
# shifted right by 8.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
# single precision result to a data register (reached only from
# fsnan_out_s above). d1 is saved around the mantissa shift because it
# still holds the <ea> byte.
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# restore <ea> mode,reg
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double precision: the 8-byte result is assembled in FP_SCR0_EX (hi) and
# FP_SCR0_HI (lo), then written with _dmem_write().
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
	ror.l		%d0,%d1			# move them to the top of the longword
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the 12-byte result in FP_SCR0: source sign/exponent word, a zeroed
# second word, and the source mantissa with bit 30 of the upper longword
# set.
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

# user mode: expose the user a7 and the caller's a6 (the value saved by
# link at (%a6)) through EXC_A7/EXC_A6 for _calc_ea_fout(), then write
# back whatever values it leaves there.
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the exception frame (SR, PC, voff, ea) is copied 0xc bytes down and the
# 12-byte result from FP_SCR0 is written where the frame used to be.
# all accesses are sp-relative (LOCAL_SIZE+...) because a6 is reloaded
# first.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_snan

#########################################################################
# XDEF ****************************************************************
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.
#
#	This handler should be the first code executed upon taking the
#	FP Inexact exception in an operating system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	smovcr() - emulate an "fmovcr" instruction
#	fout() - emulate an opclass 3 instruction
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	_real_inex() - "callout" to operating system inexact handler
#
# INPUT ***************************************************************
#	- The system stack contains the FP Inexact exception frame
#	- The fsave frame contains the source operand
#
# OUTPUT **************************************************************
#	- The system stack is unchanged
#	- The fsave frame contains the adjusted src op for opclass 0,2
#
# ALGORITHM ***********************************************************
#	In a system where the FP Inexact exception is enabled, the goal
# is to get to the handler specified at _real_inex(). But, on the 060,
# for opclass zero and two instructions taking this exception, the
# hardware doesn't store the correct result to the destination FP
# register as did the '040 and '881/2. This handler must emulate the
# instruction in order to get this value and then store it to the
# correct register before calling _real_inex().
#	For opclass 3 instructions, the 060 doesn't store the default
# inexact result out to memory or data register file as it should.
# This code must emulate the move out by calling fout() before finally
# exiting through _real_inex().
#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
# (bfextu sets the Z flag when the extracted format field is zero, which is
# the long integer format.)
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)	# rewrite word at offset 2 of frame

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
# the mask keeps bits 23-16, bit 8 (INEX1) and bits 7-0 (the accrued field).
	andi.l		&0x00ff01ff,USER_FPSR(%a6)	# zero ccode, exceptions but INEX1

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1	# extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# call the emulation routine selected by the low 7 bits of the command
# word. tbl_unsupp holds longword offsets relative to tbl_unsupp itself.
# d0 = rnd prec/mode, a0 = ptr to src, a1 = ptr to dst.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1	# fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# pass: dst register number
	bsr.l		store_fpreg

# common exit: restore everything saved on entry, frestore the fsave frame
# and leave through the _real_inex() callout.
finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

# fmovecr: emulate with smovcr() and store the result like any other
# opclass 0/2 result.
finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save

########################################################################

#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6)	# zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit

#########################################################################
# XDEF ****************************************************************
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.
#
#	This handler should be the first code executed upon taking
#	the FP DZ exception in an operating system.
#3743# #3744# XREF **************************************************************** #3745# _imem_read_long() - read instruction longword from memory #3746# fix_skewed_ops() - adjust fsave operand #3747# _real_dz() - "callout" exit point from FP DZ handler #3748# #3749# INPUT *************************************************************** #3750# - The system stack contains the FP DZ exception stack. #3751# - The fsave frame contains the source operand. #3752# #3753# OUTPUT ************************************************************** #3754# - The system stack contains the FP DZ exception stack. #3755# - The fsave frame contains the adjusted source operand. #3756# #3757# ALGORITHM *********************************************************** #3758# In a system where the DZ exception is enabled, the goal is to #3759# get to the handler specified at _real_dz(). But, on the 060, when the #3760# exception is taken, the input operand in the fsave state frame may #3761# be incorrect for some cases and need to be adjusted. So, this package #3762# adjusts the operand using fix_skewed_ops() and then branches to #3763# _real_dz(). 
#3764# #3765#########################################################################37663767global _fpsp_dz3768_fpsp_dz:37693770link.w %a6,&-LOCAL_SIZE # init stack frame37713772fsave FP_SRC(%a6) # grab the "busy" frame37733774movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a13775fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs3776fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack37773778# the FPIAR holds the "current PC" of the faulting instruction3779mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)37803781mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr3782addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr3783bsr.l _imem_read_long # fetch the instruction words3784mov.l %d0,EXC_OPWORD(%a6)37853786##############################################################################378737883789# here, we simply see if the operand in the fsave frame needs to be "unskewed".3790# this would be the case for opclass two operations with a source zero3791# in the sgl or dbl format.3792lea FP_SRC(%a6),%a0 # pass: ptr to src op3793bsr.l fix_skewed_ops # fix src op37943795fdz_exit:3796fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp13797fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs3798movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a137993800frestore FP_SRC(%a6)38013802unlk %a63803bra.l _real_dz38043805#########################################################################3806# XDEF **************************************************************** #3807# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #3808# #3809# This handler should be the first code executed upon taking the #3810# "Line F Emulator" exception in an operating system. 
#
# XREF **************************************************************** #
#	_fpsp_unimp() - handle "FP Unimplemented" exceptions
#	_real_fpu_disabled() - handle "FPU disabled" exceptions
#	_real_fline() - handle "FLINE" exceptions
#	_imem_read_long() - read instruction longword
#
# INPUT *************************************************************** #
#	- The system stack contains a "Line F Emulator" exception
#	  stack frame.
#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged
#
# ALGORITHM *********************************************************** #
#	When a "Line F Emulator" exception occurs, there are 3 possible
# exception types, denoted by the exception stack frame format number:
#	(1) FPU unimplemented instruction (6 word stack frame)
#	(2) FPU disabled (8 word stack frame)
#	(3) Line F (4 word stack frame)
#
#	This module determines which and forks the flow off to the
# appropriate "callout" (for "disabled" and "Line F") or to the
# correct emulation code (for "FPU unimplemented").
#	This code also must check for "fmovecr" instructions w/ a
# non-zero <ea> field. These may get flagged as "Line F" but should
# really be flagged as "FPU Unimplemented". (This is a "feature" on
# the '060.)
#
#########################################################################

	global	_fpsp_fline
_fpsp_fline:

# 0x6(%sp) is the frame format/vector offset word of the exception frame;
# its upper nibble (0x2 or 0x4 below) distinguishes the frame types.

# check to see if this exception is a "FP Unimplemented Instruction"
# exception. if so, branch directly to that handler's entry point.
	cmpi.w	0x6(%sp),&0x202c
	beq.l	_fpsp_unimp

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
	cmpi.w	0x6(%sp),&0x402c
	beq.l	_real_fpu_disabled

# the exception was an "F-Line Illegal" exception. we check to see
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
# so, convert the F-Line exception stack frame to an FP Unimplemented
# Instruction exception stack frame else branch to the OS entry
# point for the F-Line exception handler.
	link.w	%a6,&-LOCAL_SIZE	# init stack frame

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

	mov.l	EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long	# fetch instruction words

# d0 = opword:extension word. the top 10 bits of the opword and the
# top 6 bits of the extension word must both match for an fmovecr.
	bfextu	%d0{&0:&10},%d1	# is it an fmovecr?
	cmpi.w	%d1,&0x03c8
	bne.b	fline_fline	# no

	bfextu	%d0{&16:&6},%d1	# is it an fmovecr?
	cmpi.b	%d1,&0x17
	bne.b	fline_fline	# no

# it's an fmovecr w/ a non-zero <ea> that has entered through
# the F-Line Illegal exception.
# so, we need to convert the F-Line exception stack frame into an
# FP Unimplemented Instruction stack frame and jump to that entry
# point.
#
# but, if the FPU is disabled, then we need to jump to the FPU disabled
# entry point.
	movc	%pcr,%d0
	btst	&0x1,%d0
	beq.b	fline_fmovcr

	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

# grow the 4 word frame into an 8 word "FPU disabled" frame in place.
	sub.l	&0x8,%sp	# make room for "Next PC", <ea>
	mov.w	0x8(%sp),(%sp)
	mov.l	0xa(%sp),0x2(%sp)	# move "Current PC"
	mov.w	&0x402c,0x6(%sp)
	mov.l	0x2(%sp),0xc(%sp)
	addq.l	&0x4,0x2(%sp)	# set "Next PC"

	bra.l	_real_fpu_disabled

fline_fmovcr:
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	fmov.l	0x2(%sp),%fpiar	# set current PC
	addq.l	&0x4,0x2(%sp)	# set Next PC

# grow the 4 word frame by one longword: the first longword is duplicated
# one slot lower, the second longword follows it, and only the upper byte
# of the format/vector offset word is rewritten (to 0x20).
	mov.l	(%sp),-(%sp)
	mov.l	0x8(%sp),0x4(%sp)
	mov.b	&0x20,0x6(%sp)

	bra.l	_fpsp_unimp

fline_fline:
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	bra.l	_real_fline

#########################################################################
# XDEF
# **************************************************************** #
#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented
#		       Instruction" exception.
#
#	This handler should be the first code executed upon taking the
#	FP Unimplemented Instruction exception in an operating system.
#
# XREF **************************************************************** #
#	_imem_read_{word,long}() - read instruction word/longword
#	load_fop() - load src/dst ops from memory and/or FP regfile
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	tbl_trans - addr of table of emulation routines for trnscndls
#	_real_access() - "callout" for access error exception
#	_fpsp_done() - "callout" for exit; work all done
#	_real_trace() - "callout" for Trace enabled exception
#	smovcr() - emulate "fmovecr" instruction
#	funimp_skew() - adjust fsave src ops to "incorrect" value
#	_ftrapcc() - emulate an "ftrapcc" instruction
#	_fdbcc() - emulate an "fdbcc" instruction
#	_fscc() - emulate an "fscc" instruction
#	_real_trap() - "callout" for Trap exception
#	_real_bsun() - "callout" for enabled Bsun exception
#
# INPUT *************************************************************** #
#	- The system stack contains the "Unimplemented Instr" stk frame
#
# OUTPUT ************************************************************** #
#	If access error:
#	- The system stack is changed to an access error stack frame
#	If Trace exception enabled:
#	- The system stack is changed to a Trace exception stack frame
#	Else: (normal case)
#	- Correct result has been stored as appropriate
#
# ALGORITHM *********************************************************** #
#	There are two main cases of instructions that may enter here to
# be emulated: (1) the FPgen instructions, most of which were also
# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".
#	For the first set, this handler calls the routine load_fop()
# to load the source and destination (for dyadic) operands to be used
# for instruction emulation. The correct emulation routine is then
# chosen by decoding the instruction type and indexing into an
# emulation subroutine index table. After emulation returns, this
# handler checks to see if an exception should occur as a result of the
# FP instruction emulation. If so, then an FP exception of the correct
# type is inserted into the FPU state frame using the "frestore"
# instruction before exiting through _fpsp_done(). In either the
# exceptional or non-exceptional cases, we must check to see if the
# Trace exception is enabled. If so, then we must create a Trace
# exception frame from the current exception frame and exit through
# _real_trace().
#	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines
# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three
# may flag that a BSUN exception should be taken. If so, then the
# current exception stack frame is converted into a BSUN exception
# stack frame and an exit is made through _real_bsun(). If the
# instruction was "ftrapcc" and a Trap exception should result, a Trap
# exception stack frame is created from the current frame and an exit
# is made through _real_trap(). If a Trace exception is pending, then
# a Trace exception frame is created from the current frame and a jump
# is made to _real_trace(). Finally, if none of these conditions exist,
# then the handler exits through the callout _fpsp_done().
#
#	In any of the above scenarios, if a _mem_read() or _mem_write()
# "callout" returns a failing value, then an access error stack frame
# is created from the current stack frame and an exit is made through
# _real_access().
#
#########################################################################

#
# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
#
#	*****************
#	*               * => <ea> of fp unimp instr.
#	-      EA       -
#	*               *
#	*****************
#	* 0x2 *  0x02c  * => frame format and vector offset(vector #11)
#	*****************
#	*               *
#	-    Next PC    - => PC of instr to execute after exc handling
#	*               *
#	*****************
#	*      SR       * => SR at the time the exception was taken
#	*****************
#
# Note: the !NULL bit does not get set in the fsave frame when the
# machine encounters an fp unimp exception. Therefore, it must be set
# before leaving this handler.
#
	global	_fpsp_unimp
_fpsp_unimp:

	link.w	%a6,&-LOCAL_SIZE	# init stack frame

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,%fpiar,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1

	btst	&0x5,EXC_SR(%a6)	# user mode exception?
	bne.b	funimp_s	# no; supervisor mode

# save the value of the user stack pointer onto the stack frame
funimp_u:
	mov.l	%usp,%a0	# fetch user stack pointer
	mov.l	%a0,EXC_A7(%a6)	# store in stack frame
	bra.b	funimp_cont

# store the value of the supervisor stack pointer BEFORE the exc occurred.
# old_sp is address just above stacked effective address.
funimp_s:
	lea	4+EXC_EA(%a6),%a0	# load old a7'
	mov.l	%a0,EXC_A7(%a6)	# store a7'
	mov.l	%a0,OLD_A7(%a6)	# make a copy

funimp_cont:

# the FPIAR holds the "current PC" of the faulting instruction.
	mov.l	USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long	# fetch the instruction words
	mov.l	%d0,EXC_OPWORD(%a6)

############################################################################

	fmov.l	&0x0,%fpcr	# clear FPCR
	fmov.l	&0x0,%fpsr	# clear FPSR

	clr.b	SPCOND_FLG(%a6)	# clear "special case" flag

# Divide the fp instructions into 8 types based on the TYPE field in
# bits 6-8 of the opword(classes 6,7 are undefined).
# (for the '060, only two types can take this exception)
#	bftst	%d0{&7:&3}	# test TYPE
	btst	&22,%d0	# type 0 or 1 ?
	bne.w	funimp_misc	# type 1

#########################################
# TYPE == 0: General instructions	#
#########################################
funimp_gen:

	clr.b	STORE_FLG(%a6)	# clear "store result" flag

# clear the ccode byte and exception status byte
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	bfextu	%d0{&16:&6},%d1	# extract upper 6 of cmdreg
	cmpi.b	%d1,&0x17	# is op an fmovecr?
	beq.w	funimp_fmovcr	# yes

funimp_gen_op:
	bsr.l	_load_fop	# load

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# fetch rnd mode

# build the tbl_trans index: (extension bits * 8) + src operand tag.
	mov.b	1+EXC_CMDREG(%a6),%d1
	andi.w	&0x003f,%d1	# extract extension bits
	lsl.w	&0x3,%d1	# shift left 3 bits
	or.b	STAG(%a6),%d1	# insert src optag bits

	lea	FP_DST(%a6),%a1	# pass dst ptr in a1
	lea	FP_SRC(%a6),%a0	# pass src ptr in a0

# tbl_trans holds 16-bit offsets relative to tbl_trans itself.
	mov.w	(tbl_trans.w,%pc,%d1.w*2),%d1
	jsr	(tbl_trans.w,%pc,%d1.w*1)	# emulate

funimp_fsave:
	mov.b	FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w	funimp_ena	# some are enabled

funimp_store:
	bfextu	EXC_CMDREG(%a6){&6:&3},%d0	# fetch Dn
	bsr.l	store_fpreg	# store result to fp regfile

funimp_gen_exit:
	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

funimp_gen_exit_cmp:
	cmpi.b	SPCOND_FLG(%a6),&mia7_flg	# was the ea mode (sp)+ ?
	beq.b	funimp_gen_exit_a7	# yes

	cmpi.b	SPCOND_FLG(%a6),&mda7_flg	# was the ea mode -(sp) ?
	beq.b	funimp_gen_exit_a7	# yes

funimp_gen_exit_cont:
	unlk	%a6

funimp_gen_exit_cont2:
	btst	&0x7,(%sp)	# is trace on?
	beq.l	_fpsp_done	# no

# this catches a problem with the case where an exception will be re-inserted
# into the machine. the frestore has already been executed...so, the fmov.l
# alone of the control register would trigger an unwanted exception.
# until I feel like fixing this, we'll sidestep the exception.
	fsave	-(%sp)
	fmov.l	%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
	frestore	(%sp)+
	mov.w	&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
	bra.l	_real_trace

funimp_gen_exit_a7:
	btst	&0x5,EXC_SR(%a6)	# supervisor or user mode?
	bne.b	funimp_gen_exit_a7_s	# supervisor

# user mode: write the updated a7 back to the USP, preserving a0.
	mov.l	%a0,-(%sp)
	mov.l	EXC_A7(%a6),%a0
	mov.l	%a0,%usp
	mov.l	(%sp)+,%a0
	bra.b	funimp_gen_exit_cont

# if the instruction was executed from supervisor mode and the addressing
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
# "n" bytes where "n" is the size of the src operand type.
# f<op>.{b,w,l,s,d,x,p}
funimp_gen_exit_a7_s:
	mov.l	%d0,-(%sp)	# save d0
	mov.l	EXC_A7(%a6),%d0	# load new a7'
	sub.l	OLD_A7(%a6),%d0	# subtract old a7'
	mov.l	0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0)	# shift stack frame
	mov.l	EXC_SR(%a6),(EXC_SR,%a6,%d0)	# shift stack frame
	mov.w	%d0,EXC_SR(%a6)	# store incr number
	mov.l	(%sp)+,%d0	# restore d0

	unlk	%a6

# the word now on top of the stack is the increment stored just above.
	add.w	(%sp),%sp	# stack frame shifted
	bra.b	funimp_gen_exit_cont2

######################
# fmovecr.x #ccc,fpn #
######################
funimp_fmovcr:
	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0
	mov.b	1+EXC_CMDREG(%a6),%d1
	andi.l	&0x0000007f,%d1	# pass rom offset in d1
	bsr.l	smovcr
	bra.w	funimp_fsave

#########################################################################

#
# the user has enabled some exceptions. we figure not to see this too
# often so that's why it gets lower priority.
#
funimp_ena:

# was an exception set that was also enabled?
	and.b	FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
	bfffo	%d0{&24:&8},%d0	# find highest priority exception
	bne.b	funimp_exc	# at least one was set

# no exception that was enabled was set BUT if we got an exact overflow
# and overflow wasn't enabled but inexact was (yech!) then this is
# an inexact exception; otherwise, return to normal non-exception flow.
	btst	&ovfl_bit,FPSR_EXCEPT(%a6)	# did overflow occur?
	beq.w	funimp_store	# no; return to normal flow

# the overflow w/ exact result happened but was inexact set in the FPCR?
funimp_ovfl:
	btst	&inex2_bit,FPCR_ENABLE(%a6)	# is inexact enabled?
	beq.w	funimp_store	# no; return to normal flow
	bra.b	funimp_exc_ovfl	# yes

# some exception happened that was actually enabled.
# we'll insert this new exception into the FPU and then return.
funimp_exc:
	subi.l	&24,%d0	# fix offset to be 0-7
	cmpi.b	%d0,&0x6	# is exception INEX?
	bne.b	funimp_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame. the eventual overflow or underflow handler will see that
# it's actually an inexact and act appropriately. this is the only easy
# way to have the EXOP available for the enabled inexact handler when
# a disabled overflow or underflow has also happened.
	btst	&ovfl_bit,FPSR_EXCEPT(%a6)	# did overflow occur?
	bne.b	funimp_exc_ovfl	# yes
	btst	&unfl_bit,FPSR_EXCEPT(%a6)	# did underflow occur?
	bne.b	funimp_exc_unfl	# yes

# force the fsave exception status bits to signal an exception of the
# appropriate type. don't forget to "skew" the source operand in case we
# "unskewed" the one the hardware initially gave us.
funimp_exc_force:
	mov.l	%d0,-(%sp)	# save d0
	bsr.l	funimp_skew	# check for special case
	mov.l	(%sp)+,%d0	# restore d0
	mov.w	(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b	funimp_gen_exit2	# exit with frestore

# fsave status words, indexed by the exception offset (0-7) computed above.
tbl_funimp_except:
	short	0xe002, 0xe006, 0xe004, 0xe005
	short	0xe003, 0xe002, 0xe001, 0xe001

# insert an overflow frame
funimp_exc_ovfl:
	bsr.l	funimp_skew	# check for special case
	mov.w	&0xe005,2+FP_SRC(%a6)
	bra.b	funimp_gen_exit2

# insert an underflow frame
funimp_exc_unfl:
	bsr.l	funimp_skew	# check for special case
	mov.w	&0xe003,2+FP_SRC(%a6)

# this is the general exit point for an enabled exception that will be
# restored into the machine for the instruction just emulated.
funimp_gen_exit2:
	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)	# insert exceptional status

	bra.w	funimp_gen_exit_cmp

############################################################################

#
# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
#
# These instructions were implemented on the '881/2 and '040 in hardware but
# are emulated in software on the '060.
#
# dispatch: mode field 0x1 goes to fdb<cc>. any other mode field except 0x7
# goes to fs<cc>. for mode field 0x7, the next 3 bits of the opword decide:
# below 0x2 goes to fs<cc>, otherwise fall through to ftrap<cc>.
#
funimp_misc:
	bfextu	%d0{&10:&3},%d1	# extract mode field
	cmpi.b	%d1,&0x1	# is it an fdb<cc>?
	beq.w	funimp_fdbcc	# yes
	cmpi.b	%d1,&0x7	# is the mode field 0x7?
	bne.w	funimp_fscc	# no; it's an fs<cc>
	bfextu	%d0{&13:&3},%d1	# extract the 3 bits after the mode field
	cmpi.b	%d1,&0x2	# is that field below 0x2?
	blt.w	funimp_fscc	# yes; it's an fs<cc>

#########################
# ftrap<cc>		#
# ftrap<cc>.w #<data>	#
# ftrap<cc>.l #<data>	#
#########################
funimp_ftrapcc:

	bsr.l	_ftrapcc	# FTRAP<cc>()

	cmpi.b	SPCOND_FLG(%a6),&fbsun_flg	# is enabled bsun occurring?
	beq.w	funimp_bsun	# yes

	cmpi.b	SPCOND_FLG(%a6),&ftrapcc_flg	# should a trap occur?
	bne.w	funimp_done	# no

#	 FP UNIMP FRAME		   TRAP  FRAME
#	*****************	*****************
#	**    <EA>     **	**  Current PC **
#	*****************	*****************
#	* 0x2 *  0x02c  *	* 0x2 *  0x01c  *
#	*****************	*****************
#	**   Next PC   **	**   Next PC   **
#	*****************	*****************
#	*      SR       *	*      SR       *
#	*****************	*****************
#	    (6 words)		    (6 words)
#
# the ftrapcc instruction should take a trap. so, here we must create a
# trap stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trap exception
funimp_ftrapcc_tp:
	mov.l	USER_FPIAR(%a6),EXC_EA(%a6)	# Address = Current PC
	mov.w	&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c

	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6
	bra.l	_real_trap

#########################
# fdb<cc> Dn,<label>	#
#########################
funimp_fdbcc:

	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word	# read displacement

	tst.l	%d1	# did ifetch fail?
	bne.w	funimp_iacc	# yes

	ext.l	%d0	# sign extend displacement

	bsr.l	_fdbcc	# FDB<cc>()

	cmpi.b	SPCOND_FLG(%a6),&fbsun_flg	# is enabled bsun occurring?
	beq.w	funimp_bsun

	bra.w	funimp_done	# branch to finish

#################
# fs<cc>.b <ea>	#
#################
funimp_fscc:

	bsr.l	_fscc	# FS<cc>()

# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
# does not need to update "An" before taking a bsun exception.
	cmpi.b	SPCOND_FLG(%a6),&fbsun_flg	# is enabled bsun occurring?
	beq.w	funimp_bsun

	btst	&0x5,EXC_SR(%a6)	# yes; is it a user mode exception?
	bne.b	funimp_fscc_s	# no

funimp_fscc_u:
	mov.l	EXC_A7(%a6),%a0	# yes; set new USP
	mov.l	%a0,%usp
	bra.w	funimp_done	# branch to finish

# remember, I'm assuming that post-increment is bogus...(it IS!!!)
# so, the least significant WORD of the stacked effective address got
# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
# so that the rte will work correctly without destroying the result.
# even though the operation size is byte, the stack ptr is decr by 2.
#
# remember, also, this instruction may be traced.
funimp_fscc_s:
	cmpi.b	SPCOND_FLG(%a6),&mda7_flg	# was a7 modified?
	bne.w	funimp_done	# no

	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	btst	&0x7,(%sp)	# is trace enabled?
	bne.b	funimp_fscc_s_trace	# yes

	subq.l	&0x2,%sp
	mov.l	0x2(%sp),(%sp)	# shift SR,hi(PC) "down"
	mov.l	0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
	bra.l	_fpsp_done

funimp_fscc_s_trace:
	subq.l	&0x2,%sp
	mov.l	0x2(%sp),(%sp)	# shift SR,hi(PC) "down"
	mov.w	0x6(%sp),0x4(%sp)	# shift lo(PC)
	mov.w	&0x2024,0x6(%sp)	# fmt/voff = $2024
	fmov.l	%fpiar,0x8(%sp)	# insert "current PC"

	bra.l	_real_trace

#
# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
# the fp unimplemented instruction exception stack frame into a bsun stack frame,
# restore a bsun exception into the machine, and branch to the user
# supplied bsun hook.
#
#	 FP UNIMP FRAME		   BSUN FRAME
#	*****************	*****************
#	**    <EA>     **	* 0x0 * 0x0c0   *
#	*****************	*****************
#	* 0x2 *  0x02c  *	** Current PC  **
#	*****************	*****************
#	**   Next PC   **	*      SR       *
#	*****************	*****************
#	*      SR       *	    (4 words)
#	*****************
#	    (6 words)
#
funimp_bsun:
	mov.w	&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
	mov.l	USER_FPIAR(%a6),EXC_VOFF(%a6)	# PC = Current PC
	mov.w	EXC_SR(%a6),2+EXC_PC(%a6)	# shift SR "up"

	mov.w	&0xe000,2+FP_SRC(%a6)	# bsun exception enabled

	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)	# restore bsun exception

	unlk	%a6

	addq.l	&0x4,%sp	# erase sludge

	bra.l	_real_bsun	# branch to user bsun hook

#
# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
# and return.
#
# as usual, we have to check for trace mode being on here. since instructions
# modifying the supervisor stack frame don't pass through here, this is a
# relatively easy task.
#
funimp_done:
	fmovm.x	EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

	btst	&0x7,(%sp)	# is trace enabled?
	bne.b	funimp_trace	# yes

	bra.l	_fpsp_done

#	 FP UNIMP FRAME		  TRACE  FRAME
#	*****************	*****************
#	**    <EA>     **	**  Current PC **
#	*****************	*****************
#	* 0x2 *  0x02c  *	* 0x2 *  0x024  *
#	*****************	*****************
#	**   Next PC   **	**   Next PC   **
#	*****************	*****************
#	*      SR       *	*      SR       *
#	*****************	*****************
#	    (6 words)		    (6 words)
#
# the emulated instruction (ftrapcc, fdbcc, or fscc arriving via funimp_done)
# should take a trace trap. so, here we must create a
# trace stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trace exception
funimp_trace:
	fmov.l	%fpiar,0x8(%sp)	# current PC is in fpiar
	mov.b	&0x24,0x7(%sp)	# vector offset = 0x024

	bra.l	_real_trace

################################################################

	global	tbl_trans
	swbeg	&0x1c0
tbl_trans:
	short	tbl_trans - tbl_trans	# $00-0 fmovecr all
	short	tbl_trans - tbl_trans	# $00-1 fmovecr all
	short	tbl_trans - tbl_trans	# $00-2 fmovecr all
	short	tbl_trans - tbl_trans	# $00-3 fmovecr all
	short	tbl_trans - tbl_trans	# $00-4 fmovecr all
	short	tbl_trans - tbl_trans	# $00-5 fmovecr all
	short	tbl_trans - tbl_trans	# $00-6 fmovecr all
	short	tbl_trans - tbl_trans	# $00-7 fmovecr all

	short	tbl_trans - tbl_trans	# $01-0 fint norm
	short	tbl_trans - tbl_trans	# $01-1 fint zero
	short	tbl_trans - tbl_trans	# $01-2 fint inf
	short	tbl_trans - tbl_trans	# $01-3 fint qnan
	short	tbl_trans - tbl_trans	# $01-5 fint denorm
	short	tbl_trans - tbl_trans	#
$01-4 fint snan4482short tbl_trans - tbl_trans # $01-6 fint unnorm4483short tbl_trans - tbl_trans # $01-7 ERROR44844485short ssinh - tbl_trans # $02-0 fsinh norm4486short src_zero - tbl_trans # $02-1 fsinh zero4487short src_inf - tbl_trans # $02-2 fsinh inf4488short src_qnan - tbl_trans # $02-3 fsinh qnan4489short ssinhd - tbl_trans # $02-5 fsinh denorm4490short src_snan - tbl_trans # $02-4 fsinh snan4491short tbl_trans - tbl_trans # $02-6 fsinh unnorm4492short tbl_trans - tbl_trans # $02-7 ERROR44934494short tbl_trans - tbl_trans # $03-0 fintrz norm4495short tbl_trans - tbl_trans # $03-1 fintrz zero4496short tbl_trans - tbl_trans # $03-2 fintrz inf4497short tbl_trans - tbl_trans # $03-3 fintrz qnan4498short tbl_trans - tbl_trans # $03-5 fintrz denorm4499short tbl_trans - tbl_trans # $03-4 fintrz snan4500short tbl_trans - tbl_trans # $03-6 fintrz unnorm4501short tbl_trans - tbl_trans # $03-7 ERROR45024503short tbl_trans - tbl_trans # $04-0 fsqrt norm4504short tbl_trans - tbl_trans # $04-1 fsqrt zero4505short tbl_trans - tbl_trans # $04-2 fsqrt inf4506short tbl_trans - tbl_trans # $04-3 fsqrt qnan4507short tbl_trans - tbl_trans # $04-5 fsqrt denorm4508short tbl_trans - tbl_trans # $04-4 fsqrt snan4509short tbl_trans - tbl_trans # $04-6 fsqrt unnorm4510short tbl_trans - tbl_trans # $04-7 ERROR45114512short tbl_trans - tbl_trans # $05-0 ERROR4513short tbl_trans - tbl_trans # $05-1 ERROR4514short tbl_trans - tbl_trans # $05-2 ERROR4515short tbl_trans - tbl_trans # $05-3 ERROR4516short tbl_trans - tbl_trans # $05-4 ERROR4517short tbl_trans - tbl_trans # $05-5 ERROR4518short tbl_trans - tbl_trans # $05-6 ERROR4519short tbl_trans - tbl_trans # $05-7 ERROR45204521short slognp1 - tbl_trans # $06-0 flognp1 norm4522short src_zero - tbl_trans # $06-1 flognp1 zero4523short sopr_inf - tbl_trans # $06-2 flognp1 inf4524short src_qnan - tbl_trans # $06-3 flognp1 qnan4525short slognp1d - tbl_trans # $06-5 flognp1 denorm4526short src_snan - tbl_trans # $06-4 flognp1 snan4527short 
tbl_trans - tbl_trans # $06-6 flognp1 unnorm4528short tbl_trans - tbl_trans # $06-7 ERROR45294530short tbl_trans - tbl_trans # $07-0 ERROR4531short tbl_trans - tbl_trans # $07-1 ERROR4532short tbl_trans - tbl_trans # $07-2 ERROR4533short tbl_trans - tbl_trans # $07-3 ERROR4534short tbl_trans - tbl_trans # $07-4 ERROR4535short tbl_trans - tbl_trans # $07-5 ERROR4536short tbl_trans - tbl_trans # $07-6 ERROR4537short tbl_trans - tbl_trans # $07-7 ERROR45384539short setoxm1 - tbl_trans # $08-0 fetoxm1 norm4540short src_zero - tbl_trans # $08-1 fetoxm1 zero4541short setoxm1i - tbl_trans # $08-2 fetoxm1 inf4542short src_qnan - tbl_trans # $08-3 fetoxm1 qnan4543short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm4544short src_snan - tbl_trans # $08-4 fetoxm1 snan4545short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm4546short tbl_trans - tbl_trans # $08-7 ERROR45474548short stanh - tbl_trans # $09-0 ftanh norm4549short src_zero - tbl_trans # $09-1 ftanh zero4550short src_one - tbl_trans # $09-2 ftanh inf4551short src_qnan - tbl_trans # $09-3 ftanh qnan4552short stanhd - tbl_trans # $09-5 ftanh denorm4553short src_snan - tbl_trans # $09-4 ftanh snan4554short tbl_trans - tbl_trans # $09-6 ftanh unnorm4555short tbl_trans - tbl_trans # $09-7 ERROR45564557short satan - tbl_trans # $0a-0 fatan norm4558short src_zero - tbl_trans # $0a-1 fatan zero4559short spi_2 - tbl_trans # $0a-2 fatan inf4560short src_qnan - tbl_trans # $0a-3 fatan qnan4561short satand - tbl_trans # $0a-5 fatan denorm4562short src_snan - tbl_trans # $0a-4 fatan snan4563short tbl_trans - tbl_trans # $0a-6 fatan unnorm4564short tbl_trans - tbl_trans # $0a-7 ERROR45654566short tbl_trans - tbl_trans # $0b-0 ERROR4567short tbl_trans - tbl_trans # $0b-1 ERROR4568short tbl_trans - tbl_trans # $0b-2 ERROR4569short tbl_trans - tbl_trans # $0b-3 ERROR4570short tbl_trans - tbl_trans # $0b-4 ERROR4571short tbl_trans - tbl_trans # $0b-5 ERROR4572short tbl_trans - tbl_trans # $0b-6 ERROR4573short tbl_trans - tbl_trans # $0b-7 
ERROR45744575short sasin - tbl_trans # $0c-0 fasin norm4576short src_zero - tbl_trans # $0c-1 fasin zero4577short t_operr - tbl_trans # $0c-2 fasin inf4578short src_qnan - tbl_trans # $0c-3 fasin qnan4579short sasind - tbl_trans # $0c-5 fasin denorm4580short src_snan - tbl_trans # $0c-4 fasin snan4581short tbl_trans - tbl_trans # $0c-6 fasin unnorm4582short tbl_trans - tbl_trans # $0c-7 ERROR45834584short satanh - tbl_trans # $0d-0 fatanh norm4585short src_zero - tbl_trans # $0d-1 fatanh zero4586short t_operr - tbl_trans # $0d-2 fatanh inf4587short src_qnan - tbl_trans # $0d-3 fatanh qnan4588short satanhd - tbl_trans # $0d-5 fatanh denorm4589short src_snan - tbl_trans # $0d-4 fatanh snan4590short tbl_trans - tbl_trans # $0d-6 fatanh unnorm4591short tbl_trans - tbl_trans # $0d-7 ERROR45924593short ssin - tbl_trans # $0e-0 fsin norm4594short src_zero - tbl_trans # $0e-1 fsin zero4595short t_operr - tbl_trans # $0e-2 fsin inf4596short src_qnan - tbl_trans # $0e-3 fsin qnan4597short ssind - tbl_trans # $0e-5 fsin denorm4598short src_snan - tbl_trans # $0e-4 fsin snan4599short tbl_trans - tbl_trans # $0e-6 fsin unnorm4600short tbl_trans - tbl_trans # $0e-7 ERROR46014602short stan - tbl_trans # $0f-0 ftan norm4603short src_zero - tbl_trans # $0f-1 ftan zero4604short t_operr - tbl_trans # $0f-2 ftan inf4605short src_qnan - tbl_trans # $0f-3 ftan qnan4606short stand - tbl_trans # $0f-5 ftan denorm4607short src_snan - tbl_trans # $0f-4 ftan snan4608short tbl_trans - tbl_trans # $0f-6 ftan unnorm4609short tbl_trans - tbl_trans # $0f-7 ERROR46104611short setox - tbl_trans # $10-0 fetox norm4612short ld_pone - tbl_trans # $10-1 fetox zero4613short szr_inf - tbl_trans # $10-2 fetox inf4614short src_qnan - tbl_trans # $10-3 fetox qnan4615short setoxd - tbl_trans # $10-5 fetox denorm4616short src_snan - tbl_trans # $10-4 fetox snan4617short tbl_trans - tbl_trans # $10-6 fetox unnorm4618short tbl_trans - tbl_trans # $10-7 ERROR46194620short stwotox - tbl_trans # $11-0 ftwotox 
norm4621short ld_pone - tbl_trans # $11-1 ftwotox zero4622short szr_inf - tbl_trans # $11-2 ftwotox inf4623short src_qnan - tbl_trans # $11-3 ftwotox qnan4624short stwotoxd - tbl_trans # $11-5 ftwotox denorm4625short src_snan - tbl_trans # $11-4 ftwotox snan4626short tbl_trans - tbl_trans # $11-6 ftwotox unnorm4627short tbl_trans - tbl_trans # $11-7 ERROR46284629short stentox - tbl_trans # $12-0 ftentox norm4630short ld_pone - tbl_trans # $12-1 ftentox zero4631short szr_inf - tbl_trans # $12-2 ftentox inf4632short src_qnan - tbl_trans # $12-3 ftentox qnan4633short stentoxd - tbl_trans # $12-5 ftentox denorm4634short src_snan - tbl_trans # $12-4 ftentox snan4635short tbl_trans - tbl_trans # $12-6 ftentox unnorm4636short tbl_trans - tbl_trans # $12-7 ERROR46374638short tbl_trans - tbl_trans # $13-0 ERROR4639short tbl_trans - tbl_trans # $13-1 ERROR4640short tbl_trans - tbl_trans # $13-2 ERROR4641short tbl_trans - tbl_trans # $13-3 ERROR4642short tbl_trans - tbl_trans # $13-4 ERROR4643short tbl_trans - tbl_trans # $13-5 ERROR4644short tbl_trans - tbl_trans # $13-6 ERROR4645short tbl_trans - tbl_trans # $13-7 ERROR46464647short slogn - tbl_trans # $14-0 flogn norm4648short t_dz2 - tbl_trans # $14-1 flogn zero4649short sopr_inf - tbl_trans # $14-2 flogn inf4650short src_qnan - tbl_trans # $14-3 flogn qnan4651short slognd - tbl_trans # $14-5 flogn denorm4652short src_snan - tbl_trans # $14-4 flogn snan4653short tbl_trans - tbl_trans # $14-6 flogn unnorm4654short tbl_trans - tbl_trans # $14-7 ERROR46554656short slog10 - tbl_trans # $15-0 flog10 norm4657short t_dz2 - tbl_trans # $15-1 flog10 zero4658short sopr_inf - tbl_trans # $15-2 flog10 inf4659short src_qnan - tbl_trans # $15-3 flog10 qnan4660short slog10d - tbl_trans # $15-5 flog10 denorm4661short src_snan - tbl_trans # $15-4 flog10 snan4662short tbl_trans - tbl_trans # $15-6 flog10 unnorm4663short tbl_trans - tbl_trans # $15-7 ERROR46644665short slog2 - tbl_trans # $16-0 flog2 norm4666short t_dz2 - tbl_trans # $16-1 
flog2 zero4667short sopr_inf - tbl_trans # $16-2 flog2 inf4668short src_qnan - tbl_trans # $16-3 flog2 qnan4669short slog2d - tbl_trans # $16-5 flog2 denorm4670short src_snan - tbl_trans # $16-4 flog2 snan4671short tbl_trans - tbl_trans # $16-6 flog2 unnorm4672short tbl_trans - tbl_trans # $16-7 ERROR46734674short tbl_trans - tbl_trans # $17-0 ERROR4675short tbl_trans - tbl_trans # $17-1 ERROR4676short tbl_trans - tbl_trans # $17-2 ERROR4677short tbl_trans - tbl_trans # $17-3 ERROR4678short tbl_trans - tbl_trans # $17-4 ERROR4679short tbl_trans - tbl_trans # $17-5 ERROR4680short tbl_trans - tbl_trans # $17-6 ERROR4681short tbl_trans - tbl_trans # $17-7 ERROR46824683short tbl_trans - tbl_trans # $18-0 fabs norm4684short tbl_trans - tbl_trans # $18-1 fabs zero4685short tbl_trans - tbl_trans # $18-2 fabs inf4686short tbl_trans - tbl_trans # $18-3 fabs qnan4687short tbl_trans - tbl_trans # $18-5 fabs denorm4688short tbl_trans - tbl_trans # $18-4 fabs snan4689short tbl_trans - tbl_trans # $18-6 fabs unnorm4690short tbl_trans - tbl_trans # $18-7 ERROR46914692short scosh - tbl_trans # $19-0 fcosh norm4693short ld_pone - tbl_trans # $19-1 fcosh zero4694short ld_pinf - tbl_trans # $19-2 fcosh inf4695short src_qnan - tbl_trans # $19-3 fcosh qnan4696short scoshd - tbl_trans # $19-5 fcosh denorm4697short src_snan - tbl_trans # $19-4 fcosh snan4698short tbl_trans - tbl_trans # $19-6 fcosh unnorm4699short tbl_trans - tbl_trans # $19-7 ERROR47004701short tbl_trans - tbl_trans # $1a-0 fneg norm4702short tbl_trans - tbl_trans # $1a-1 fneg zero4703short tbl_trans - tbl_trans # $1a-2 fneg inf4704short tbl_trans - tbl_trans # $1a-3 fneg qnan4705short tbl_trans - tbl_trans # $1a-5 fneg denorm4706short tbl_trans - tbl_trans # $1a-4 fneg snan4707short tbl_trans - tbl_trans # $1a-6 fneg unnorm4708short tbl_trans - tbl_trans # $1a-7 ERROR47094710short tbl_trans - tbl_trans # $1b-0 ERROR4711short tbl_trans - tbl_trans # $1b-1 ERROR4712short tbl_trans - tbl_trans # $1b-2 ERROR4713short 
tbl_trans - tbl_trans # $1b-3 ERROR4714short tbl_trans - tbl_trans # $1b-4 ERROR4715short tbl_trans - tbl_trans # $1b-5 ERROR4716short tbl_trans - tbl_trans # $1b-6 ERROR4717short tbl_trans - tbl_trans # $1b-7 ERROR47184719short sacos - tbl_trans # $1c-0 facos norm4720short ld_ppi2 - tbl_trans # $1c-1 facos zero4721short t_operr - tbl_trans # $1c-2 facos inf4722short src_qnan - tbl_trans # $1c-3 facos qnan4723short sacosd - tbl_trans # $1c-5 facos denorm4724short src_snan - tbl_trans # $1c-4 facos snan4725short tbl_trans - tbl_trans # $1c-6 facos unnorm4726short tbl_trans - tbl_trans # $1c-7 ERROR47274728short scos - tbl_trans # $1d-0 fcos norm4729short ld_pone - tbl_trans # $1d-1 fcos zero4730short t_operr - tbl_trans # $1d-2 fcos inf4731short src_qnan - tbl_trans # $1d-3 fcos qnan4732short scosd - tbl_trans # $1d-5 fcos denorm4733short src_snan - tbl_trans # $1d-4 fcos snan4734short tbl_trans - tbl_trans # $1d-6 fcos unnorm4735short tbl_trans - tbl_trans # $1d-7 ERROR47364737short sgetexp - tbl_trans # $1e-0 fgetexp norm4738short src_zero - tbl_trans # $1e-1 fgetexp zero4739short t_operr - tbl_trans # $1e-2 fgetexp inf4740short src_qnan - tbl_trans # $1e-3 fgetexp qnan4741short sgetexpd - tbl_trans # $1e-5 fgetexp denorm4742short src_snan - tbl_trans # $1e-4 fgetexp snan4743short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm4744short tbl_trans - tbl_trans # $1e-7 ERROR47454746short sgetman - tbl_trans # $1f-0 fgetman norm4747short src_zero - tbl_trans # $1f-1 fgetman zero4748short t_operr - tbl_trans # $1f-2 fgetman inf4749short src_qnan - tbl_trans # $1f-3 fgetman qnan4750short sgetmand - tbl_trans # $1f-5 fgetman denorm4751short src_snan - tbl_trans # $1f-4 fgetman snan4752short tbl_trans - tbl_trans # $1f-6 fgetman unnorm4753short tbl_trans - tbl_trans # $1f-7 ERROR47544755short tbl_trans - tbl_trans # $20-0 fdiv norm4756short tbl_trans - tbl_trans # $20-1 fdiv zero4757short tbl_trans - tbl_trans # $20-2 fdiv inf4758short tbl_trans - tbl_trans # $20-3 fdiv 
qnan4759short tbl_trans - tbl_trans # $20-5 fdiv denorm4760short tbl_trans - tbl_trans # $20-4 fdiv snan4761short tbl_trans - tbl_trans # $20-6 fdiv unnorm4762short tbl_trans - tbl_trans # $20-7 ERROR47634764short smod_snorm - tbl_trans # $21-0 fmod norm4765short smod_szero - tbl_trans # $21-1 fmod zero4766short smod_sinf - tbl_trans # $21-2 fmod inf4767short sop_sqnan - tbl_trans # $21-3 fmod qnan4768short smod_sdnrm - tbl_trans # $21-5 fmod denorm4769short sop_ssnan - tbl_trans # $21-4 fmod snan4770short tbl_trans - tbl_trans # $21-6 fmod unnorm4771short tbl_trans - tbl_trans # $21-7 ERROR47724773short tbl_trans - tbl_trans # $22-0 fadd norm4774short tbl_trans - tbl_trans # $22-1 fadd zero4775short tbl_trans - tbl_trans # $22-2 fadd inf4776short tbl_trans - tbl_trans # $22-3 fadd qnan4777short tbl_trans - tbl_trans # $22-5 fadd denorm4778short tbl_trans - tbl_trans # $22-4 fadd snan4779short tbl_trans - tbl_trans # $22-6 fadd unnorm4780short tbl_trans - tbl_trans # $22-7 ERROR47814782short tbl_trans - tbl_trans # $23-0 fmul norm4783short tbl_trans - tbl_trans # $23-1 fmul zero4784short tbl_trans - tbl_trans # $23-2 fmul inf4785short tbl_trans - tbl_trans # $23-3 fmul qnan4786short tbl_trans - tbl_trans # $23-5 fmul denorm4787short tbl_trans - tbl_trans # $23-4 fmul snan4788short tbl_trans - tbl_trans # $23-6 fmul unnorm4789short tbl_trans - tbl_trans # $23-7 ERROR47904791short tbl_trans - tbl_trans # $24-0 fsgldiv norm4792short tbl_trans - tbl_trans # $24-1 fsgldiv zero4793short tbl_trans - tbl_trans # $24-2 fsgldiv inf4794short tbl_trans - tbl_trans # $24-3 fsgldiv qnan4795short tbl_trans - tbl_trans # $24-5 fsgldiv denorm4796short tbl_trans - tbl_trans # $24-4 fsgldiv snan4797short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm4798short tbl_trans - tbl_trans # $24-7 ERROR47994800short srem_snorm - tbl_trans # $25-0 frem norm4801short srem_szero - tbl_trans # $25-1 frem zero4802short srem_sinf - tbl_trans # $25-2 frem inf4803short sop_sqnan - tbl_trans # $25-3 
frem qnan4804short srem_sdnrm - tbl_trans # $25-5 frem denorm4805short sop_ssnan - tbl_trans # $25-4 frem snan4806short tbl_trans - tbl_trans # $25-6 frem unnorm4807short tbl_trans - tbl_trans # $25-7 ERROR48084809short sscale_snorm - tbl_trans # $26-0 fscale norm4810short sscale_szero - tbl_trans # $26-1 fscale zero4811short sscale_sinf - tbl_trans # $26-2 fscale inf4812short sop_sqnan - tbl_trans # $26-3 fscale qnan4813short sscale_sdnrm - tbl_trans # $26-5 fscale denorm4814short sop_ssnan - tbl_trans # $26-4 fscale snan4815short tbl_trans - tbl_trans # $26-6 fscale unnorm4816short tbl_trans - tbl_trans # $26-7 ERROR48174818short tbl_trans - tbl_trans # $27-0 fsglmul norm4819short tbl_trans - tbl_trans # $27-1 fsglmul zero4820short tbl_trans - tbl_trans # $27-2 fsglmul inf4821short tbl_trans - tbl_trans # $27-3 fsglmul qnan4822short tbl_trans - tbl_trans # $27-5 fsglmul denorm4823short tbl_trans - tbl_trans # $27-4 fsglmul snan4824short tbl_trans - tbl_trans # $27-6 fsglmul unnorm4825short tbl_trans - tbl_trans # $27-7 ERROR48264827short tbl_trans - tbl_trans # $28-0 fsub norm4828short tbl_trans - tbl_trans # $28-1 fsub zero4829short tbl_trans - tbl_trans # $28-2 fsub inf4830short tbl_trans - tbl_trans # $28-3 fsub qnan4831short tbl_trans - tbl_trans # $28-5 fsub denorm4832short tbl_trans - tbl_trans # $28-4 fsub snan4833short tbl_trans - tbl_trans # $28-6 fsub unnorm4834short tbl_trans - tbl_trans # $28-7 ERROR48354836short tbl_trans - tbl_trans # $29-0 ERROR4837short tbl_trans - tbl_trans # $29-1 ERROR4838short tbl_trans - tbl_trans # $29-2 ERROR4839short tbl_trans - tbl_trans # $29-3 ERROR4840short tbl_trans - tbl_trans # $29-4 ERROR4841short tbl_trans - tbl_trans # $29-5 ERROR4842short tbl_trans - tbl_trans # $29-6 ERROR4843short tbl_trans - tbl_trans # $29-7 ERROR48444845short tbl_trans - tbl_trans # $2a-0 ERROR4846short tbl_trans - tbl_trans # $2a-1 ERROR4847short tbl_trans - tbl_trans # $2a-2 ERROR4848short tbl_trans - tbl_trans # $2a-3 ERROR4849short 
tbl_trans - tbl_trans # $2a-4 ERROR4850short tbl_trans - tbl_trans # $2a-5 ERROR4851short tbl_trans - tbl_trans # $2a-6 ERROR4852short tbl_trans - tbl_trans # $2a-7 ERROR48534854short tbl_trans - tbl_trans # $2b-0 ERROR4855short tbl_trans - tbl_trans # $2b-1 ERROR4856short tbl_trans - tbl_trans # $2b-2 ERROR4857short tbl_trans - tbl_trans # $2b-3 ERROR4858short tbl_trans - tbl_trans # $2b-4 ERROR4859short tbl_trans - tbl_trans # $2b-5 ERROR4860short tbl_trans - tbl_trans # $2b-6 ERROR4861short tbl_trans - tbl_trans # $2b-7 ERROR48624863short tbl_trans - tbl_trans # $2c-0 ERROR4864short tbl_trans - tbl_trans # $2c-1 ERROR4865short tbl_trans - tbl_trans # $2c-2 ERROR4866short tbl_trans - tbl_trans # $2c-3 ERROR4867short tbl_trans - tbl_trans # $2c-4 ERROR4868short tbl_trans - tbl_trans # $2c-5 ERROR4869short tbl_trans - tbl_trans # $2c-6 ERROR4870short tbl_trans - tbl_trans # $2c-7 ERROR48714872short tbl_trans - tbl_trans # $2d-0 ERROR4873short tbl_trans - tbl_trans # $2d-1 ERROR4874short tbl_trans - tbl_trans # $2d-2 ERROR4875short tbl_trans - tbl_trans # $2d-3 ERROR4876short tbl_trans - tbl_trans # $2d-4 ERROR4877short tbl_trans - tbl_trans # $2d-5 ERROR4878short tbl_trans - tbl_trans # $2d-6 ERROR4879short tbl_trans - tbl_trans # $2d-7 ERROR48804881short tbl_trans - tbl_trans # $2e-0 ERROR4882short tbl_trans - tbl_trans # $2e-1 ERROR4883short tbl_trans - tbl_trans # $2e-2 ERROR4884short tbl_trans - tbl_trans # $2e-3 ERROR4885short tbl_trans - tbl_trans # $2e-4 ERROR4886short tbl_trans - tbl_trans # $2e-5 ERROR4887short tbl_trans - tbl_trans # $2e-6 ERROR4888short tbl_trans - tbl_trans # $2e-7 ERROR48894890short tbl_trans - tbl_trans # $2f-0 ERROR4891short tbl_trans - tbl_trans # $2f-1 ERROR4892short tbl_trans - tbl_trans # $2f-2 ERROR4893short tbl_trans - tbl_trans # $2f-3 ERROR4894short tbl_trans - tbl_trans # $2f-4 ERROR4895short tbl_trans - tbl_trans # $2f-5 ERROR4896short tbl_trans - tbl_trans # $2f-6 ERROR4897short tbl_trans - tbl_trans # $2f-7 
ERROR48984899short ssincos - tbl_trans # $30-0 fsincos norm4900short ssincosz - tbl_trans # $30-1 fsincos zero4901short ssincosi - tbl_trans # $30-2 fsincos inf4902short ssincosqnan - tbl_trans # $30-3 fsincos qnan4903short ssincosd - tbl_trans # $30-5 fsincos denorm4904short ssincossnan - tbl_trans # $30-4 fsincos snan4905short tbl_trans - tbl_trans # $30-6 fsincos unnorm4906short tbl_trans - tbl_trans # $30-7 ERROR49074908short ssincos - tbl_trans # $31-0 fsincos norm4909short ssincosz - tbl_trans # $31-1 fsincos zero4910short ssincosi - tbl_trans # $31-2 fsincos inf4911short ssincosqnan - tbl_trans # $31-3 fsincos qnan4912short ssincosd - tbl_trans # $31-5 fsincos denorm4913short ssincossnan - tbl_trans # $31-4 fsincos snan4914short tbl_trans - tbl_trans # $31-6 fsincos unnorm4915short tbl_trans - tbl_trans # $31-7 ERROR49164917short ssincos - tbl_trans # $32-0 fsincos norm4918short ssincosz - tbl_trans # $32-1 fsincos zero4919short ssincosi - tbl_trans # $32-2 fsincos inf4920short ssincosqnan - tbl_trans # $32-3 fsincos qnan4921short ssincosd - tbl_trans # $32-5 fsincos denorm4922short ssincossnan - tbl_trans # $32-4 fsincos snan4923short tbl_trans - tbl_trans # $32-6 fsincos unnorm4924short tbl_trans - tbl_trans # $32-7 ERROR49254926short ssincos - tbl_trans # $33-0 fsincos norm4927short ssincosz - tbl_trans # $33-1 fsincos zero4928short ssincosi - tbl_trans # $33-2 fsincos inf4929short ssincosqnan - tbl_trans # $33-3 fsincos qnan4930short ssincosd - tbl_trans # $33-5 fsincos denorm4931short ssincossnan - tbl_trans # $33-4 fsincos snan4932short tbl_trans - tbl_trans # $33-6 fsincos unnorm4933short tbl_trans - tbl_trans # $33-7 ERROR49344935short ssincos - tbl_trans # $34-0 fsincos norm4936short ssincosz - tbl_trans # $34-1 fsincos zero4937short ssincosi - tbl_trans # $34-2 fsincos inf4938short ssincosqnan - tbl_trans # $34-3 fsincos qnan4939short ssincosd - tbl_trans # $34-5 fsincos denorm4940short ssincossnan - tbl_trans # $34-4 fsincos snan4941short 
tbl_trans - tbl_trans # $34-6 fsincos unnorm4942short tbl_trans - tbl_trans # $34-7 ERROR49434944short ssincos - tbl_trans # $35-0 fsincos norm4945short ssincosz - tbl_trans # $35-1 fsincos zero4946short ssincosi - tbl_trans # $35-2 fsincos inf4947short ssincosqnan - tbl_trans # $35-3 fsincos qnan4948short ssincosd - tbl_trans # $35-5 fsincos denorm4949short ssincossnan - tbl_trans # $35-4 fsincos snan4950short tbl_trans - tbl_trans # $35-6 fsincos unnorm4951short tbl_trans - tbl_trans # $35-7 ERROR49524953short ssincos - tbl_trans # $36-0 fsincos norm4954short ssincosz - tbl_trans # $36-1 fsincos zero4955short ssincosi - tbl_trans # $36-2 fsincos inf4956short ssincosqnan - tbl_trans # $36-3 fsincos qnan4957short ssincosd - tbl_trans # $36-5 fsincos denorm4958short ssincossnan - tbl_trans # $36-4 fsincos snan4959short tbl_trans - tbl_trans # $36-6 fsincos unnorm4960short tbl_trans - tbl_trans # $36-7 ERROR49614962short ssincos - tbl_trans # $37-0 fsincos norm4963short ssincosz - tbl_trans # $37-1 fsincos zero4964short ssincosi - tbl_trans # $37-2 fsincos inf4965short ssincosqnan - tbl_trans # $37-3 fsincos qnan4966short ssincosd - tbl_trans # $37-5 fsincos denorm4967short ssincossnan - tbl_trans # $37-4 fsincos snan4968short tbl_trans - tbl_trans # $37-6 fsincos unnorm4969short tbl_trans - tbl_trans # $37-7 ERROR49704971##########49724973# the instruction fetch access for the displacement word for the4974# fdbcc emulation failed. 
# here, we create an access error frame
# from the current frame and branch to _real_access().
#
# Layout written by the stores below, as offsets from the final %sp:
#	0x0 = SR, 0x2 = PC, 0x6 = voff (0x4008), 0x8 = EA (a copy of the PC),
#	0xc = FSLW (0x09428001; bit 2 of the byte at 0xd is also set when the
#	stacked SR shows supervisor mode).
# NOTE(review): only one extra longword is pushed, yet 0xc-0xf(%sp) is
# written, so the incoming frame is presumably 12 bytes long -- confirm.
funimp_iacc:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1

	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR,hi(PC)
	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store EA
	mov.l		&0x09428001,0xc(%sp)	# store FSLW

	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		funimp_iacc_end		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

funimp_iacc_end:
	bra.l		_real_access

#########################################################################
# ssin():     computes the sine of a normalized input
# ssind():    computes the sine of a denormalized input
# scos():     computes the cosine of a normalized input
# scosd():    computes the cosine of a denormalized input
# ssincos():  computes the sine and cosine of a normalized input
# ssincosd(): computes the sine and cosine of a denormalized input
#
# INPUT ***************************************************************
#	a0 = pointer to extended precision input
#	d0 = round precision,mode
#
# OUTPUT **************************************************************
#	fp0 = sin(X) or cos(X)
#
#    For ssincos(X):
#	fp0 = sin(X)
#	fp1 = cos(X)
#
# ACCURACY and MONOTONICITY *******************************************
#	The returned result is within 1 ulp in 64 significant bits, i.e.
#	within 0.5001 ulp to 53 bits if the result is subsequently
#	rounded to double precision. The result is provably monotonic
#	in double precision.
#
# ALGORITHM ***********************************************************
#
#	SIN and COS:
#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
#
#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
#
#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
#		k = N mod 4, so in particular, k = 0,1,2,or 3.
#		Overwrite k by k := k + AdjN.
#
#	4. If k is even, go to 6.
#
#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.
#		Return sgn*cos(r) where cos(r) is approximated by an
#		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),
#		s = r*r.
#		Exit.
#
#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
#		where sin(r) is approximated by an odd polynomial in r
#		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
#		Exit.
#
#	7. If |X| > 1, go to 9.
#
#	8. (|X|<2**(-40)) If SIN is invoked, return X;
#		otherwise return 1.
#
#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,
#		go back to 3.
#
#	SINCOS:
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
#		k = N mod 4, so in particular, k = 0,1,2,or 3.
#
#	3. If k is even, go to 5.
#
#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.
#		j1 exclusive or with the l.s.b. of k.
#		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
#		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
#		sin(r) and cos(r) are computed as odd and even
#		polynomials in r, respectively. Exit
#
#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
#		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
#		sin(r) and cos(r) are computed as odd and even
#		polynomials in r, respectively. Exit
#
#	6. If |X| > 1, go to 8.
#
#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,
#		go back to 2.
#
#
#########################################################################

# Sine polynomial coefficients A1..A7.
# SINA7-SINA4 are two longwords each (one double).  SINA3 occupies four
# longwords but is read with a .d operation, so only its first two
# longwords are used.  SINA2 and SINA1 are read with .x operations
# (extended precision).
SINA7:	long		0xBD6AAA77,0xCCC994F5
SINA6:	long		0x3DE61209,0x7AAE8DA1
SINA5:	long		0xBE5AE645,0x2A118AE4
SINA4:	long		0x3EC71DE3,0xA5341531
SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

# Cosine polynomial coefficients B1..B8.
# COSB8-COSB5 are doubles; COSB4 is read with a .d operation (its last two
# longwords are unused); COSB3 and COSB2 are read with .x operations;
# COSB1 is a single (0xBF000000 = -0.5).
COSB8:	long		0x3D2AC4D0,0xD6011EE3
COSB7:	long		0xBDA9396F,0x9F45AC19
COSB6:	long		0x3E21EED9,0x0612C972
COSB5:	long		0xBE927E4F,0xB79D9FCF
COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long		0xBF000000

# Scratch-area aliases.  Several names deliberately share one slot:
# INARG, X and RPRIME are all FP_SCR0; POSNEG1 and TWOTO63 are both L_SCR1;
# ENDFLAG and INT are both L_SCR2.
	set		INARG,FP_SCR0

	set		X,FP_SCR0
#	set		XDCARE,X+2
	set		XFRAC,X+4

	set		RPRIME,FP_SCR0
	set		SPRIME,FP_SCR1

	set		POSNEG1,L_SCR1
	set		TWOTO63,L_SCR1

	set		ENDFLAG,L_SCR2
	set		INT,L_SCR2

	set		ADJN,L_SCR3

############################################
	global		ssin
ssin:
	mov.l		&0,ADJN(%a6)		# SET ADJN TO 0 (sine requested)
	bra.b		SINBGN

############################################
	global		scos
scos:
	mov.l		&1,ADJN(%a6)		# SET ADJN TO 1 (cosine requested)
						# falls through into SINBGN

############################################
SINBGN:
#--SAVE FPCR, FP1.
#--CHECK IF |X| IS TOO SMALL OR LARGE

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

# "COMPACTIFY" X
# d1 = exponent word in the upper half, top 16 mantissa bits in the lower
# half, sign cleared, so |X| can be range-checked with integer compares.
	mov.l		(%a0),%d1		# put exp in hi word
	mov.w		4(%a0),%d1		# fetch hi(man)
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bge.b		SOK1			# yes; go check the upper bound
	bra.w		SINSM			# no; input is very small

SOK1:
	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
	blt.b		SINMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; input is very large

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# make a copy of N
	asl.l		&4,%d1			# N *= 16
	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 & Y2
# (Y1 is read as an extended value, then Y2 as a single right after it)
	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2

SINCONT:
#--continuation from REDUCEX

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	mov.l		INT(%a6),%d1
	add.l		ADJN(%a6),%d1		# d1 = N+ADJN; SEE IF IT IS ODD OR EVEN
	ror.l		&1,%d1			# bit 0 -> bit 31: N+ADJN WAS ODD IFF d1 IS NOW NEGATIVE
	cmp.l		%d1,&0
	blt.w		COSPOLY			# odd: evaluate the cosine polynomial

#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R.
#--THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--WHERE T=S*S.
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#
# On entry: fp0 = R, d0 = user's rounding precision/mode, and d1 holds
# N+ADJN already rotated right by one bit.  fp2/fp3 are saved and
# restored here; fp1 is used as scratch.
SINPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp0,X(%a6)		# X IS R
	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		SINA7(%pc),%fp3
	fmov.d		SINA6(%pc),%fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	ror.l		&1,%d1			# second rotate: J moves into bit 31
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R

	fmul.x		%fp1,%fp3		# TA7
	fmul.x		%fp1,%fp2		# TA6

	fadd.d		SINA5(%pc),%fp3		# A5+TA7
	fadd.d		SINA4(%pc),%fp2		# A4+TA6

	fmul.x		%fp1,%fp3		# T(A5+TA7)
	fmul.x		%fp1,%fp2		# T(A4+TA6)

	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)

	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))

	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
	fmul.x		X(%a6),%fp0		# R'*S

	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

	fmul.x		%fp1,%fp0		# SIN(R')-R'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

# The user's FPCR is loaded only now, so that only the final add is
# performed under the caller's rounding precision/mode.
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_inx2

#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
#--S=R*R AND S'=SGN*S.
#--THIS CAN BE REWRITTEN AS
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
#--WHERE T=S*S.
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
#
# On entry: fp0 = R, d0 = user's rounding precision/mode, and d1 holds
# N+ADJN already rotated right by one bit.  fp2/fp3 are saved and
# restored here; fp1 is used as scratch.
COSPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		COSB8(%pc),%fp2
	fmov.d		COSB7(%pc),%fp3

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	fmov.x		%fp0,X(%a6)		# X IS S
	ror.l		&1,%d1			# second rotate: J moves into bit 31
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION

	fmul.x		%fp1,%fp2		# TB8

	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
	and.l		&0x80000000,%d1

	fmul.x		%fp1,%fp3		# TB7

	or.l		&0x3F800000,%d1		# D1 IS SGN IN SINGLE (+1.0 or -1.0)
	mov.l		%d1,POSNEG1(%a6)

	fadd.d		COSB6(%pc),%fp2		# B6+TB8
	fadd.d		COSB5(%pc),%fp3		# B5+TB7

	fmul.x		%fp1,%fp2		# T(B6+TB8)
	fmul.x		%fp1,%fp3		# T(B5+TB7)

	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)

	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))

	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))

	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))

	fadd.x		%fp1,%fp0

	fmul.x		X(%a6),%fp0

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
	bra		t_inx2

##############################################

# SINe: Big OR Small?
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
# NOTE(review): the sin/cos entry code above branches straight to SINSM or
# SREDUCEX; no branch to SINBORS is visible nearby -- possibly unused, confirm.
SINBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.l		SREDUCEX

SINSM:
	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&0
	bgt.b		COSTINY			# ADJN > 0: cosine was requested

# here, the operation may underflow iff the precision is sgl or dbl.
# extended denorms are handled through another entry point.
SINTINY:
#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN
# ... CASE (end of the commented-out mov.w line above)

# SINTINY body: sin(X) = X for tiny X; the move is done under the user's
# FPCR so that it is the instruction that raises any exception.
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch

# cos(X) for tiny X: 1.0 plus a tiny negative single (0x80800000), added
# under the user's FPCR.
COSTINY:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
	bra		t_pinx2

################################################
	global		ssind
#--SIN(X) = X FOR DENORMALIZED X
ssind:
	bra		t_extdnrm

############################################
	global		scosd
#--COS(X) = 1 FOR DENORMALIZED X
scosd:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	bra		t_pinx2

##################################################

	global		ssincos
ssincos:
#--SET ADJN TO 4
# (SRESTORE compares ADJN with 4 to decide whether to resume at SINCONT
# or at SCCONT)
	mov.l		&4,ADJN(%a6)

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		SCOK1			# yes; go check the upper bound
	bra.w		SCSM			# no; input is very small

SCOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		SCMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; general argument reduction


#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SCMAIN:
	fmov.x		%fp0,%fp1

	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1
	asl.l		&4,%d1
	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

SCCONT:
#--continuation point from REDUCEX

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1
	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
	bge.w		NEVEN

SNODD:
#--fp2 is saved just below; d2 is pushed a few instructions further on.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
	fmov.d		SINA7(%pc),%fp1		# A7
	fmov.d		COSB8(%pc),%fp2		# B8
	fmul.x		%fp0,%fp1		# SA7
	fmul.x		%fp0,%fp2		# fp2 = S*B8
# ... SB8 (end of the comment on the fmul.x above)

# Remainder of SNODD (N odd): fp1 accumulates the sine polynomial, fp2 the
# cosine polynomial, while the integer unit builds the two result signs.
# d1 arrives as N rotated right by one bit.
	mov.l		%d2,-(%sp)		# d2 is scratch here; restored below
	mov.l		%d1,%d2
	ror.l		&1,%d2
	and.l		&0x80000000,%d2
	eor.l		%d1,%d2			# combine with the (rotated) l.s.b. of N
	and.l		&0x80000000,%d2		# d2 = sign to apply to R'

	fadd.d		SINA6(%pc),%fp1		# A6+SA7
	fadd.d		COSB7(%pc),%fp2		# B7+SB8

	fmul.x		%fp0,%fp1		# S(A6+SA7)
	eor.l		%d2,RPRIME(%a6)
	mov.l		(%sp)+,%d2
	fmul.x		%fp0,%fp2		# S(B7+SB8)
	ror.l		&1,%d1
	and.l		&0x80000000,%d1
	mov.l		&0x3F800000,POSNEG1(%a6)
	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +1.0 or -1.0 in single

	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)

	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
	fmov.x		%fp0,SPRIME(%a6)

	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
	eor.l		%d1,SPRIME(%a6)		# S' = S with the same sign as POSNEG1
	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))

	fmul.x		%fp0,%fp1		# S(A4+...)
	fmul.x		%fp0,%fp2		# S(B5+...)

	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)

	fmul.x		%fp0,%fp1		# S(A3+...)
	fmul.x		%fp0,%fp2		# S(B4+...)

	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)

	fmul.x		%fp0,%fp1		# S(A2+...)
	fmul.x		%fp0,%fp2		# S(B3+...)

	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)

	fmul.x		%fp0,%fp1		# S(A1+...)
	fmul.x		%fp2,%fp0		# S(B2+...)

	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr
	fadd.x		RPRIME(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
	bra		t_inx2

NEVEN:
#--REGISTERS SAVED SO FAR: FP2.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R

	fmov.d		COSB8(%pc),%fp1		# B8
	fmov.d		SINA7(%pc),%fp2		# A7

	fmul.x		%fp0,%fp1		# SB8
	fmov.x		%fp0,SPRIME(%a6)
	fmul.x		%fp0,%fp2		# SA7

	ror.l		&1,%d1
	and.l		&0x80000000,%d1

	fadd.d		COSB7(%pc),%fp1		# B7+SB8
	fadd.d		SINA6(%pc),%fp2		# fp2 = A6+S*A7
# ... A6+SA7 (end of the comment on the fadd.d above)

# Remainder of NEVEN (N even): fp1 accumulates the cosine polynomial and
# fp2 the sine polynomial; d1 holds the common sign bit.
	eor.l		%d1,RPRIME(%a6)
	eor.l		%d1,SPRIME(%a6)

	fmul.x		%fp0,%fp1		# S(B7+SB8)

	or.l		&0x3F800000,%d1
	mov.l		%d1,POSNEG1(%a6)	# POSNEG1 = +1.0 or -1.0 in single

	fmul.x		%fp0,%fp2		# S(A6+SA7)

	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)

	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))

	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))

	fmul.x		%fp0,%fp1		# S(B5+...)
	fmul.x		%fp0,%fp2		# S(A4+...)

	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)

	fmul.x		%fp0,%fp1		# S(B4+...)
	fmul.x		%fp0,%fp2		# S(A3+...)

	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)

	fmul.x		%fp0,%fp1		# S(B3+...)
	fmul.x		%fp0,%fp2		# S(A2+...)

	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)

	fmul.x		%fp0,%fp1		# S(B2+...)
	fmul.x		%fp2,%fp0		# s(a1+...)


	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr
	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
	bra		t_inx2

################################################

# NOTE(review): no branch to SCBORS is visible in the ssincos entry code,
# which goes straight to SCSM or SREDUCEX -- possibly unused, confirm.
SCBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.w		SREDUCEX

################################################

# Tiny |X| for ssincos: cos(X) is 1.0 minus a tiny single (0x00800000),
# computed under the user's FPCR and stored through sto_cos; sin(X) is X
# itself.
SCSM:
#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1

	fmov.l		%d0,%fpcr
	fsub.s		&0x00800000,%fp1
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0
	bra		t_catch

##############################################

	global		ssincosd
#--SIN AND COS OF X FOR DENORMALIZED X
ssincosd:
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
	bra		t_extdnrm

############################################

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#
# On entry: fp0 = X, d1 = compact form of |X|, ADJN set by the caller's
# entry point.  Saves fp2-fp5 and d2, which SRESTORE puts back.
SREDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration. In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		SLOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		sred_neg

# positive argument: set the sign bit of both pieces so that the adds
# below subtract them.
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
sred_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
SLOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of the argument
	mov.l		%d1,%a1			# save a copy of D1 (sign is used in SWORK)
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		SLASTLOOP
SCONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		SWORK
SLASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

SWORK:
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# copy saved at SLOOP
	swap		%d2			# sign of INARG -> bit 31
	and.l		&0x80000000,%d2
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fint.x		%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# low byte of d1 = last-iteration flag

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		SRESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		SLOOP

# Reduction finished: store N where the polynomial code expects it, undo
# the saves made at SREDUCEX and resume in the path selected by ADJN.
SRESTORE:
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&4

	blt.w		SINCONT			# ADJN < 4: came from ssin/scos
	bra.w		SCCONT			# otherwise: came from ssincos

#########################################################################
# stan(): computes the tangent of a normalized input #
# stand(): computes the tangent of a denormalized input #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision input #
# d0 = round precision,mode #
# #
# OUTPUT ************************************************************** #
# fp0 = tan(X) #
# #
# ACCURACY and MONOTONICITY ******************************************* #
# The returned result is within 3 ulp in 64 significant bit, i.e. #
# within 0.5001 ulp to 53 bits if the result is subsequently #
# rounded to double precision. The result is provably monotonic #
# in double precision. #
# #
# ALGORITHM *********************************************************** #
# #
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
# #
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
# k = N mod 2, so in particular, k = 0 or 1. #
# #
# 3. If k is odd, go to 5. #
# #
# 4.
#	   (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
#	   rational function U/V where					#
#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.	#
#	   Exit.							#
#									#
#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by	#
#	   a rational function U/V where				#
#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
#	   -Cot(r) = -V/U. Exit.					#
#									#
#	6. If |X| > 1, go to 8.						#
#									#
#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
#									#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
#	   to 2.							#
#									#
#########################################################################

#--COEFFICIENTS OF THE RATIONAL APPROXIMATION U/V DESCRIBED ABOVE.
#--TANQ4, TANP3 AND TANQ3 ARE READ AS DOUBLES (fmov.d/fadd.d); THE
#--REMAINING P/Q TERMS ARE READ AS EXTENDED (fadd.x).
TANQ4:
	long		0x3EA0B759,0xF50F8688
TANP3:
	long		0xBEF2BAA5,0xA8924F04

TANQ3:
	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2:
	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2:
	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1:
	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1:
	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

# NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced by the
# code visible in this section -- confirm whether they are still used.
INVTWOPI:
	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1:
	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2:
	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000

#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
#--MOST 69 BITS LONG.
#--EACH ENTRY IS 16 BYTES; THE N = 0 ENTRY SITS AT PITBL+0x200, WHICH IS
#--THE BASE ADDRESS stan LOADS BEFORE ADDING N*16.
#	global		PITBL
PITBL:
	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000

#--SCRATCH LOCATIONS USED BY THE ARGUMENT REDUCTION BELOW.
#--NOTE THAT TWOTO63 AND INT SHARE L_SCR1.
	set		INARG,FP_SCR0

	set		TWOTO63,L_SCR1
	set		INT,L_SCR1
	set		ENDFLAG,L_SCR2

	global		stan
#--ENTRY POINT FOR TAN(X), NORMALIZED X.
#--a0 = POINTER TO EXTENDED PRECISION INPUT, d0 = ROUND PRECISION,MODE.
#--d1 IS LOADED WITH |X| IN COMPACT FORM: SIGN/EXPONENT WORD IN THE UPPER
#--HALF, TOP 16 MANTISSA BITS IN THE LOWER HALF, SIGN BIT CLEARED.
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1
	bra.w		TANSM
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		TANMAIN
	bra.w		REDUCEX

TANMAIN:
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,%d1		# CONVERT TO INTEGER

	asl.l		&4,%d1			# d1 = N*16 (BYTE OFFSET OF ENTRY)
	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1

	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

	ror.l		&5,%d1			# MOVE BIT 0 OF N INTO THE SIGN BIT
	and.l		&0x80000000,%d1		# N WAS ODD IFF d1 < 0

#--ON ENTRY TO TANCONT: fp0 = R, AND d1 IS NEGATIVE IFF N IS ODD.
TANCONT:
	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3

	cmp.l		%d1,&0
	blt.w		NODD

#--N EVEN: TAN(X) = U/V.
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp1,%fp3		# SQ4
	fmul.x		%fp1,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
	fmul.x		%fp1,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp0		# R+RS(P1+S(P2+SP3))

	fadd.s		&0x3F800000,%fp1	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		%fp1,%fp0		# last inst - possible exception set
	bra		t_inx2

#--N ODD: TAN(X) = -COT(R) = -V/U.  HERE fp1 HOLDS R (LATER U) AND
#--fp0 HOLDS S (LATER V).
NODD:
	fmov.x		%fp0,%fp1
	fmul.x		%fp0,%fp0		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp0,%fp3		# SQ4
	fmul.x		%fp0,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
	fmul.x		%fp0,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp1		# R+RS(P1+S(P2+SP3))
	fadd.s		&0x3F800000,%fp0	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.x		%fp1,-(%sp)		# push U
	eor.l		&0x80000000,(%sp)	# flip its sign bit: -U

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_inx2

# NOTE(review): no branch to TANBORS appears in the code visible here;
# confirm whether this label is still reachable.
TANBORS:
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X.
	cmp.l		%d1,&0x3FFF8000
	bgt.b		REDUCEX

TANSM:
	fmov.x		%fp0,-(%sp)
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_catch

	global		stand
#--TAN(X) = X FOR DENORMALIZED X
stand:
	bra		t_extdnrm

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
REDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg

# positive arg: set the sign bit of both constants so the fadd's below
# subtract them
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1
	mov.l		%d1,%a1			# save a copy of d1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2
	swap		%d2
	and.l		&0x80000000,%d2
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1 = last-iteration flag from here on

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP

RESTORE:
	fmov.l		%fp2,INT(%a6)		# store N as an integer
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1			# bit 0 of N -> sign bit for TANCONT

	bra.w		TANCONT

#########################################################################
# satan():  computes the arctangent of a normalized number		#
# satand(): computes the arctangent of a denormalized number		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arctan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
#									#
#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
#		Note that k = -4, -3,..., or 3.				#
#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
#		significant bits of X with a bit-1 attached at the 6-th	#
#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
#									#
#	Step 3. Approximate arctan(u) by a polynomial poly.		#
#									#
#	Step 4.
#		Return arctan(F) + poly, arctan(F) is fetched from a	#
#		table of values calculated beforehand. Exit.		#
#									#
#	Step 5. If |X| >= 16, go to Step 7.				#
#									#
#	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
#									#
#	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd	#
#		polynomial in X'.					#
#		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
#									#
#########################################################################

#--POLYNOMIAL COEFFICIENTS, ALL READ AS DOUBLES (fmov.d/fadd.d/fmul.d):
#--ATANA1..A3 FOR THE MAIN PATH, ATANB1..B6 FOR |X| <= 1/16,
#--ATANC1..C5 FOR |X| >= 16.
ATANA3:	long		0xBFF6687E,0x314987D8
ATANA2:	long		0x4002AC69,0x34A26DB3
ATANA1:	long		0xBFC2476F,0x4E1DA28E

ATANB6:	long		0x3FB34444,0x7F876989
ATANB5:	long		0xBFB744EE,0x7FAF45DB
ATANB4:	long		0x3FBC71C6,0x46940220
ATANB3:	long		0xBFC24924,0x921872F9
ATANB2:	long		0x3FC99999,0x99998FA9
ATANB1:	long		0xBFD55555,0x55555555

ATANC5:	long		0xBFB70BF3,0x98539E6A
ATANC4:	long		0x3FBC7187,0x962D1D7D
ATANC3:	long		0xBFC24924,0x827107B8
ATANC2:	long		0x3FC99999,0x9996263E
ATANC1:	long		0xBFD55555,0x55555536

PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000

PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000

#--TABLE OF ATAN(|F|), 16 BYTES PER ENTRY (EXTENDED VALUE PLUS ONE PAD
#--LONG), INDEXED BY THE 7 BITS IDENTIFYING F (SEE ATANMAIN).
ATANTBL:
	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000

#--SCRATCH LAYOUT: X (AND ITS FIELDS) LIVES IN FP_SCR0, ATANF IN FP_SCR1.
	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4
	set		XFRACLO,X+8

	set		ATANF,FP_SCR1
	set		ATANFHI,ATANF+4
	set		ATANFLO,ATANF+8

	global		satan
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--d1 IS LOADED WITH |X| IN COMPACT FORM: SIGN/EXPONENT WORD IN THE UPPER
#--HALF, TOP 16 MANTISSA BITS IN THE LOWER HALF, SIGN BIT CLEARED.
satan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
	bge.b		ATANOK1
	bra.w		ATANSM

ATANOK1:
	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
	ble.b		ATANMAIN
	bra.w		ATANBIG

#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
#--WILL INVOLVE A VERY LONG POLYNOMIAL.

#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F

	fmov.x		%fp0,%fp1		# FP1 IS X
	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
	fsub.x		X(%a6),%fp0		# FP0 IS X-F
	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)

#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
#--SAVE REGISTERS FP2.

	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
	sub.l		&0x3FFB0000,%d2		# K+4
	asr.l		&1,%d2
	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
	lea		ATANTBL(%pc),%a1
	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
	mov.l		(%a1)+,ATANF(%a6)
	mov.l		(%a1)+,ATANFHI(%a6)
	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
	and.l		&0x80000000,%d1		# SIGN(F)
	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
	mov.l		(%sp)+,%d2		# RESTORE d2

#--THAT'S ALL I HAVE TO DO FOR NOW,
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!

#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
#--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED

	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
	fmov.d		ATANA3(%pc),%fp2
	fadd.x		%fp1,%fp2		# A3+V
	fmul.x		%fp1,%fp2		# V*(A3+V)
	fmul.x		%fp0,%fp1		# U*V
	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
	bra		t_inx2

# NOTE(review): no branch to ATANBORS appears in the code visible here;
# confirm whether this label is still reachable.
ATANBORS:
#--|X| IS IN d1 IN COMPACT FORM.
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
	cmp.l		%d1,&0x3FFF8000
	bgt.w		ATANBIG			# I.E. |X| >= 16

ATANSM:
#--|X| <= 1/16
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
#--WHERE Y = X*X, AND Z = Y*Y.

	cmp.l		%d1,&0x3FD78000
	blt.w		ATANTINY

#--COMPUTE POLYNOMIAL
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANB6(%pc),%fp2
	fmov.d		ATANB5(%pc),%fp3

	fmul.x		%fp1,%fp2		# Z*B6
	fmul.x		%fp1,%fp3		# Z*B5

	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5

	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)

	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)

	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
	fmul.x		X(%a6),%fp0		# X*Y

	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		X(%a6),%fp0
	bra		t_inx2

ATANTINY:
#--|X| < 2^(-40), ATAN(X) = X

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set

	bra		t_catch

ATANBIG:
#--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmp.l		%d1,&0x40638000
	bgt.w		ATANHUGE

#--APPROXIMATE ATAN(-1/X) BY
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
#--THIS CAN BE RE-WRITTEN AS
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.s		&0xBF800000,%fp1	# LOAD -1
	fdiv.x		%fp0,%fp1		# FP1 IS -1/X

#--DIVIDE IS STILL CRANKING

	fmov.x		%fp1,%fp0		# FP0 IS X'
	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
	fmov.x		%fp1,X(%a6)		# X IS REALLY X'

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANC5(%pc),%fp3
	fmov.d		ATANC4(%pc),%fp2

	fmul.x		%fp1,%fp3		# Z*C5
	fmul.x		%fp1,%fp2		# Z*C4

	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4

	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)

	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
	fmul.x		X(%a6),%fp0		# X'*Y

	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
#					...	+[Y*(C2+Z*C4)])
	fadd.x		X(%a6),%fp0

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	tst.b		(%a0)			# sign of the original X
	bpl.b		pos_big

neg_big:
	fadd.x		NPIBY2(%pc),%fp0
	bra		t_minx2

pos_big:
	fadd.x		PPIBY2(%pc),%fp0
	bra		t_pinx2

ATANHUGE:
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
	tst.b		(%a0)
	bpl.b		pos_huge

neg_huge:
	fmov.x		NPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr
	fadd.x		PTINY(%pc),%fp0
	bra		t_minx2

pos_huge:
	fmov.x		PPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr
	fadd.x		NTINY(%pc),%fp0
	bra		t_pinx2

	global		satand
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
satand:
	bra		t_extdnrm

#########################################################################
# sasin():  computes the inverse sine of a normalized input		#
# sasind(): computes the inverse sine of a denormalized input		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arcsin(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	ASIN								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate asin(X) by				#
#		z := sqrt( [1-X][1+X] )					#
#		asin(X) = atan( x / z ).				#
#	   Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#	   Exit.							#
#6624# #6625#########################################################################66266627global sasin6628sasin:6629fmov.x (%a0),%fp0 # LOAD INPUT66306631mov.l (%a0),%d16632mov.w 4(%a0),%d16633and.l &0x7FFFFFFF,%d16634cmp.l %d1,&0x3FFF80006635bge.b ASINBIG66366637# This catch is added here for the '060 QSP. Originally, the call to6638# satan() would handle this case by causing the exception which would6639# not be caught until gen_except(). Now, with the exceptions being6640# detected inside of satan(), the exception would have been handled there6641# instead of inside sasin() as expected.6642cmp.l %d1,&0x3FD780006643blt.w ASINTINY66446645#--THIS IS THE USUAL CASE, |X| < 16646#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )66476648ASINMAIN:6649fmov.s &0x3F800000,%fp16650fsub.x %fp0,%fp1 # 1-X6651fmovm.x &0x4,-(%sp) # {fp2}6652fmov.s &0x3F800000,%fp26653fadd.x %fp0,%fp2 # 1+X6654fmul.x %fp2,%fp1 # (1+X)(1-X)6655fmovm.x (%sp)+,&0x20 # {fp2}6656fsqrt.x %fp1 # SQRT([1-X][1+X])6657fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])6658fmovm.x &0x01,-(%sp) # save X/SQRT(...)6659lea (%sp),%a0 # pass ptr to X/SQRT(...)6660bsr satan6661add.l &0xc,%sp # clear X/SQRT(...) 
from stack6662bra t_inx266636664ASINBIG:6665fabs.x %fp0 # |X|6666fcmp.s %fp0,&0x3F8000006667fbgt t_operr # cause an operr exception66686669#--|X| = 1, ASIN(X) = +- PI/2.6670ASINONE:6671fmov.x PIBY2(%pc),%fp06672mov.l (%a0),%d16673and.l &0x80000000,%d1 # SIGN BIT OF X6674or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT6675mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT6676fmov.l %d0,%fpcr6677fmul.s (%sp)+,%fp06678bra t_inx266796680#--|X| < 2^(-40), ATAN(X) = X6681ASINTINY:6682fmov.l %d0,%fpcr # restore users rnd mode,prec6683mov.b &FMOV_OP,%d1 # last inst is MOVE6684fmov.x (%a0),%fp0 # last inst - possible exception6685bra t_catch66866687global sasind6688#--ASIN(X) = X FOR DENORMALIZED X6689sasind:6690bra t_extdnrm66916692#########################################################################6693# sacos(): computes the inverse cosine of a normalized input #6694# sacosd(): computes the inverse cosine of a denormalized input #6695# #6696# INPUT *************************************************************** #6697# a0 = pointer to extended precision input #6698# d0 = round precision,mode #6699# #6700# OUTPUT ************************************************************** #6701# fp0 = arccos(X) #6702# #6703# ACCURACY and MONOTONICITY ******************************************* #6704# The returned result is within 3 ulps in 64 significant bit, #6705# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #6706# rounded to double precision. The result is provably monotonic #6707# in double precision. #6708# #6709# ALGORITHM *********************************************************** #6710# #6711# ACOS #6712# 1. If |X| >= 1, go to 3. #6713# #6714# 2. (|X| < 1) Calculate acos(X) by #6715# z := (1-X) / (1+X) #6716# acos(X) = 2 * atan( sqrt(z) ). #6717# Exit. #6718# #6719# 3. If |X| > 1, go to 5. #6720# #6721# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #6722# #6723# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #6724# Exit. 
#									#
#########################################################################

	global		sacos
sacos:
	fmov.x		(%a0),%fp0		# fp0 = X

# d1 = compact |X|: exponent word (sign masked) over the top 16
# mantissa bits, so the |X| >= 1 test is an integer compare.
	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000		# compact form of 1.0
	bge.b		ACOSBIG			# |X| >= 1

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) ) )

ACOSMAIN:
	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
	fadd.x		%fp0,%fp1		# fp1 = 1+X
	fneg.x		%fp0			# fp0 = -X
	fadd.s		&0x3F800000,%fp0	# fp0 = 1-X
	fdiv.x		%fp1,%fp0		# fp0 = (1-X)/(1+X)
	fsqrt.x		%fp0			# fp0 = SQRT((1-X)/(1+X))
	mov.l		%d0,-(%sp)		# save original users fpcr
	clr.l		%d0			# satan is entered with d0 = 0
	fmovm.x		&0x01,-(%sp)		# push SQRT(...) as satan's operand
	lea		(%sp),%a0		# a0 -> SQRT(...)
	bsr		satan			# fp0 = ATAN(SQRT([1-X]/[1+X]))
	add.l		&0xc,%sp		# pop the 12-byte operand

	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
	bra		t_pinx2

ACOSBIG:
	fabs.x		%fp0			# fp0 = |X|
	fcmp.s		%fp0,&0x3F800000	# compare against 1.0
	fbgt		t_operr			# |X| > 1: operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b		(%a0)			# is X positive or negative?
	bpl.b		ACOSP1

#--X = -1
#Returns PI and inexact exception
ACOSM1:
	fmov.x		PI(%pc),%fp0		# load PI
	fmov.l		%d0,%fpcr		# load round mode,prec
	fadd.s		&0x00800000,%fp0	# add a small value (2^(-126) in sgl)
	bra		t_pinx2

#--X = +1
ACOSP1:
	bra		ld_pzero		# answer is positive zero

	global		sacosd
#--ACOS(X) = PI/2 FOR DENORMALIZED X
sacosd:
	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
	fmov.x		PIBY2(%pc),%fp0		# fp0 = PI/2 under the user's fpcr
	bra		t_pinx2

#########################################################################
# setox():    computes the exponential for a normalized input		#
# setoxd():   computes the exponential for a denormalized input		#
# setoxm1():  computes the exponential minus 1 for a normalized input	#
# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
#									#
# INPUT *************************************************************** #
#	a0 =
pointer to extended precision input #6794# d0 = round precision,mode #6795# #6796# OUTPUT ************************************************************** #6797# fp0 = exp(X) or exp(X)-1 #6798# #6799# ACCURACY and MONOTONICITY ******************************************* #6800# The returned result is within 0.85 ulps in 64 significant bit, #6801# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #6802# rounded to double precision. The result is provably monotonic #6803# in double precision. #6804# #6805# ALGORITHM and IMPLEMENTATION **************************************** #6806# #6807# setoxd #6808# ------ #6809# Step 1. Set ans := 1.0 #6810# #6811# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #6812# Notes: This will always generate one exception -- inexact. #6813# #6814# #6815# setox #6816# ----- #6817# #6818# Step 1. Filter out extreme cases of input argument. #6819# 1.1 If |X| >= 2^(-65), go to Step 1.3. #6820# 1.2 Go to Step 7. #6821# 1.3 If |X| < 16380 log(2), go to Step 2. #6822# 1.4 Go to Step 8. #6823# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#6824# To avoid the use of floating-point comparisons, a #6825# compact representation of |X| is used. This format is a #6826# 32-bit integer, the upper (more significant) 16 bits #6827# are the sign and biased exponent field of |X|; the #6828# lower 16 bits are the 16 most significant fraction #6829# (including the explicit bit) bits of |X|. Consequently, #6830# the comparisons in Steps 1.1 and 1.3 can be performed #6831# by integer comparison. Note also that the constant #6832# 16380 log(2) used in Step 1.3 is also in the compact #6833# form. Thus taking the branch to Step 2 guarantees #6834# |X| < 16380 log(2). There is no harm to have a small #6835# number of cases where |X| is less than, but close to, #6836# 16380 log(2) and the branch to Step 9 is taken. #6837# #6838# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). 
#6839# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #6840# was taken) #6841# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #6842# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #6843# or 63. #6844# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #6845# 2.5 Calculate the address of the stored value of #6846# 2^(J/64). #6847# 2.6 Create the value Scale = 2^M. #6848# Notes: The calculation in 2.2 is really performed by #6849# Z := X * constant #6850# N := round-to-nearest-integer(Z) #6851# where #6852# constant := single-precision( 64/log 2 ). #6853# #6854# Using a single-precision constant avoids memory #6855# access. Another effect of using a single-precision #6856# "constant" is that the calculated value Z is #6857# #6858# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #6859# #6860# This error has to be considered later in Steps 3 and 4. #6861# #6862# Step 3. Calculate X - N*log2/64. #6863# 3.1 R := X + N*L1, #6864# where L1 := single-precision(-log2/64). #6865# 3.2 R := R + N*L2, #6866# L2 := extended-precision(-log2/64 - L1).#6867# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #6868# approximate the value -log2/64 to 88 bits of accuracy. #6869# b) N*L1 is exact because N is no longer than 22 bits #6870# and L1 is no longer than 24 bits. #6871# c) The calculation X+N*L1 is also exact due to #6872# cancellation. Thus, R is practically X+N(L1+L2) to full #6873# 64 bits. #6874# d) It is important to estimate how large can |R| be #6875# after Step 3.2. #6876# #6877# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #6878# X*64/log2 (1+eps) = N + f, |f| <= 0.5 #6879# X*64/log2 - N = f - eps*X 64/log2 #6880# X - N*log2/64 = f*log2/64 - eps*X #6881# #6882# #6883# Now |X| <= 16446 log2, thus #6884# #6885# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #6886# <= 0.57 log2/64. #6887# This bound will be used in Step 4. #6888# #6889# Step 4. 
Approximate exp(R)-1 by a polynomial #6890# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #6891# Notes: a) In order to reduce memory access, the coefficients #6892# are made as "short" as possible: A1 (which is 1/2), A4 #6893# and A5 are single precision; A2 and A3 are double #6894# precision. #6895# b) Even with the restrictions above, #6896# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #6897# Note that 0.0062 is slightly bigger than 0.57 log2/64. #6898# c) To fully utilize the pipeline, p is separated into #6899# two independent pieces of roughly equal complexities #6900# p = [ R + R*S*(A2 + S*A4) ] + #6901# [ S*(A1 + S*(A3 + S*A5)) ] #6902# where S = R*R. #6903# #6904# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #6905# ans := T + ( T*p + t) #6906# where T and t are the stored values for 2^(J/64). #6907# Notes: 2^(J/64) is stored as T and t where T+t approximates #6908# 2^(J/64) to roughly 85 bits; T is in extended precision #6909# and t is in single precision. Note also that T is #6910# rounded to 62 bits so that the last two bits of T are #6911# zero. The reason for such a special form is that T-1, #6912# T-2, and T-8 will all be exact --- a property that will #6913# give much more accurate computation of the function #6914# EXPM1. #6915# #6916# Step 6. Reconstruction of exp(X) #6917# exp(X) = 2^M * 2^(J/64) * exp(R). #6918# 6.1 If AdjFlag = 0, go to 6.3 #6919# 6.2 ans := ans * AdjScale #6920# 6.3 Restore the user FPCR #6921# 6.4 Return ans := ans * Scale. Exit. #6922# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #6923# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #6924# neither overflow nor underflow. If AdjFlag = 1, that #6925# means that #6926# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #6927# Hence, exp(X) may overflow or underflow or neither. #6928# When that is the case, AdjScale = 2^(M1) where M1 is #6929# approximately M. Thus 6.2 will never cause #6930# over/underflow. 
#		Possible exception in 6.4 is overflow			#
#		or underflow. The inexact exception is not generated in	#
#		6.4. Although one can argue that the inexact flag	#
#		should always be raised, simulating that exception	#
#		costs much more than the flag is worth in practical use.#
#									#
#	Step 7.	Return 1 + X.						#
#		7.1	ans := X					#
#		7.2	Restore user FPCR.				#
#		7.3	Return ans := 1 + ans. Exit			#
#	Notes:	For non-zero X, the inexact exception will always be	#
#		raised by 7.3. That is the only exception raised by 7.3.#
#		Note also that we use the FMOVEM instruction to move X	#
#		in Step 7.1 to avoid unnecessary trapping. (Although	#
#		the FMOVEM may not seem relevant since X is normalized,	#
#		the precaution will be useful in the library version of	#
#		this code where the separate entry for denormalized	#
#		inputs will be done away with.)				#
#									#
#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
#		8.1	If |X| > 16480 log2, go to Step 9.		#
#		(mimic 2.2 - 2.6)					#
#		8.2	N := round-to-integer( X * 64/log2 )		#
#		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
#		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1,	#
#			AdjFlag := 1.					#
#		8.5	Calculate the address of the stored value	#
#			2^(J/64).					#
#		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
#		8.7	Go to Step 3.					#
#	Notes:	Refer to notes for 2.2 - 2.6.				#
#									#
#	Step 9.	Handle exp(X), |X| > 16480 log2.			#
#		9.1	If X < 0, go to 9.3				#
#		9.2	ans := Huge, go to 9.4				#
#		9.3	ans := Tiny.					#
#		9.4	Restore user FPCR.				#
#		9.5	Return ans := ans * ans. Exit.			#
#	Notes:	Exp(X) will surely overflow or underflow, depending on	#
#		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
#		extended-precision numbers whose square over/underflow	#
#		with an inexact result. Thus, 9.5 always raises the	#
#		inexact together with either overflow or underflow.	#
#									#
#	setoxm1d							#
#	--------							#
#									#
#	Step 1.
Set ans := 0 #6978# #6979# Step 2. Return ans := X + ans. Exit. #6980# Notes: This will return X with the appropriate rounding #6981# precision prescribed by the user FPCR. #6982# #6983# setoxm1 #6984# ------- #6985# #6986# Step 1. Check |X| #6987# 1.1 If |X| >= 1/4, go to Step 1.3. #6988# 1.2 Go to Step 7. #6989# 1.3 If |X| < 70 log(2), go to Step 2. #6990# 1.4 Go to Step 10. #6991# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#6992# However, it is conceivable |X| can be small very often #6993# because EXPM1 is intended to evaluate exp(X)-1 #6994# accurately when |X| is small. For further details on #6995# the comparisons, see the notes on Step 1 of setox. #6996# #6997# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #6998# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #6999# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #7000# or 63. #7001# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #7002# 2.4 Calculate the address of the stored value of #7003# 2^(J/64). #7004# 2.5 Create the values Sc = 2^M and #7005# OnebySc := -2^(-M). #7006# Notes: See the notes on Step 2 of setox. #7007# #7008# Step 3. Calculate X - N*log2/64. #7009# 3.1 R := X + N*L1, #7010# where L1 := single-precision(-log2/64). #7011# 3.2 R := R + N*L2, #7012# L2 := extended-precision(-log2/64 - L1).#7013# Notes: Applying the analysis of Step 3 of setox in this case #7014# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #7015# this case). #7016# #7017# Step 4. Approximate exp(R)-1 by a polynomial #7018# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #7019# Notes: a) In order to reduce memory access, the coefficients #7020# are made as "short" as possible: A1 (which is 1/2), A5 #7021# and A6 are single precision; A2, A3 and A4 are double #7022# precision. #7023# b) Even with the restriction above, #7024# |p - (exp(R)-1)| < |R| * 2^(-72.7) #7025# for all |R| <= 0.0055. 
#7026# c) To fully utilize the pipeline, p is separated into #7027# two independent pieces of roughly equal complexity #7028# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #7029# [ R + S*(A1 + S*(A3 + S*A5)) ] #7030# where S = R*R. #7031# #7032# Step 5. Compute 2^(J/64)*p by #7033# p := T*p #7034# where T and t are the stored values for 2^(J/64). #7035# Notes: 2^(J/64) is stored as T and t where T+t approximates #7036# 2^(J/64) to roughly 85 bits; T is in extended precision #7037# and t is in single precision. Note also that T is #7038# rounded to 62 bits so that the last two bits of T are #7039# zero. The reason for such a special form is that T-1, #7040# T-2, and T-8 will all be exact --- a property that will #7041# be exploited in Step 6 below. The total relative error #7042# in p is no bigger than 2^(-67.7) compared to the final #7043# result. #7044# #7045# Step 6. Reconstruction of exp(X)-1 #7046# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #7047# 6.1 If M <= 63, go to Step 6.3. #7048# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #7049# 6.3 If M >= -3, go to 6.5. #7050# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #7051# 6.5 ans := (T + OnebySc) + (p + t). #7052# 6.6 Restore user FPCR. #7053# 6.7 Return ans := Sc * ans. Exit. #7054# Notes: The various arrangements of the expressions give #7055# accurate evaluations. #7056# #7057# Step 7. exp(X)-1 for |X| < 1/4. #7058# 7.1 If |X| >= 2^(-65), go to Step 9. #7059# 7.2 Go to Step 8. #7060# #7061# Step 8. Calculate exp(X)-1, |X| < 2^(-65). #7062# 8.1 If |X| < 2^(-16312), goto 8.3 #7063# 8.2 Restore FPCR; return ans := X - 2^(-16382). #7064# Exit. #7065# 8.3 X := X * 2^(140). #7066# 8.4 Restore FPCR; ans := ans - 2^(-16382). #7067# Return ans := ans*2^(140). Exit #7068# Notes: The idea is to return "X - tiny" under the user #7069# precision and rounding modes. To avoid unnecessary #7070# inefficiency, we stay away from denormalized numbers #7071# the best we can. 
For |X| >= 2^(-16312), the #7072# straightforward 8.2 generates the inexact exception as #7073# the case warrants. #7074# #7075# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #7076# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #7077# Notes: a) In order to reduce memory access, the coefficients #7078# are made as "short" as possible: B1 (which is 1/2), B9 #7079# to B12 are single precision; B3 to B8 are double #7080# precision; and B2 is double extended. #7081# b) Even with the restriction above, #7082# |p - (exp(X)-1)| < |X| 2^(-70.6) #7083# for all |X| <= 0.251. #7084# Note that 0.251 is slightly bigger than 1/4. #7085# c) To fully preserve accuracy, the polynomial is #7086# computed as #7087# X + ( S*B1 + Q ) where S = X*X and #7088# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #7089# d) To fully utilize the pipeline, Q is separated into #7090# two independent pieces of roughly equal complexity #7091# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #7092# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #7093# #7094# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #7095# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #7096# practical purposes. Therefore, go to Step 1 of setox. #7097# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #7098# purposes. #7099# ans := -1 #7100# Restore user FPCR #7101# Return ans := ans + 2^(-126). Exit. #7102# Notes: 10.2 will always create an inexact and return -1 + tiny #7103# in the user rounding precision and mode. 
#									#
#########################################################################

# Extended-precision L2; together with the single-precision L1 immediate
# used in Step 3 it forms -log2/64 (L1+L2 = -log2/64).
L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000

# Double-precision polynomial coefficients for setox (added with fadd.d).
EEXPA3:	long		0x3FA55555,0x55554CC1
EEXPA2:	long		0x3FC55555,0x55554A54

# Double-precision polynomial coefficients for setoxm1, 1/4 <= |X| <= 70 log2.
EM1A4:	long		0x3F811111,0x11174385
EM1A3:	long		0x3FA55555,0x55554F5A

EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000

# Coefficients B3..B8 (double) and B2 (extended) for the |X| < 1/4
# polynomial in EM1POLY.
EM1B8:	long		0x3EC71DE3,0xA5774682
EM1B7:	long		0x3EFA01A0,0x19D7CB68

EM1B6:	long		0x3F2A01A0,0x1A019DF3
EM1B5:	long		0x3F56C16C,0x16C170E2

EM1B4:	long		0x3F811111,0x11111111
EM1B3:	long		0x3FA55555,0x55555555

EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
	long		0x00000000

# Double-precision 2^(140) and 2^(-140), used to scale very tiny inputs
# in EM12TINY.
TWO140:	long		0x48B00000,0x00000000
TWON140:
	long		0x37300000,0x00000000

# Table of 2^(J/64), J = 0..63. Each entry is 16 bytes: a 12-byte
# extended-precision lead part T followed by a single-precision tail t.
EEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A

# Scratch aliases. SCALE/SC share FP_SCR0 and ADJSCALE/ONEBYSC share
# FP_SCR1; the first pair of names is used by setox, the second by setoxm1.
	set		ADJFLAG,L_SCR2
	set		SCALE,FP_SCR0
	set		ADJSCALE,FP_SCR1
	set		SC,FP_SCR0
	set		ONEBYSC,FP_SCR1

	global		setox
setox:
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
#--a0 = ptr to extended-precision X, d0 = user rnd prec,mode (loaded into
#--fpcr right before the final operation); result is left in fp0.

#--Step 1.
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EXPC1			# normal case
	bra		EXPSM			# |X| < 2^(-65): Step 7

EXPC1:
#--The case |X| >= 2^(-65)
#--(also entered from EM1BIG with the masked exponent word in d1)
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
	blt.b		EXPMAIN			# normal case
	bra		EEXPBIG			# Step 8

EXPMAIN:
#--Step 2.
#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1		# fp1 keeps X
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	mov.l		&0,ADJFLAG(%a6)		# AdjFlag := 0
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# a1 = address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB

EXPCONT1:
#--Step 3.
#--fp2/fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5

	fmul.x		%fp1,%fp2		# fp2 IS S*A5
	fmov.x		%fp1,%fp3
	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4

	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
	mov.l		&0x80000000,SCALE+4(%a6)
	clr.l		SCALE+8(%a6)

	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)

	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),

	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1

#--Step 5
#--final reconstruction process
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )

	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}
	fadd.s		(%a1),%fp0		# accurate 2^(J/64)

	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
	mov.l		ADJFLAG(%a6),%d1

#--Step 6
	tst.l		%d1
	beq.b		NORMAL			# AdjFlag = 0: skip 6.2
ADJUST:
	fmul.x		ADJSCALE(%a6),%fp0	# ans := ans * AdjScale
NORMAL:
	fmov.l		%d0,%fpcr		# restore user FPCR
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
	bra		t_catch

EXPSM:
#--Step 7
	fmovm.x		(%a0),&0x80		# load X
	fmov.l		%d0,%fpcr
	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
	bra		t_pinx2

EEXPBIG:
#--Step 8
	cmp.l		%d1,&0x400CB27C		# 16480 log2
	bgt.b		EXP2BIG
#--Steps 8.2 -- 8.6
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1		# fp1 keeps X
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	mov.l		&1,ADJFLAG(%a6)		# AdjFlag := 1
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format
	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# a1 = address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is K
	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
	asr.l		&1,%d1			# d1 is M1
	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
	mov.l		&0x80000000,ADJSCALE+4(%a6)
	clr.l		ADJSCALE+8(%a6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	bra.w		EXPCONT1		# go back to Step 3

EXP2BIG:
#--Step 9
	tst.b		(%a0)			# is X positive or negative?
	bmi		t_unfl2
	bra		t_ovfl2

	global		setoxd
setoxd:
#--entry point for EXP(X), X is denormalized
#--returns 1 + sign(X)*2^(-126) under the user's fpcr
	mov.l		(%a0),-(%sp)
	andi.l		&0x80000000,(%sp)	# keep only the sign of X
	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)

	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0

	fmov.l		%d0,%fpcr
	fadd.s		(%sp)+,%fp0
	bra		t_pinx2

	global		setoxm1
setoxm1:
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
#--a0 = ptr to extended-precision X, d0 = user rnd prec,mode;
#--result is left in fp0.

#--Step 1.
#--Step 1.1
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FFD0000		# 1/4
	bge.b		EM1CON1			# |X| >= 1/4
	bra		EM1SM

EM1CON1:
#--Step 1.3
#--The case |X| >= 1/4
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
	bra		EM1BIG

EM1MAIN:
#--Step 2.
#--This is the case:	1/4 <= |X| <= 70 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1		# fp1 keeps X
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# a1 = address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	mov.l		%d1,L_SCR1(%a6)		# save a copy of M

#--Step 3.
#--fp2/fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3950097B,%fp2	# fp2 IS A6

	fmul.x		%fp1,%fp2		# fp2 IS S*A6
	fmov.x		%fp1,%fp3
	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5

	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	neg.w		%d1			# d1 is -M
	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
	mov.l		&0x80000000,ONEBYSC+4(%a6)
	clr.l		ONEBYSC+8(%a6)
	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))

	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))

	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1

	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}

#--Step 5
#--Compute 2^(J/64)*p

	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)

#--Step 6
#--Step 6.1
	mov.l		L_SCR1(%a6),%d1		# retrieve M
	cmp.l		%d1,&63
	ble.b		MLE63
#--Step 6.2	M >= 64
	fmov.s		12(%a1),%fp1		# fp1 is t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
	bra		EM1SCALE
MLE63:
#--Step 6.3	M <= 63
	cmp.l		%d1,&-3
	bge.b		MGEN3
MLTN3:
#--Step 6.4	M <= -4
	fadd.s		12(%a1),%fp0		# p+t
	fadd.x		(%a1),%fp0		# T+(p+t)
	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
	bra		EM1SCALE
MGEN3:
#--Step 6.5	-3 <= M <= 63
	fmov.x		(%a1)+,%fp1		# fp1 is T
	fadd.s		(%a1),%fp0		# fp0 is p+t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)

EM1SCALE:
#--Step 6.6
	fmov.l		%d0,%fpcr		# restore user FPCR
	fmul.x		SC(%a6),%fp0		# ans := Sc * ans
	bra		t_inx2

EM1SM:
#--Step 7	|X| < 1/4.
#--d1 still holds the masked exponent word from Step 1.1
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EM1POLY

EM1TINY:
#--Step 8	|X| < 2^(-65)
	cmp.l		%d1,&0x00330000		# 2^(-16312)
	blt.b		EM12TINY
#--Step 8.2
	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fmov.x		(%a0),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		SC(%a6),%fp0		# X - 2^(-16382) in user mode
	bra		t_catch

EM12TINY:
#--Step 8.3
	fmov.x		(%a0),%fp0
	fmul.d		TWO140(%pc),%fp0	# X * 2^(140)
	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fadd.x		SC(%a6),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.d		TWON140(%pc),%fp0	# scale back by 2^(-140)
	bra		t_catch

EM1POLY:
#--Step 9	exp(X)-1 by a simple polynomial
	fmov.x		(%a0),%fp0		# fp0 is X
	fmul.x		%fp0,%fp0		# fp0 is S := X*X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
	fmul.x		%fp0,%fp1		# fp1 is S*B12
	fmov.s		&0x310F8290,%fp2	# fp2 is B11
	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12

	fmul.x		%fp0,%fp2		# fp2 is S*B11
	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...

	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...

	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...

	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...

	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...

	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...

	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
	fadd.x		%fp2,%fp1		# fp1 is Q

	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}

	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q

	fmov.l		%d0,%fpcr		# user mode for the final add
	fadd.x		(%a0),%fp0		# X + (S*B1+Q)
	bra		t_inx2

EM1BIG:
#--Step 10	|X| > 70 log2
	mov.l		(%a0),%d1
	cmp.l		%d1,&0
	bgt.w		EXPC1			# X > 0: handled as exp(X)
#--Step 10.2
	fmov.s		&0xBF800000,%fp0	# fp0 is -1
	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
	bra		t_minx2

	global		setoxm1d
setoxm1d:
#--entry point for EXPM1(X), here X is denormalized
#--Step 0.
	bra		t_extdnrm

#########################################################################
# sgetexp():  returns the exponent portion of the input argument.
#
#       The exponent bias is removed and the exponent value is
#       returned as an extended precision number in fp0.
# sgetexpd(): handles denormalized numbers.
#
# sgetman():  extracts the mantissa of the input argument. The
#       mantissa is converted to an extended precision number w/
#       an exponent of $3fff and is returned in fp0. The range of
#       the result is [1.0 - 2.0).
# sgetmand(): handles denormalized numbers.
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#
# OUTPUT **************************************************************
#       fp0 = exponent(X) or mantissa(X)
#       FPSR_CC(%a6) = neg_bmask whenever the value left in fp0 is negative
#
#########################################################################

        global  sgetexp
sgetexp:
        mov.w   SRC_EX(%a0),%d0  # d0 = sign + biased exponent word of X
        bclr    &0xf,%d0  # strip the sign bit
        subi.w  &0x3fff,%d0  # remove the bias; this sets the integer CCR
        fmov.w  %d0,%fp0  # fp0 = unbiased exponent (FPU move, CCR untouched)
        blt.b   sgetexpn  # CCR from subi.w: exponent < 0 -> flag 'N' too
        rts

sgetexpn:
        mov.b   &neg_bmask,FPSR_CC(%a6)  # set 'N' ccode bit
        rts

        global  sgetexpd
sgetexpd:
# Denormalized input: the result is always negative, so 'N' is set
# unconditionally.
        bsr.l   norm  # normalize; d0 is used below as the shift amount
        neg.w   %d0  # new exp = -(shft amt)
        subi.w  &0x3fff,%d0  # subtract off the bias
        fmov.w  %d0,%fp0  # return exp in fp0
        mov.b   &neg_bmask,FPSR_CC(%a6)  # set 'N' ccode bit
        rts

        global  sgetman
sgetman:
        mov.w   SRC_EX(%a0),%d0  # d0 = sign + exponent word of X
        ori.w   &0x7fff,%d0  # force the exponent field to all ones (sign kept)
        bclr    &0xe,%d0  # clear bit 14 -> exponent field = $3fff

# The result is assembled in FP_SCR0 so that the caller's operand at (a0)
# is left undisturbed.
        mov.l   SRC_HI(%a0),FP_SCR0_HI(%a6)  # copy mantissa, high long
        mov.l   SRC_LO(%a0),FP_SCR0_LO(%a6)  # copy mantissa, low long
        mov.w   %d0,FP_SCR0_EX(%a6)  # insert new exponent; CCR 'N' = sign of X
        fmov.x  FP_SCR0(%a6),%fp0  # fp0 = +/-1.xxx (FPU move, CCR untouched)
        bmi.b   sgetmann  # CCR from the mov.w above: negative -> flag 'N'
        rts

sgetmann:
        mov.b   &neg_bmask,FPSR_CC(%a6)  # set 'N' ccode bit
        rts

#
# For denormalized numbers, shift the mantissa until
# the j-bit = 1,
# then load the exponent with +/- $3fff.
#
        global  sgetmand
sgetmand:
        bsr.l   norm  # normalize the operand (presumably in place at (a0); confirm against norm)
        bra.b   sgetman  # then build the mantissa result as for a normalized input

#########################################################################
# scosh():  computes the hyperbolic cosine of a normalized input
# scoshd(): computes the hyperbolic cosine of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = cosh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       COSH
#       1. If |X| > 16380 log2, go to 3.
#
#       2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
#               y = |X|, z = exp(Y), and
#               cosh(X) = (1/2)*( z + 1/z ).
#               Exit.
#
#       3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
#
#       4. (16380 log2 < |X| <= 16480 log2)
#               cosh(X) = exp(|X|)/2.
#               However, invoking exp(|X|) may cause premature
#               overflow. Thus, we calculate cosh(X) as follows:
#               Y       := |X|
#               Fact    := 2**(16380)
#               Y'      := Y - 16381 log2
#               cosh(X) := Fact * exp(Y').
#               Exit.
#
#       5. (|X| > 16480 log2) cosh(X) must overflow. Return
#               Huge*Huge to generate overflow and an infinity
#               (mathematically always positive, as cosh is even).
#               Huge is the largest finite number in extended
#               format. Exit.
#
#########################################################################

# Extended-precision constant 2**16380 (exponent word $7FFB = $3FFF + 16380).
TWO16380:
        long    0x7FFB0000,0x80000000,0x00000000,0x00000000

        global  scosh
scosh:
        fmov.x  (%a0),%fp0  # LOAD INPUT

# d1 = "compacted" |X|: exponent word in the upper half, top 16 mantissa
# bits in the lower half, sign masked off.
        mov.l   (%a0),%d1
        mov.w   4(%a0),%d1
        and.l   &0x7FFFFFFF,%d1
        cmp.l   %d1,&0x400CB167  # |X| > 16380 log2 ?
        bgt.b   COSHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

        fabs.x  %fp0  # |X|

        mov.l   %d0,-(%sp)  # keep caller's rnd prec,mode across the call
        clr.l   %d0  # call setox with d0 = 0
        fmovm.x &0x01,-(%sp)  # save |X| to stack
        lea     (%sp),%a0  # pass ptr to |X|
        bsr     setox  # FP0 IS EXP(|X|)
        add.l   &0xc,%sp  # erase |X| from stack
        fmul.s  &0x3F000000,%fp0  # (1/2)EXP(|X|)
        mov.l   (%sp)+,%d0  # caller's rnd prec,mode back in d0

        fmov.s  &0x3E800000,%fp1  # (1/4)
        fdiv.x  %fp0,%fp1  # (1/4)/((1/2)EXP(|X|)) = 1/(2 EXP(|X|))

        fmov.l  %d0,%fpcr  # final op runs under the caller's rnd prec,mode
        mov.b   &FADD_OP,%d1  # last inst is ADD
        fadd.x  %fp1,%fp0
        bra     t_catch

COSHBIG:
        cmp.l   %d1,&0x400CB2B3  # |X| > 16480 log2 ?
        bgt.b   COSHHUGE

        fabs.x  %fp0
        fsub.d  T1(%pc),%fp0  # (|X|-16381LOG2_LEAD)
        fsub.d  T2(%pc),%fp0  # |X| - 16381 LOG2, ACCURATE

        mov.l   %d0,-(%sp)  # keep caller's rnd prec,mode across the call
        clr.l   %d0
        fmovm.x &0x01,-(%sp)  # save fp0 to stack
        lea     (%sp),%a0  # pass ptr to fp0
        bsr     setox  # FP0 IS EXP(|X| - 16381 LOG2)
        add.l   &0xc,%sp  # clear fp0 from stack
        mov.l   (%sp)+,%d0

        fmov.l  %d0,%fpcr
        mov.b   &FMUL_OP,%d1  # last inst is MUL
        fmul.x  TWO16380(%pc),%fp0  # scale back up by 2**16380
        bra     t_catch

COSHHUGE:
        bra     t_ovfl2

        global  scoshd
#--COSH(X) = 1 FOR DENORMALIZED X
scoshd:
        fmov.s  &0x3F800000,%fp0  # 1.0

        fmov.l  %d0,%fpcr
        fadd.s  &0x00800000,%fp0  # add single-precision 2**(-126) under the caller's mode
        bra     t_pinx2

#########################################################################
# ssinh():  computes the hyperbolic sine of a normalized input
# ssinhd(): computes the hyperbolic sine of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT
# *********************************************************************
#       fp0 = sinh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       SINH
#       1. If |X| > 16380 log2, go to 3.
#
#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula
#               y = |X|, sgn = sign(X), and z = expm1(Y),
#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
#               Exit.
#
#       3. If |X| > 16480 log2, go to 5.
#
#       4. (16380 log2 < |X| <= 16480 log2)
#               sinh(X) = sign(X) * exp(|X|)/2.
#               However, invoking exp(|X|) may cause premature overflow.
#               Thus, we calculate sinh(X) as follows:
#               Y       := |X|
#               sgn     := sign(X)
#               sgnFact := sgn * 2**(16380)
#               Y'      := Y - 16381 log2
#               sinh(X) := sgnFact * exp(Y').
#               Exit.
#
#       5. (|X| > 16480 log2) sinh(X) must overflow. Return
#               sign(X)*Huge*Huge to generate overflow and an infinity with
#               the appropriate sign. Huge is the largest finite number in
#               extended format. Exit.
#
#########################################################################

        global  ssinh
ssinh:
        fmov.x  (%a0),%fp0  # LOAD INPUT

# d1 = compacted X (exponent word : top 16 mantissa bits); a copy that
# still carries the sign is parked in a1 for later.
        mov.l   (%a0),%d1
        mov.w   4(%a0),%d1
        mov.l   %d1,%a1  # save (compacted) operand
        and.l   &0x7FFFFFFF,%d1
        cmp.l   %d1,&0x400CB167  # |X| > 16380 log2 ?
        bgt.b   SINHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

        fabs.x  %fp0  # Y = |X|

        movm.l  &0x8040,-(%sp)  # {a1/d0}
        fmovm.x &0x01,-(%sp)  # save Y on stack
        lea     (%sp),%a0  # pass ptr to Y
        clr.l   %d0  # call setoxm1 with d0 = 0
        bsr     setoxm1  # FP0 IS Z = EXPM1(Y)
        add.l   &0xc,%sp  # clear Y from stack
        fmov.l  &0,%fpcr  # clear the FPCR for the intermediate arithmetic
        movm.l  (%sp)+,&0x0201  # {a1/d0}

        fmov.x  %fp0,%fp1
        fadd.s  &0x3F800000,%fp1  # 1+Z
        fmov.x  %fp0,-(%sp)  # park Z on the stack
        fdiv.x  %fp1,%fp0  # Z/(1+Z)
        mov.l   %a1,%d1  # recover the signed compacted operand
        and.l   &0x80000000,%d1  # keep only sign(X)
        or.l    &0x3F000000,%d1  # single-precision sign(X)*(1/2)
        fadd.x  (%sp)+,%fp0  # Z + Z/(1+Z)
        mov.l   %d1,-(%sp)  # push sign(X)*(1/2) as the final multiplier

        fmov.l  %d0,%fpcr  # final op runs under the caller's rnd prec,mode
        mov.b   &FMUL_OP,%d1  # last inst is MUL
        fmul.s  (%sp)+,%fp0  # last fp inst - possible exceptions set
        bra     t_catch

SINHBIG:
        cmp.l   %d1,&0x400CB2B3  # |X| > 16480 log2 ?
        bgt     t_ovfl
        fabs.x  %fp0
        fsub.d  T1(%pc),%fp0  # (|X|-16381LOG2_LEAD)
# Build sgnFact = sign(X) * 2**16380 on the stack as an extended value
# (pushed low long first, exponent word last).
        mov.l   &0,-(%sp)
        mov.l   &0x80000000,-(%sp)
        mov.l   %a1,%d1
        and.l   &0x80000000,%d1
        or.l    &0x7FFB0000,%d1
        mov.l   %d1,-(%sp)  # EXTENDED FMT
        fsub.d  T2(%pc),%fp0  # |X| - 16381 LOG2, ACCURATE

        mov.l   %d0,-(%sp)  # keep caller's rnd prec,mode across the call
        clr.l   %d0
        fmovm.x &0x01,-(%sp)  # save fp0 on stack
        lea     (%sp),%a0  # pass ptr to fp0
        bsr     setox  # FP0 IS EXP(|X| - 16381 LOG2)
        add.l   &0xc,%sp  # clear fp0 from stack

        mov.l   (%sp)+,%d0
        fmov.l  %d0,%fpcr
        mov.b   &FMUL_OP,%d1  # last inst is MUL
        fmul.x  (%sp)+,%fp0  # times sgnFact (popped); possible exception
        bra     t_catch

        global  ssinhd
#--SINH(X) = X FOR DENORMALIZED X
ssinhd:
        bra     t_extdnrm

#########################################################################
# stanh():  computes the hyperbolic tangent of a normalized input
# stanhd(): computes the hyperbolic tangent of a
#           denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = tanh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       TANH
#       1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
#
#       2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
#               sgn := sign(X), y := 2|X|, z := expm1(Y), and
#               tanh(X) = sgn*( z/(2+z) ).
#               Exit.
#
#       3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
#               go to 7.
#
#       4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
#
#       5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
#               sgn := sign(X), y := 2|X|, z := exp(Y),
#               tanh(X) = sgn - [ sgn*2/(1+z) ].
#               Exit.
#
#       6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
#               calculate Tanh(X) by
#               sgn := sign(X), Tiny := 2**(-126),
#               tanh(X) := sgn - sgn*Tiny.
#               Exit.
#
#       7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
#
#########################################################################

# Scratch usage: X and V share FP_SCR0 (V is only written once X is no
# longer needed); SGN lives in L_SCR3.
        set     X,FP_SCR0
        set     XFRAC,X+4

        set     SGN,L_SCR3

        set     V,FP_SCR0

        global  stanh
stanh:
        fmov.x  (%a0),%fp0  # LOAD INPUT

        fmov.x  %fp0,X(%a6)  # keep a copy of X in scratch
        mov.l   (%a0),%d1
        mov.w   4(%a0),%d1  # d1 = compacted X (exponent : top mantissa bits)
        mov.l   %d1,X(%a6)  # first long of X now holds the compacted form
        and.l   &0x7FFFFFFF,%d1
        cmp.l   %d1, &0x3fd78000  # is |X| < 2^(-40)?
        blt.w   TANHBORS  # yes
        cmp.l   %d1, &0x3fffddce  # is |X| > (5/2)LOG2?
        bgt.w   TANHBORS  # yes

#--THIS IS THE USUAL CASE
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

        mov.l   X(%a6),%d1
        mov.l   %d1,SGN(%a6)
        and.l   &0x7FFF0000,%d1  # exponent only: sign and mantissa bits dropped
        add.l   &0x00010000,%d1  # EXPONENT OF 2|X|
        mov.l   %d1,X(%a6)
        and.l   &0x80000000,SGN(%a6)  # SGN = sign bit of X
        fmov.x  X(%a6),%fp0  # FP0 IS Y = 2|X|

        mov.l   %d0,-(%sp)  # keep caller's rnd prec,mode across the call
        clr.l   %d0
        fmovm.x &0x1,-(%sp)  # save Y on stack
        lea     (%sp),%a0  # pass ptr to Y
        bsr     setoxm1  # FP0 IS Z = EXPM1(Y)
        add.l   &0xc,%sp  # clear Y from stack
        mov.l   (%sp)+,%d0

        fmov.x  %fp0,%fp1
        fadd.s  &0x40000000,%fp1  # Z+2
        mov.l   SGN(%a6),%d1
        fmov.x  %fp1,V(%a6)
        eor.l   %d1,V(%a6)  # V = SIGN(X)*(Z+2): flip V's sign bit if X < 0

        fmov.l  %d0,%fpcr  # restore users round prec,mode
        fdiv.x  V(%a6),%fp0  # Z / (SIGN(X)*(Z+2))
        bra     t_inx2

TANHBORS:
        cmp.l   %d1,&0x3FFF8000  # |X| < 1 ?
        blt.w   TANHSM

        cmp.l   %d1,&0x40048AA1  # |X| > 50 log2 ?
        bgt.w   TANHHUGE

#-- (5/2) LOG2 < |X| < 50 LOG2,
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].

        mov.l   X(%a6),%d1
        mov.l   %d1,SGN(%a6)
        and.l   &0x7FFF0000,%d1
        add.l   &0x00010000,%d1  # EXPO OF 2|X|
        mov.l   %d1,X(%a6)  # Y = 2|X|
        and.l   &0x80000000,SGN(%a6)
        mov.l   SGN(%a6),%d1  # (SGN is loaded again after the setox call)
        fmov.x  X(%a6),%fp0  # Y = 2|X|

        mov.l   %d0,-(%sp)  # keep caller's rnd prec,mode across the call
        clr.l   %d0
        fmovm.x &0x01,-(%sp)  # save Y on stack
        lea     (%sp),%a0  # pass ptr to Y
        bsr     setox  # FP0 IS EXP(Y)
        add.l   &0xc,%sp  # clear Y from stack
        mov.l   (%sp)+,%d0
        mov.l   SGN(%a6),%d1
        fadd.s  &0x3F800000,%fp0  # EXP(Y)+1

        eor.l   &0xC0000000,%d1  # -SIGN(X)*2
        fmov.s  %d1,%fp1  # -SIGN(X)*2 IN SGL FMT
        fdiv.x  %fp0,%fp1  # -SIGN(X)2 / [EXP(Y)+1 ]

        mov.l   SGN(%a6),%d1
        or.l    &0x3F800000,%d1  # SGN
        fmov.s  %d1,%fp0  # SGN IN SGL FMT

        fmov.l  %d0,%fpcr  # restore users round prec,mode
        mov.b   &FADD_OP,%d1  # last inst is ADD
        fadd.x  %fp1,%fp0
        bra     t_inx2

TANHSM:
#--|X| < 2^(-40): TANH(X) = X
        fmov.l  %d0,%fpcr  # restore users round prec,mode
        mov.b   &FMOV_OP,%d1  # last inst is MOVE
        fmov.x  X(%a6),%fp0  # last inst - possible exception set
        bra     t_catch

#---RETURN SGN(X) - SGN(X)EPS
TANHHUGE:
        mov.l   X(%a6),%d1
        and.l   &0x80000000,%d1
        or.l    &0x3F800000,%d1  # single-precision SIGN(X)*1.0
        fmov.s  %d1,%fp0
        and.l   &0x80000000,%d1
        eor.l   &0x80800000,%d1  # -SIGN(X)*EPS

        fmov.l  %d0,%fpcr  # restore users round prec,mode
        fadd.s  %d1,%fp0
        bra     t_inx2

        global  stanhd
#--TANH(X) = X FOR DENORMALIZED X
stanhd:
        bra     t_extdnrm

#########################################################################
# slogn():    computes the natural logarithm of a normalized input
# slognd():   computes the natural logarithm of a denormalized input
# slognp1():  computes the log(1+X) of a normalized input
# slognp1d(): computes the log(1+X) of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
#
OUTPUT ************************************************************** #8076# fp0 = log(X) or log(1+X) #8077# #8078# ACCURACY and MONOTONICITY ******************************************* #8079# The returned result is within 2 ulps in 64 significant bit, #8080# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #8081# rounded to double precision. The result is provably monotonic #8082# in double precision. #8083# #8084# ALGORITHM *********************************************************** #8085# LOGN: #8086# Step 1. If |X-1| < 1/16, approximate log(X) by an odd #8087# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #8088# move on to Step 2. #8089# #8090# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #8091# seven significant bits of Y plus 2**(-7), i.e. #8092# F = 1.xxxxxx1 in base 2 where the six "x" match those #8093# of Y. Note that |Y-F| <= 2**(-7). #8094# #8095# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #8096# polynomial in u, log(1+u) = poly. #8097# #8098# Step 4. Reconstruct #8099# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #8100# by k*log(2) + (log(F) + poly). The values of log(F) are #8101# calculated beforehand and stored in the program. #8102# #8103# lognp1: #8104# Step 1: If |X| < 1/16, approximate log(1+X) by an odd #8105# polynomial in u where u = 2X/(2+X). Otherwise, move on #8106# to Step 2. #8107# #8108# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #8109# in Step 2 of the algorithm for LOGN and compute #8110# log(1+X) as k*log(2) + log(F) + poly where poly #8111# approximates log(1+u), u = (Y-F)/F. #8112# #8113# Implementation Notes: #8114# Note 1. There are 64 different possible values for F, thus 64 #8115# log(F)'s need to be tabulated. Moreover, the values of #8116# 1/F are also tabulated so that the division in (Y-F)/F #8117# can be performed by a multiplication. #8118# #8119# Note 2. 
In Step 2 of lognp1, in order to preserved accuracy, #8120# the value Y-F has to be calculated carefully when #8121# 1/2 <= X < 3/2. #8122# #8123# Note 3. To fully exploit the pipeline, polynomials are usually #8124# separated into two parts evaluated independently before #8125# being added up. #8126# #8127#########################################################################8128LOGOF2:8129long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x0000000081308131one:8132long 0x3F8000008133zero:8134long 0x000000008135infty:8136long 0x7F8000008137negone:8138long 0xBF80000081398140LOGA6:8141long 0x3FC2499A,0xB5E4040B8142LOGA5:8143long 0xBFC555B5,0x848CB7DB81448145LOGA4:8146long 0x3FC99999,0x987D87308147LOGA3:8148long 0xBFCFFFFF,0xFF6F7E9781498150LOGA2:8151long 0x3FD55555,0x555555A48152LOGA1:8153long 0xBFE00000,0x0000000881548155LOGB5:8156long 0x3F175496,0xADD7DAD68157LOGB4:8158long 0x3F3C71C2,0xFE80C7E081598160LOGB3:8161long 0x3F624924,0x928BCCFF8162LOGB2:8163long 0x3F899999,0x999995EC81648165LOGB1:8166long 0x3FB55555,0x555555558167TWO:8168long 0x40000000,0x0000000081698170LTHOLD:8171long 0x3f990000,0x80000000,0x00000000,0x0000000081728173LOGTBL:8174long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x000000008175long 0x3FF70000,0xFF015358,0x833C47E2,0x000000008176long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x000000008177long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x000000008178long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x000000008179long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x000000008180long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x000000008181long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x000000008182long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x000000008183long 0x3FFB0000,0x8B29B775,0x1BD70743,0x000000008184long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x000000008185long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x000000008186long 0x3FFE0000,0xE865AC7B,0x7603A197,0x000000008187long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x000000008188long 0x3FFE0000,0xE525982A,0xF70C880E,0x000000008189long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x000000008190long 
0x3FFE0000,0xE1FC780E,0x1FC780E2,0x000000008191long 0x3FFB0000,0xFF64898E,0xDF55D551,0x000000008192long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x000000008193long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x000000008194long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x000000008195long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x000000008196long 0x3FFE0000,0xD901B203,0x6406C80E,0x000000008197long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x000000008198long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x000000008199long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x000000008200long 0x3FFE0000,0xD3680D36,0x80D3680D,0x000000008201long 0x3FFC0000,0xC3FD0329,0x06488481,0x000000008202long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x000000008203long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x000000008204long 0x3FFE0000,0xCE168A77,0x25080CE1,0x000000008205long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x000000008206long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x000000008207long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x000000008208long 0x3FFE0000,0xC907DA4E,0x871146AD,0x000000008209long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x000000008210long 0x3FFE0000,0xC6980C69,0x80C6980C,0x000000008211long 0x3FFD0000,0x82012CA5,0xA68206D7,0x000000008212long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x000000008213long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x000000008214long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x000000008215long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x000000008216long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x000000008217long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x000000008218long 0x3FFE0000,0xBD691047,0x07661AA3,0x000000008219long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x000000008220long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x000000008221long 0x3FFD0000,0xA0218434,0x353F1DE8,0x000000008222long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x000000008223long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x000000008224long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x000000008225long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x000000008226long 0x3FFE0000,0xB509E68A,0x9B94821F,0x000000008227long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x000000008228long 
0x3FFE0000,0xB30F6352,0x8917C80B,0x000000008229long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x000000008230long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x000000008231long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x000000008232long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x000000008233long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x000000008234long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x000000008235long 0x3FFD0000,0xC788F439,0xB3163BF1,0x000000008236long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x000000008237long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x000000008238long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x000000008239long 0x3FFD0000,0xD2420487,0x2DD85160,0x000000008240long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x000000008241long 0x3FFD0000,0xD7894992,0x3BC3588A,0x000000008242long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x000000008243long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x000000008244long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x000000008245long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x000000008246long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x000000008247long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x000000008248long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x000000008249long 0x3FFD0000,0xEC1F392C,0x5179F283,0x000000008250long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x000000008251long 0x3FFD0000,0xF12440D3,0xE36130E6,0x000000008252long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x000000008253long 0x3FFD0000,0xF61CCE92,0x346600BB,0x000000008254long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x000000008255long 0x3FFD0000,0xFB091FD3,0x8145630A,0x000000008256long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x000000008257long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x000000008258long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x000000008259long 0x3FFE0000,0x825EFCED,0x49369330,0x000000008260long 0x3FFE0000,0x9868C809,0x868C8098,0x000000008261long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x000000008262long 0x3FFE0000,0x97012E02,0x5C04B809,0x000000008263long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x000000008264long 0x3FFE0000,0x95A02568,0x095A0257,0x000000008265long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x000000008266long 
0x3FFE0000,0x94458094,0x45809446,0x000000008267long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x000000008268long 0x3FFE0000,0x92F11384,0x0497889C,0x000000008269long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x000000008270long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x000000008271long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x000000008272long 0x3FFE0000,0x905A3863,0x3E06C43B,0x000000008273long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x000000008274long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x000000008275long 0x3FFE0000,0x94E9BFF6,0x15845643,0x000000008276long 0x3FFE0000,0x8DDA5202,0x37694809,0x000000008277long 0x3FFE0000,0x9723A1B7,0x20134203,0x000000008278long 0x3FFE0000,0x8CA29C04,0x6514E023,0x000000008279long 0x3FFE0000,0x995899C8,0x90EB8990,0x000000008280long 0x3FFE0000,0x8B70344A,0x139BC75A,0x000000008281long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x000000008282long 0x3FFE0000,0x8A42F870,0x5669DB46,0x000000008283long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x000000008284long 0x3FFE0000,0x891AC73A,0xE9819B50,0x000000008285long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x000000008286long 0x3FFE0000,0x87F78087,0xF78087F8,0x000000008287long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x000000008288long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x000000008289long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x000000008290long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x000000008291long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x000000008292long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x000000008293long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x000000008294long 0x3FFE0000,0x83993052,0x3FBE3368,0x000000008295long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x000000008296long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x000000008297long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x000000008298long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x000000008299long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x000000008300long 0x3FFE0000,0x80808080,0x80808081,0x000000008301long 0x3FFE0000,0xB07197A2,0x3C46C654,0x0000000083028303set ADJK,L_SCR183048305set X,FP_SCR08306set XDCARE,X+28307set XFRAC,X+483088309set F,FP_SCR18310set FFRAC,F+483118312set 
KLOG2,FP_SCR083138314set SAVEU,FP_SCR083158316global slogn8317#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S8318slogn:8319fmov.x (%a0),%fp0 # LOAD INPUT8320mov.l &0x00000000,ADJK(%a6)83218322LOGBGN:8323#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS8324#--A FINITE, NON-ZERO, NORMALIZED NUMBER.83258326mov.l (%a0),%d18327mov.w 4(%a0),%d183288329mov.l (%a0),X(%a6)8330mov.l 4(%a0),X+4(%a6)8331mov.l 8(%a0),X+8(%a6)83328333cmp.l %d1,&0 # CHECK IF X IS NEGATIVE8334blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID8335# X IS POSITIVE, CHECK IF X IS NEAR 18336cmp.l %d1,&0x3ffef07d # IS X < 15/16?8337blt.b LOGMAIN # YES8338cmp.l %d1,&0x3fff8841 # IS X > 17/16?8339ble.w LOGNEAR1 # NO83408341LOGMAIN:8342#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 183438344#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.8345#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.8346#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)8347#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).8348#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING8349#--LOG(1+U) CAN BE VERY EFFICIENT.8350#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO8351#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.83528353#--GET K, Y, F, AND ADDRESS OF 1/F.8354asr.l &8,%d18355asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X8356sub.l &0x3FFF,%d1 # THIS IS K8357add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.8358lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)8359fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT83608361#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F8362mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 
2^(-K)*X8363mov.l XFRAC(%a6),FFRAC(%a6)8364and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y8365or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT8366mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F8367and.l &0x7E000000,%d18368asr.l &8,%d18369asr.l &8,%d18370asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT8371add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F83728373fmov.x X(%a6),%fp08374mov.l &0x3fff0000,F(%a6)8375clr.l F+8(%a6)8376fsub.x F(%a6),%fp0 # Y-F8377fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY8378#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K8379#--REGISTERS SAVED: FPCR, FP1, FP283808381LP1CONT1:8382#--AN RE-ENTRY POINT FOR LOGNP18383fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F8384fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY8385fmov.x %fp0,%fp28386fmul.x %fp2,%fp2 # FP2 IS V=U*U8387fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMEORY, FREE FP183888389#--LOG(1+U) IS APPROXIMATED BY8390#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS8391#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]83928393fmov.x %fp2,%fp38394fmov.x %fp2,%fp183958396fmul.d LOGA6(%pc),%fp1 # V*A68397fmul.d LOGA5(%pc),%fp2 # V*A583988399fadd.d LOGA4(%pc),%fp1 # A4+V*A68400fadd.d LOGA3(%pc),%fp2 # A3+V*A584018402fmul.x %fp3,%fp1 # V*(A4+V*A6)8403fmul.x %fp3,%fp2 # V*(A3+V*A5)84048405fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)8406fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)84078408fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))8409add.l &16,%a0 # ADDRESS OF LOG(F)8410fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))84118412fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))8413fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))84148415fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))8416fmovm.x (%sp)+,&0x30 # RESTORE FP2-38417fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)84188419fmov.l %d0,%fpcr8420fadd.x KLOG2(%a6),%fp0 # FINAL ADD8421bra t_inx2842284238424LOGNEAR1:84258426# if the input is exactly equal to one, then exit through ld_pzero.8427# if these 2 lines weren't here, the correct answer would be 
returned8428# but the INEX2 bit would be set.8429fcmp.b %fp0,&0x1 # is it equal to one?8430fbeq.l ld_pzero # yes84318432#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.8433fmov.x %fp0,%fp18434fsub.s one(%pc),%fp1 # FP1 IS X-18435fadd.s one(%pc),%fp0 # FP0 IS X+18436fadd.x %fp1,%fp1 # FP1 IS 2(X-1)8437#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL8438#--IN U, U = 2(X-1)/(X+1) = FP1/FP084398440LP1CONT2:8441#--THIS IS AN RE-ENTRY POINT FOR LOGNP18442fdiv.x %fp0,%fp1 # FP1 IS U8443fmovm.x &0xc,-(%sp) # SAVE FP2-38444#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP38445#--LET V=U*U, W=V*V, CALCULATE8446#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY8447#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )8448fmov.x %fp1,%fp08449fmul.x %fp0,%fp0 # FP0 IS V8450fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP18451fmov.x %fp0,%fp18452fmul.x %fp1,%fp1 # FP1 IS W84538454fmov.d LOGB5(%pc),%fp38455fmov.d LOGB4(%pc),%fp284568457fmul.x %fp1,%fp3 # W*B58458fmul.x %fp1,%fp2 # W*B484598460fadd.d LOGB3(%pc),%fp3 # B3+W*B58461fadd.d LOGB2(%pc),%fp2 # B2+W*B484628463fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED84648465fmul.x %fp0,%fp2 # V*(B2+W*B4)84668467fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)8468fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V84698470fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED8471fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED84728473fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )84748475fmov.l %d0,%fpcr8476fadd.x SAVEU(%a6),%fp08477bra t_inx284788479#--REGISTERS SAVED FPCR. 
#--LOG(-VE) IS INVALID
LOGNEG:
        bra     t_operr

        global  slognd
slognd:
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT

# NOTE(review): this store looks dead - both Hi_0 and Hi_not0 below write
# ADJK again before branching to LOGBGN. Kept as in the original.
        mov.l   &-100,ADJK(%a6)  # INPUT = 2^(ADJK) * FP0

#----normalize the input value by left shifting k bits (k to be determined
#----below), adjusting exponent and storing -k to ADJK
#----the value TWOTO100 is no longer needed.
#----Note that this code assumes the denormalized input is NON-ZERO.

        movm.l  &0x3f00,-(%sp)  # save some registers {d2-d7}
        mov.l   (%a0),%d3  # D3 is the sign/exponent long of the input
        mov.l   4(%a0),%d4
        mov.l   8(%a0),%d5  # (D4,D5) is (Hi_X,Lo_X)
        clr.l   %d2  # D2 used for holding K

        tst.l   %d4
        bne.b   Hi_not0

Hi_0:
#----upper mantissa long is zero: shift by 32 plus the leading zeros of Lo_X
        mov.l   %d5,%d4
        clr.l   %d5
        mov.l   &32,%d2
        clr.l   %d6
        bfffo   %d4{&0:&32},%d6  # find first 1
        lsl.l   %d6,%d4
        add.l   %d6,%d2  # (D3,D4,D5) is normalized

        mov.l   %d3,X(%a6)
        mov.l   %d4,XFRAC(%a6)
        mov.l   %d5,XFRAC+4(%a6)
        neg.l   %d2
        mov.l   %d2,ADJK(%a6)  # ADJK = -k
        fmov.x  X(%a6),%fp0
        movm.l  (%sp)+,&0xfc  # restore registers {d2-d7}
        lea     X(%a6),%a0  # LOGBGN reads its operand through a0
        bra.w   LOGBGN  # begin regular log(X)

Hi_not0:
        clr.l   %d6
        bfffo   %d4{&0:&32},%d6  # find first 1
        mov.l   %d6,%d2  # get k
        lsl.l   %d6,%d4
        mov.l   %d5,%d7  # a copy of D5
        lsl.l   %d6,%d5
        neg.l   %d6
        add.l   &32,%d6  # d6 = 32 - k
        lsr.l   %d6,%d7  # top k bits of Lo_X, moved to the bottom
        or.l    %d7,%d4  # (D3,D4,D5) normalized

        mov.l   %d3,X(%a6)
        mov.l   %d4,XFRAC(%a6)
        mov.l   %d5,XFRAC+4(%a6)
        neg.l   %d2
        mov.l   %d2,ADJK(%a6)  # ADJK = -k
        fmov.x  X(%a6),%fp0
        movm.l  (%sp)+,&0xfc  # restore registers {d2-d7}
        lea     X(%a6),%a0  # LOGBGN reads its operand through a0
        bra.w   LOGBGN  # begin regular log(X)

        global  slognp1
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
slognp1:
        fmov.x  (%a0),%fp0  # LOAD INPUT
        fabs.x  %fp0  # test magnitude
        fcmp.x  %fp0,LTHOLD(%pc)  # compare with min threshold
        fbgt.w  LP1REAL  # if greater, continue
#--|X| is at or below the threshold: the input itself is the result
        fmov.l  %d0,%fpcr
        mov.b   &FMOV_OP,%d1  # last inst is MOVE
        fmov.x  (%a0),%fp0  # return signed argument
        bra     t_catch

LP1REAL:
        fmov.x  (%a0),%fp0  # LOAD INPUT
        mov.l   &0x00000000,ADJK(%a6)
        fmov.x  %fp0,%fp1  # FP1 IS INPUT Z
        fadd.s  one(%pc),%fp0  # X := ROUND(1+Z)
        fmov.x  %fp0,X(%a6)
        mov.w   XFRAC(%a6),XDCARE(%a6)  # make the first long of X the compacted form
        mov.l   X(%a6),%d1
        cmp.l   %d1,&0
        ble.w   LP1NEG0  # LOG OF ZERO OR -VE
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
        cmp.l   %d1,&0x3ffe8000  # IS BOUNDS [1/2,3/2]?
        blt.w   LOGMAIN
        cmp.l   %d1,&0x3fffc000
        bgt.w   LOGMAIN

LP1NEAR1:
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
        cmp.l   %d1,&0x3ffef07d
        blt.w   LP1CARE
        cmp.l   %d1,&0x3fff8841
        bgt.w   LP1CARE

LP1ONE16:
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
        fadd.x  %fp1,%fp1  # FP1 IS 2Z
        fadd.s  one(%pc),%fp0  # FP0 IS 1+X
#--U = FP1/FP0
        bra.w   LP1CONT2

LP1CARE:
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
#--THERE ARE ONLY TWO CASES.
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
#--(1/F) IN A0, Y-F IN FP0, AND FP2/FP3 SAVED.

        mov.l   XFRAC(%a6),FFRAC(%a6)
        and.l   &0xFE000000,FFRAC(%a6)
        or.l    &0x01000000,FFRAC(%a6)  # F OBTAINED
        cmp.l   %d1,&0x3FFF8000  # SEE IF 1+Z > 1
        bge.b   KISZERO

KISNEG1:
        fmov.s  TWO(%pc),%fp0
        mov.l   &0x3fff0000,F(%a6)
        clr.l   F+8(%a6)
        fsub.x  F(%a6),%fp0  # 2-F
        mov.l   FFRAC(%a6),%d1
        and.l   &0x7E000000,%d1
        asr.l   &8,%d1
        asr.l   &8,%d1
        asr.l   &4,%d1  # D1 CONTAINS DISPLACEMENT FOR 1/F
        fadd.x  %fp1,%fp1  # GET 2Z
        fmovm.x &0xc,-(%sp)  # SAVE FP2 {%fp2/%fp3}
        fadd.x  %fp1,%fp0  # FP0 IS Y-F = (2-F)+2Z
        lea     LOGTBL(%pc),%a0  # table base ...
        add.l   %d1,%a0  # ... plus displacement: A0 IS ADDRESS OF 1/F
        fmov.s  negone(%pc),%fp1  # FP1 IS K = -1
        bra.w   LP1CONT1

KISZERO:
        fmov.s  one(%pc),%fp0
        mov.l   &0x3fff0000,F(%a6)
        clr.l   F+8(%a6)
        fsub.x  F(%a6),%fp0  # 1-F
        mov.l   FFRAC(%a6),%d1
        and.l   &0x7E000000,%d1
        asr.l   &8,%d1
        asr.l   &8,%d1
        asr.l   &4,%d1  # D1 CONTAINS DISPLACEMENT FOR 1/F
        fadd.x  %fp1,%fp0  # FP0 IS Y-F
        fmovm.x &0xc,-(%sp)  # FP2 SAVED {%fp2/%fp3}
        lea     LOGTBL(%pc),%a0
        add.l   %d1,%a0  # A0 IS ADDRESS OF 1/F
        fmov.s  zero(%pc),%fp1  # FP1 IS K = 0
        bra.w   LP1CONT1

LP1NEG0:
#--FPCR SAVED. D1 IS X IN COMPACT FORM.
        cmp.l   %d1,&0
        blt.b   LP1NEG
LP1ZERO:
#--1+Z is zero: load -1 and exit through the divide-by-zero handler
        fmov.s  negone(%pc),%fp0

        fmov.l  %d0,%fpcr
        bra     t_dz

LP1NEG:
#--1+Z is negative: exit through the operand-error handler
        fmov.s  zero(%pc),%fp0

        fmov.l  %d0,%fpcr
        bra     t_operr

        global  slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm
slognp1d:
        bra     t_extdnrm

#########################################################################
# satanh():  computes the inverse hyperbolic tangent of a norm input
# satanhd(): computes the inverse hyperbolic tangent of a denorm input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = arctanh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       ATANH
#       1. If |X| >= 1, go to 3.
#
#       2. (|X| < 1) Calculate atanh(X) by
#               sgn := sign(X)
#               y := |X|
#               z := 2y/(1-y)
#               atanh(X) := sgn * (1/2) * logp1(z)
#               Exit.
#
#       3. If |X| > 1, go to 5.
#
#       4. (|X| = 1) Generate infinity with an appropriate sign and
#               divide-by-zero by
#               sgn := sign(X)
#               atanh(X) := sgn / (+0).
#               Exit.
#
#       5. (|X| > 1) Generate an invalid operation by 0 * infinity.
#               Exit.
#
#									#
#########################################################################

#
# satanh -- arctanh(X) for a normalized extended-precision operand.
#	in:   a0 = pointer to X, d0 = caller's rounding precision/mode
#	out:  never returns directly; leaves through t_catch (|X| < 1),
#	      t_dz (|X| = 1) or t_operr (|X| > 1) with a value in fp0
#	uses: d1, a0, fp0, fp1 directly; d0 is zeroed for the slognp1 call
#	      and reloaded from the stack afterwards.  20 bytes of stack are
#	      live across that call: SIGN(X)*HALF (4), saved d0 (4), Z (12).
#
	global		satanh
satanh:
# build the compact form of X in d1: sign/exponent word in the upper half,
# most significant 16 mantissa bits in the lower half.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
	cmp.l		%d1,&0x3FFF8000		# compact form of 1.0
	bge.b		ATANHBIG		# |X| >= 1 is handled separately

#--THIS IS THE USUAL CASE, |X| < 1
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).

	fabs.x		(%a0),%fp0		# Y = |X|
	fmov.x		%fp0,%fp1
	fneg.x		%fp1			# -Y
	fadd.x		%fp0,%fp0		# 2Y
	fadd.s		&0x3F800000,%fp1	# 1-Y
	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# keep only the sign bit of X
	or.l		&0x3F000000,%d1		# SIGN(X)*HALF (single precision)
	mov.l		%d1,-(%sp)		# parked for the final fmul.s

	mov.l		%d0,-(%sp)		# save rnd prec,mode
	clr.l		%d0			# pass ext prec,RN
	fmovm.x		&0x01,-(%sp)		# save Z on stack
	lea		(%sp),%a0		# pass ptr to Z
	bsr		slognp1			# LOG1P(Z)
	add.l		&0xc,%sp		# clear Z from stack

	mov.l		(%sp)+,%d0		# fetch old prec,mode
	fmov.l		%d0,%fpcr		# load it
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# * SIGN(X)*HALF; pops the last temp
	bra		t_catch

# |X| >= 1: |X| > 1 is an operand error, |X| = 1 is a divide-by-zero.
ATANHBIG:
	fabs.x		(%a0),%fp0		# |X|
	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
	fbgt		t_operr
	bra		t_dz

	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
satanhd:
	bra		t_extdnrm

#########################################################################
# slog10():  computes the base-10 logarithm of a normalized input	#
# slog10d(): computes the base-10 logarithm of a denormalized input	#
# slog2():   computes the base-2 logarithm of a normalized input		#
# slog2d():  computes the base-2 logarithm of a denormalized input	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = log_10(X) or log_2(X)					#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 1.7 ulps in 64 significant bit,	#
#	i.e. 
within 0.5003 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	slog10d:							#
#									#
#	Step 0.	If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#	Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#	Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
#	     2.1 Restore the user FPCR					#
#	     2.2 Return ans := Y * INV_L10.				#
#									#
#	slog10:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#	Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
#	     2.1  Restore the user FPCR					#
#	     2.2  Return ans := Y * INV_L10.				#
#									#
#	sLog2d:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#	Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#	Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
#	     2.1  Restore the user FPCR					#
#	     2.2  Return ans := Y * INV_L2.				#
#									#
#	sLog2:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
#		go to Step 3.						#
#									#
#	Step 2.   Return k.						#
#	     2.1  Get integer k, X = 2^k.				#
#	     2.2  Restore the user FPCR.				#
#	     2.3  Return ans := convert-to-double-extended(k).		#
#									#
#	Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#	Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
#	     4.1  Restore the user FPCR					#
#	     4.2  Return ans := Y * INV_L2.				#
#									#
#########################################################################

#
# Implementation notes for the four entry points below:
#   - the caller's precision/mode arrives in d0; it is pushed on the stack
#     (not kept in D1 as the step list above says) while slogn/slognd run
#     with d0 = 0, and is popped straight into the FPCR afterwards.
#   - "mov.l (%a0),%d1" is used only for its condition codes: the N flag is
#     the sign bit of X, so "blt invalid" rejects negative operands.
#   - every path leaves by a tail branch (ld_pzero, t_inx2, t_minx2 or
#     t_operr); none of these routines executes an rts itself.
#

# 1/log(10) and 1/log(2) in extended precision; the 4th longword is padding.
INV_L10:
	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

INV_L2:
	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000

	global		slog10
#--entry point for Log10(X), X is normalized
slog10:
	fmov.b		&0x1,%fp0
	fcmp.x		%fp0,(%a0)		# if operand == 1,
	fbeq.l		ld_pzero		# return an EXACT zero

	mov.l		(%a0),%d1		# sets N from the sign of X
	blt.w		invalid			# X < 0: operand error
	mov.l		%d0,-(%sp)		# park user prec,mode
	clr.l		%d0			# slogn runs with ext prec,RN
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# user prec,mode for the final multiply
	fmul.x		INV_L10(%pc),%fp0	# log(X) * 1/log(10)
	bra		t_inx2

	global		slog10d
#--entry point for Log10(X), X is denormalized
slog10d:
	mov.l		(%a0),%d1		# sets N from the sign of X
	blt.w		invalid			# X < 0: operand error
	mov.l		%d0,-(%sp)		# park user prec,mode
	clr.l		%d0			# slognd runs with ext prec,RN
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# user prec,mode for the final multiply
	fmul.x		INV_L10(%pc),%fp0	# log(X) * 1/log(10)
	bra		t_minx2

	global		slog2
#--entry point for Log2(X), X is normalized
slog2:
	mov.l		(%a0),%d1		# sets N from the sign of X
	blt.w		invalid			# X < 0: operand error

# X = 2^k exactly when the mantissa is just the explicit integer bit.
	mov.l		8(%a0),%d1		# low mantissa longword
	bne.b		continue		# X is not 2^k

	mov.l		4(%a0),%d1		# high mantissa longword
	and.l		&0x7FFFFFFF,%d1		# ignore the explicit integer bit
	bne.b		continue		# fraction bits set: not 2^k

#--X = 2^k.
	mov.w		(%a0),%d1
	and.l		&0x00007FFF,%d1		# biased exponent
	sub.l		&0x3FFF,%d1		# k = unbiased exponent
	beq.l		ld_pzero		# k = 0 (X = 1): return +0
	fmov.l		%d0,%fpcr		# user prec,mode
	fmov.l		%d1,%fp0		# fp0 = k
	bra		t_inx2

continue:
	mov.l		%d0,-(%sp)		# park user prec,mode
	clr.l		%d0			# slogn runs with ext prec,RN
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# user prec,mode for the final multiply
	fmul.x		INV_L2(%pc),%fp0	# log(X) * 1/log(2)
	bra		t_inx2

# shared negative-operand exit for slog10, slog10d, slog2 and slog2d
invalid:
	bra		t_operr

	global		slog2d
#--entry point for Log2(X), X is denormalized
slog2d:
	mov.l		(%a0),%d1		# sets N from the sign of X
	blt.w		invalid			# X < 0: operand error
	mov.l		%d0,-(%sp)		# park user prec,mode
	clr.l		%d0			# slognd runs with ext prec,RN
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# user prec,mode for the final multiply
	fmul.x		INV_L2(%pc),%fp0	# log(X) * 1/log(2)
	bra		t_minx2

#########################################################################
# stwotox():  computes 2**X for a normalized input			#
# stwotoxd(): computes 2**X for a denormalized input			#
# stentox():  computes 10**X for a normalized input			#
# stentoxd(): computes 10**X for a denormalized input			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = 2**X or 10**X						#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	twotox								#
#	1. If |X| > 16480, go to ExpBig.				#
#									#
#	2. If |X| < 2**(-70), go to ExpSm.				#
#									#
#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
#		decompose N as						#
#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
#									#
#	4. 
Define r as #8958# r := ((X - N*L1)-N*L2) * L10 #8959# where L1, L2 are the leading and trailing parts of #8960# log_10(2)/64 and L10 is the natural log of 10. Then #8961# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #8962# Go to expr to compute that expression. #8963# #8964# expr #8965# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #8966# #8967# 2. Overwrite Fact1 and Fact2 by #8968# Fact1 := 2**(M) * Fact1 #8969# Fact2 := 2**(M) * Fact2 #8970# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #8971# #8972# 3. Calculate P where 1 + P approximates exp(r): #8973# P = r + r*r*(A1+r*(A2+...+r*A5)). #8974# #8975# 4. Let AdjFact := 2**(M'). Return #8976# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #8977# Exit. #8978# #8979# ExpBig #8980# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #8981# generate underflow by Tiny * Tiny. #8982# #8983# ExpSm #8984# 1. Return 1 + X. #8985# #8986#########################################################################89878988L2TEN64:8989long 0x406A934F,0x0979A371 # 64LOG10/LOG28990L10TWO1:8991long 0x3F734413,0x509F8000 # LOG2/64LOG1089928993L10TWO2:8994long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x0000000089958996LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x0000000089978998LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x0000000089999000EXPA5: long 0x3F56C16D,0x6F7BD0B29001EXPA4: long 0x3F811112,0x302C712C9002EXPA3: long 0x3FA55555,0x55554CC19003EXPA2: long 0x3FC55555,0x55554A549004EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x0000000090059006TEXPTBL:9007long 0x3FFF0000,0x80000000,0x00000000,0x3F7380009008long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA9009long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A99010long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C99011long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA9012long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C9013long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF19014long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA9015long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA83739016long 
0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE96709017long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB7009018long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB09019long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D9020long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB3199021long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B9022long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D59023long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A9024long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B9025long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF9026long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA9027long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD9028long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E9029long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B9030long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB9031long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB9032long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC2749033long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C9034long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB009035long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE93019036long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD83679037long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F9038long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C9039long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB9040long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB9041long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C9042long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA9043long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD9044long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF519045long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A9046long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B29047long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB9048long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB179049long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C9050long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F89051long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE539052long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE9053long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC91249054long 
0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581

# Scratch-area aliases used by stwotox/stentox/expr.
# NOTE: X, ADJFACT and FACT1 all name FP_SCR0.  X is only read on the
# EXPBORS path (before FACT1 is written), and ADJFACT is only built in expr
# after the last read of FACT1.
	set		INT,L_SCR1

	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4

	set		ADJFACT,FP_SCR0

	set		FACT1,FP_SCR0
	set		FACT1HI,FACT1+4
	set		FACT1LOW,FACT1+8

	set		FACT2,FP_SCR1
	set		FACT2HI,FACT2+4
	set		FACT2LOW,FACT2+8

	global		stwotox
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--in: a0 = ptr to X, d0 = user round prec,mode.  Leaves through expr
#--(usual case) or EXPBORS (tiny or huge |X|); d2 is saved and restored.
stwotox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

# d1 = compact |X|: exponent word in the upper half, top 16 mantissa bits
# in the lower half, sign cleared.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)		# keep X for the sign test in TEXPBIG
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TWOOK1
	bra.w		EXPBORS

TWOOK1:
	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
	ble.b		TWOMAIN
	bra.w		EXPBORS

TWOMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480

	fmov.x		%fp0,%fp1
	fmul.s		&0x42800000,%fp1	# 64 * X
	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
	mov.l		%d2,-(%sp)		# d2 is restored in expr
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# d1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64) (16-byte entries)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# d1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--d2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
#--ON THE STACK SO FAR: THE CALLER'S d2.  fp2/fp3 ARE PUSHED NEXT.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.s		&0x3C800000,%fp1	# (1/64)*N
	mov.l		(%a1)+,FACT1(%a6)	# copy the 12-byte leading part ...
	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	mov.w		(%a1)+,FACT2(%a6)	# ... then the 4-byte trailing part

	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)
	add.w		%d1,FACT1(%a6)		# scale FACT1 by 2^M
	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT2(%a6)		# scale FACT2 by 2^M

	bra.w		expr

EXPBORS:
#--SHARED BY stwotox AND stentox.  d1 IS COMPACT |X|, d0 STILL HOLDS THE
#--USER'S ROUND PREC,MODE AND NOTHING HAS BEEN PUSHED YET.
	cmp.l		%d1,&0x3FFF8000		# compact form of 1.0
	bgt.b		TEXPBIG

#--|X| IS SMALL, RETURN 1 + X

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
	bra		t_pinx2

TEXPBIG:
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
	mov.l		X(%a6),%d1		# sign/exponent longword of X
	cmp.l		%d1,&0
	blt.b		EXPNEG

	bra		t_ovfl2			# t_ovfl expects positive value

EXPNEG:
	bra		t_unfl2			# t_unfl expects positive value

	global		stwotoxd
stwotoxd:
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT

	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
# d1 becomes a single-precision addend carrying the sign bit of X.
# NOTE(review): presumably the low word of (%a0) is zero so that d1 is
# exactly +/-0x00800001 -- confirm against the operand layout.
	mov.l		(%a0),%d1
	or.l		&0x00800001,%d1
	fadd.s		%d1,%fp0
	bra		t_pinx2

	global		stentox
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--in: a0 = ptr to X, d0 = user round prec,mode.  Falls through into expr
#--(usual case) or branches to EXPBORS; d2 is saved and restored.
stentox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

# d1 = compact |X| (see stwotox)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)		# keep X for the sign test in TEXPBIG
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TENOK1
	bra.w		EXPBORS

TENOK1:
	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
	ble.b		TENMAIN
	bra.w		EXPBORS

TENMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10

	fmov.x		%fp0,%fp1
	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
	mov.l		%d2,-(%sp)		# d2 is restored in expr
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# d1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64) (16-byte entries)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# d1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--d2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
#--ON THE STACK SO FAR: THE CALLER'S d2.  fp2/fp3 ARE PUSHED NEXT.
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp1,%fp2

	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
	mov.l		(%a1)+,FACT1(%a6)

	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL

	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	fsub.x		%fp1,%fp0		# X - N L_LEAD
	mov.w		(%a1)+,FACT2(%a6)

	fsub.x		%fp2,%fp0		# X - N L_TRAIL

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)

	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT1(%a6)		# scale FACT1 by 2^M
	add.w		%d1,FACT2(%a6)		# scale FACT2 by 2^M

expr:
#--COMMON TAIL OF stwotox (branches here) AND stentox (falls through).
#--ON THE STACK: fp2/fp3 (TOP), THEN THE CALLER'S d2.  d0 HOLDS THE USER'S
#--ROUND PREC,MODE.  d2 IS THE BIASED EXPONENT OF ADJFACT = 2**(M'),
#--FACT1 + FACT2 = 2**(M) * 2**(J/64).
#--FP0 IS R. THE FOLLOWING CODE COMPUTES
#--	2**(M'+M) * 2**(J/64) * EXP(R)

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS S = R*R

	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4

	fmul.x		%fp1,%fp2		# FP2 IS S*A5
	fmul.x		%fp1,%fp3		# FP3 IS S*A4

	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)

	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

#--FINAL RECONSTRUCTION PROCESS
#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)

	fmul.x		FACT1(%a6),%fp0
	fadd.x		FACT2(%a6),%fp0
	fadd.x		FACT1(%a6),%fp0		# last read of FACT1

	fmov.l		%d0,%fpcr		# restore users round prec,mode
# FACT1 is dead from here on, so its storage is reused for ADJFACT.
	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
	mov.l		(%sp)+,%d2		# restore the caller's d2
	mov.l		&0x80000000,ADJFACT+4(%a6) # mantissa = 1.0
	clr.l		ADJFACT+8(%a6)
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
	bra		t_catch

	global		stentoxd
stentoxd:
#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT

	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
# same sign-carrying single-precision addend as in stwotoxd
	mov.l		(%a0),%d1
	or.l		&0x00800001,%d1
	fadd.s		%d1,%fp0
	bra		t_pinx2

#########################################################################
# smovcr(): returns the ROM constant at the offset specified in d1	#
#	    rounded to the mode and precision specified in d0.		
#
#									#
# INPUT	*************************************************************** #
#	d0 = rnd prec,mode						#
#	d1 = ROM offset							#
#									#
# OUTPUT ************************************************************** #
#	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
#									#
#########################################################################

#
# Implementation notes:
#   - the offset is classified with signed byte compares, so besides the
#     ranges named below, $0f, $40-$7f and any offset with bit 7 set also
#     end up at z_val and return zero.
#   - each table entry is 12 bytes (three longwords); there is one table
#     per rounding-mode class (RN, RZ/RM, RP).
#   - INEX2/AINEX are ORed into USER_FPSR(%a6) for the inexact constants.
#   - returns with rts, or through ld_pzero for the zero case.
#   NOTE(review): the table index uses the whole low word of d1 (mulu.w,
#   (%a0,%d1.w)) although only the low byte is range checked; presumably
#   the caller passes a zero-extended offset -- confirm.
#
	global		smovcr
smovcr:
	mov.l		%d1,-(%sp)		# save rom offset for a sec

	lsr.b		&0x4,%d0		# shift ctrl bits to lo
	mov.l		%d0,%d1			# make a copy
	andi.w		&0x3,%d1		# extract rnd mode
	andi.w		&0xc,%d0		# extract rnd prec
	swap		%d0			# put rnd prec in hi
	mov.w		%d1,%d0			# put rnd mode in lo

	mov.l		(%sp)+,%d1		# get rom offset

#
# check range of offset
#
	tst.b		%d1			# if zero, offset is to pi
	beq.b		pi_tbl			# it is pi
	cmpi.b		%d1,&0x0a		# check range $01 - $0a
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x0e		# check range $0b - $0e
	ble.b		sm_tbl			# valid constants in this range
	cmpi.b		%d1,&0x2f		# check range $0f - $2f
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x3f		# check range $30 - $3f
	ble.b		bg_tbl			# valid constants in this range
						# anything else falls into z_val

z_val:
	bra.l		ld_pzero		# return a zero

#
# the answer is PI rounded to the proper precision.
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.  d1 is zero here, so the table index computed at no_finx is zero.
#
pi_tbl:
	tst.b		%d0			# is rmode RN?
	bne.b		pi_not_rn		# no
pi_rn:
	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
	bra.w		set_finx
pi_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		pi_rp			# yes
pi_rzrm:
	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
	bra.b		set_finx
pi_rp:
	lea.l		PIRP(%pc),%a0		# load PI RP table addr
	bra.b		set_finx

#
# the answer is one of:
#	$0B	log10(2)	(inexact)
#	$0C	e		(inexact)
#	$0D	log2(e)		(inexact)
#	$0E	log10(e)	(exact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
sm_tbl:
	subi.b		&0xb,%d1		# make offset in 0-3 range
	tst.b		%d0			# is rmode RN?
	bne.b		sm_not_rn		# no
sm_rn:
	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
sm_tbl_cont:
	cmpi.b		%d1,&0x2		# is result log10(e)?
	ble.b		set_finx		# no; answer is inexact
	bra.b		no_finx			# yes; answer is exact
sm_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		sm_rp			# yes
sm_rzrm:
	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		sm_tbl_cont
sm_rp:
	lea.l		SMALRP(%pc),%a0		# load RP table addr
	bra.b		sm_tbl_cont

#
# the answer is one of:
#	$30	ln(2)		(inexact)
#	$31	ln(10)		(inexact)
#	$32	10^0		(exact)
#	$33	10^1		(exact)
#	$34	10^2		(exact)
#	$35	10^4		(exact)
#	$36	10^8		(exact)
#	$37	10^16		(exact)
#	$38	10^32		(inexact)
#	$39	10^64		(inexact)
#	$3A	10^128		(inexact)
#	$3B	10^256		(inexact)
#	$3C	10^512		(inexact)
#	$3D	10^1024		(inexact)
#	$3E	10^2048		(inexact)
#	$3F	10^4096		(inexact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
bg_tbl:
	subi.b		&0x30,%d1		# make offset in 0-f range
	tst.b		%d0			# is rmode RN?
	bne.b		bg_not_rn		# no
bg_rn:
	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
bg_tbl_cont:
	cmpi.b		%d1,&0x1		# is offset <= $31?
	ble.b		set_finx		# yes; answer is inexact
	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
	ble.b		no_finx			# yes; answer is exact
	bra.b		set_finx		# no; answer is inexact
bg_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		bg_rp			# yes
bg_rzrm:
	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		bg_tbl_cont
bg_rp:
	lea.l		BIGRP(%pc),%a0		# load RP table addr
	bra.b		bg_tbl_cont

# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
set_finx:
	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
# a0 = table for the rounding mode, d1 = entry number within it
no_finx:
	mulu.w		&0xc,%d1		# offset points into tables
	swap		%d0			# put rnd prec in lo word
	tst.b		%d0			# is precision extended?

	bne.b		not_ext			# no; the constant must be rounded

# Precision is extended: the table entry is the answer as it stands
	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
	rts

# Precision is single or double
not_ext:
	swap		%d0			# rnd prec in upper word

# call round() to round the answer to the proper precision.
# exponents out of range for single or double DO NOT cause underflow
# or overflow.
	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
	mov.l		%d0,%d1			# d1 = rnd prec (hi word), mode (lo word)
	clr.l		%d0			# clear g,r,s
	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
	clr.w		LOCAL_SGN(%a0)		# sign always positive
	bsr.l		_round			# round the mantissa

	fmovm.x		(%a0),&0x80		# return rounded result in fp0
	rts

	align		0x4

# pi, one 12-byte entry per rounding-mode class
PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi

# constants for offsets $0b-$0e.  The fifth entry (0.0) of each table is
# not selected by the range check in smovcr, which sends $0f to z_val.
SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRZRM:
	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

# constants for offsets $30-$3f
BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		
0x40050000,0xC8000000,0x00000000 # 10 ^ 29512long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 49513long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 89514long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 169515long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 329516long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 649517long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 1289518long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 2569519long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 5129520long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 10249521long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 20489522long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 409695239524BIGRZRM:9525long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)9526long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)95279528long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 09529long 0x40020000,0xA0000000,0x00000000 # 10 ^ 19530long 0x40050000,0xC8000000,0x00000000 # 10 ^ 29531long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 49532long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 89533long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 169534long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 329535long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 649536long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 1289537long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 2569538long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 5129539long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 10249540long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 20489541long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 409695429543BIGRP:9544long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)9545long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)95469547long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 09548long 0x40020000,0xA0000000,0x00000000 # 10 ^ 19549long 0x40050000,0xC8000000,0x00000000 # 10 ^ 29550long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 49551long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 89552long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 169553long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 329554long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 649555long 
0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

#########################################################################
# sscale(): computes the destination operand scaled by the source	#
#	    operand. If the absolute value of the source operand is	#
#	    >= 2^14, an overflow or underflow is returned.		#
#									#
# INPUT *************************************************************** #
#	a0  = pointer to double-extended source operand X		#
#	a1  = pointer to double-extended destination operand Y		#
#									#
# OUTPUT ************************************************************** #
#	fp0 =  scale(X,Y)						#
#									#
#########################################################################

#
# Implementation notes:
#   - d0 holds the caller's control bits on entry; they are pushed at once
#     and popped again on every exit path (into the FPCR or back into d0).
#   - the scaling is done as one fmul.x by a 12-byte operand built on the
#     stack, so that t_catch2 can examine the result of a real FP multiply.
#   - every path leaves by a tail branch (t_catch2, t_unfl, t_ovfl_sc or
#     t_resdnrm); sscale never executes an rts itself.
#
	set		SIGN,		L_SCR1

	global		sscale
sscale:
	mov.l		%d0,-(%sp)		# store off ctrl bits for now

	mov.w		DST_EX(%a1),%d1		# get dst exponent
	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
	andi.l		&0x00007fff,%d1		# strip sign from dst exp

	mov.w		SRC_EX(%a0),%d0		# check src bounds
	andi.w		&0x7fff,%d0		# clr src sign bit
	cmpi.w		%d0,&0x3fff		# is src ~ ZERO? (|src| < 1)
	blt.w		src_small		# yes
	cmpi.w		%d0,&0x400c		# no; is src too big?
	bgt.w		src_out			# yes

#
# Source is within 2^14 range.
#
src_ok:
	fintrz.x	SRC(%a0),%fp0		# calc int of src
	fmov.l		%fp0,%d0		# int src to d0
# don't want any accrued bits from the fintrz showing up later since
# we may need to read the fpsr for the last fp op in t_catch2().
	fmov.l		&0x0,%fpsr

	tst.b		DST_HI(%a1)		# is dst denormalized?
	bmi.b		sok_norm		# no; msb of the mantissa is set

# the dst is a DENORM. normalize the DENORM and add the adjustment to
# the src value. then, jump to the norm part of the routine.
sok_dnrm:
	mov.l		%d0,-(%sp)		# save src for now

	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
	bsr.l		norm			# normalize the DENORM
	neg.l		%d0			# negate the value norm left in d0
	add.l		(%sp)+,%d0		# add adjustment to src

	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM

	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
	bge.b		sok_norm2		# thank goodness no

# the multiply factor that we're trying to create should be a denorm
# for the multiply to work. Therefore, we're going to actually do a
# multiply with a denorm which will cause an unimplemented data type
# exception to be put into the machine which will be caught and corrected
# later. we don't do this with the DENORMs above because this method
# is slower. but, don't fret, I don't see it being used much either.
	fmov.l		(%sp)+,%fpcr		# restore user fpcr
	mov.l		&0x80000000,%d1		# load normalized mantissa
	subi.l		&-0x3fff,%d0		# how many should we shift?
	neg.l		%d0			# make it positive
	cmpi.b		%d0,&0x20		# is it >= 32?
	bge.b		sok_dnrm_32		# yes
	lsr.l		%d0,%d1			# no; bit stays in upper lw
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		%d1,-(%sp)		# insert new high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont
sok_dnrm_32:
	subi.b		&0x20,%d0		# get shift count
	lsr.l		%d0,%d1			# make low mantissa longword
	mov.l		%d1,-(%sp)		# insert new low mantissa
	clr.l		-(%sp)			# insert zero high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont

# the dst is already normalized: load it and build the factor 2^(int src)
# as a normalized extended value for the multiply.
sok_norm:
	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
sok_norm2:
	fmov.l		(%sp)+,%fpcr		# restore user fpcr

	addi.w		&0x3fff,%d0		# turn src amt into exp value
	swap		%d0			# put exponent in high word
# the three pushes build the operand from its last longword to its first
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
	mov.l		%d0,-(%sp)		# insert new exponent

sok_norm_cont:
	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# do the multiply; pops the 12-byte factor
	bra		t_catch2		# catch any exceptions

#
# Source is outside of 2^14 range.  Test the sign and branch
# to the appropriate exception handler.
#
src_out:
	mov.l		(%sp)+,%d0		# restore ctrl bits
	exg		%a0,%a1			# swap src,dst ptrs
	tst.b		SRC_EX(%a1)		# is src negative?
	bmi		t_unfl			# yes; underflow
	bra		t_ovfl_sc		# no; overflow

#
# The source input is below 1, so we check for denormalized numbers
# and set unfl.
#
src_small:
	tst.b		DST_HI(%a1)		# is dst denormalized?
	bpl.b		ssmall_done		# yes

	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr		# no; load control bits
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		DST(%a1),%fp0		# simply return dest
	bra		t_catch2
ssmall_done:
	mov.l		(%sp)+,%d0		# load control bits into d0
	mov.l		%a1,%a0			# pass ptr to dst
	bra		t_resdnrm

#########################################################################
# smod(): computes the fp MOD of the input values X,Y.			#
# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input X			#
#	a1 = pointer to extended precision input Y			#
#	d0 = round precision,mode					#
#									#
#	The input operands X and Y can be either normalized or		#
#	denormalized.							
#									#
# OUTPUT **************************************************************	#
#	fp0 = FREM(X,Y) or FMOD(X,Y)					#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
#		 signY := sign(Y), X := |X|, Y := |Y|,			#
#		 signQ := signX EOR signY. Record whether MOD or REM	#
#		 is requested.						#
#									#
#	Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
#		 If (L < 0) then					#
#		    R := X, go to Step 4.				#
#		 else							#
#		    R := 2^(-L)X, j := L.				#
#		 endif							#
#									#
#	Step 3.  Perform MOD(X,Y)					#
#	     3.1 If R = Y, go to Step 9.				#
#	     3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
#	     3.3 If j = 0, go to Step 4.				#
#	     3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
#		 Step 3.1.						#
#									#
#	Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
#		 Last_Subtract := false (used in Step 7 below). If	#
#		 MOD is requested, go to Step 6.			#
#									#
#	Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
#	     5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
#		 Step 6.						#
#	     5.2 If R > Y/2, then { set Last_Subtract := true,		#
#		 Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
#	     5.3 This is the tricky case of R = Y/2. If Q is odd,	#
#		 then { Q := Q + 1, signX := -signX }.			#
#									#
#	Step 6.  R := signX*R.						#
#									#
#	Step 7.  If Last_Subtract = true, R := R - Y.			#
#									#
#	Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
#									#
#	Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
#		 X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
#		 R := 0. Return signQ, last 7 bits of Q, and R.		#
#									#
#########################################################################

# Scratch locations in the exception frame used by smod()/srem():
	set		Mod_Flag,L_SCR3		# byte: 0 = MOD, 1 = REM
	set		Sc_Flag,L_SCR3+1	# byte: != 0 => multiply result by Scale

	set		SignY,L_SCR2		# word: sign/exponent word of Y
	set		SignX,L_SCR2+2		# word: sign/exponent word of X
	set		SignQ,L_SCR3+2		# word: sign(X) EOR sign(Y) in bit 15

	set		Y,FP_SCR0		# working copy of |Y|
	set		Y_Hi,Y+4
	set		Y_Lo,Y+8

	set		R,FP_SCR1		# working copy of the remainder R
	set		R_Hi,R+4
	set		R_Lo,R+8

# Extended-precision constant (exponent word 0x0001, mantissa 0x80000000_00000000)
# that the result is multiplied by when Sc_Flag is set.
Scale:
	long		0x00010000,0x80000000,0x00000000,0x00000000

	global		smod
smod:
	clr.b		FPSR_QBYTE(%a6)		# start with an empty quotient byte
	mov.l		%d0,-(%sp)		# save ctrl bits
	clr.b		Mod_Flag(%a6)		# Mod_Flag = 0: MOD requested
	bra.b		Mod_Rem

	global		srem
srem:
	clr.b		FPSR_QBYTE(%a6)		# start with an empty quotient byte
	mov.l		%d0,-(%sp)		# save ctrl bits
	mov.b		&0x1,Mod_Flag(%a6)	# Mod_Flag = 1: REM requested

Mod_Rem:
#..Save sign of X and Y
	movm.l		&0x3f00,-(%sp)		# save data registers (popped at Restore)
	mov.w		SRC_EX(%a0),%d3
	mov.w		%d3,SignY(%a6)
	and.l		&0x00007FFF,%d3		# Y := |Y|

#
	mov.l		SRC_HI(%a0),%d4
	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|

	tst.l		%d3
	bne.b		Y_Normal

# Y has a zero exponent field: normalize its mantissa by hand.
	mov.l		&0x00003FFE,%d3		# $3FFD + 1
	tst.l		%d4
	bne.b		HiY_not0

HiY_0:
	mov.l		%d5,%d4
	clr.l		%d5
	sub.l		&32,%d3
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	lsl.l		%d6,%d4
	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
#						# ...with bias $7FFD
	bra.b		Chk_X

HiY_not0:
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	sub.l		%d6,%d3
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d4			# (D3,D4,D5) normalized
#						# ...with bias $7FFD
	bra.b		Chk_X

Y_Normal:
	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
#						# ...with bias $7FFD

Chk_X:
	mov.w		DST_EX(%a1),%d0
	mov.w		%d0,SignX(%a6)
	mov.w		SignY(%a6),%d1
	eor.l		%d0,%d1
	and.l		&0x00008000,%d1
	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
	and.l		&0x00007FFF,%d0
	mov.l		DST_HI(%a1),%d1
	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
	tst.l		%d0
	bne.b		X_Normal
# X has a zero exponent field: normalize its mantissa by hand.
	mov.l		&0x00003FFE,%d0
	tst.l		%d1
	bne.b		HiX_not0

HiX_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						# ...with bias $7FFD
	bra.b		Init

HiX_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized
#						# ...with bias $7FFD
	bra.b		Init

X_Normal:
	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
#						# ...with bias $7FFD

Init:
#
	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
	mov.l		%d0,-(%sp)		# save biased exp(X)
	sub.l		%d3,%d0			# L := expo(X)-expo(Y)

	clr.l		%d6			# D6 := carry <- 0
	clr.l		%d3			# D3 is Q
	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0

#..(Carry,D1,D2) is R
	tst.l		%d0
	bge.b		Mod_Loop_pre

#..expo(X) < expo(Y). Thus X = mod(X,Y)
#
	mov.l		(%sp)+,%d0		# restore d0
	bra.w		Get_Mod

Mod_Loop_pre:
	addq.l		&0x4,%sp		# erase exp(X)
#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
Mod_Loop:
	tst.l		%d6			# test carry bit
	bgt.b		R_GT_Y

#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
	bne.b		R_NE_Y
	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
	bne.b		R_NE_Y

#..At this point, R = Y
	bra.w		Rem_is_0

R_NE_Y:
#..use the borrow of the previous compare
	bcs.b		R_LT_Y			# borrow is set iff R < Y

R_GT_Y:
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l		%d5,%d2			# lo(R) - lo(Y)
	subx.l		%d4,%d1			# hi(R) - hi(Y)
	clr.l		%d6			# clear carry
	addq.l		&1,%d3			# Q := Q + 1

R_LT_Y:
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l		%d0			# see if j = 0.
	beq.b		PostLoop

	add.l		%d3,%d3			# Q := 2Q
	add.l		%d2,%d2			# lo(R) = 2lo(R)
	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
	scs		%d6			# set Carry if 2(R) overflows
	addq.l		&1,%a1			# k := k+1
	subq.l		&1,%d0			# j := j - 1
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b		Mod_Loop

PostLoop:
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

#..normalize R.
	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
	tst.l		%d1
	bne.b		HiR_not0

HiR_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						# ...with bias $7FFD
	bra.b		Get_Mod

HiR_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	bmi.b		Get_Mod			# already normalized
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized

#
# Store R and Y to scratch memory and load R into fp0. Exponents below
# $41FE are stored as they are and Sc_Flag is set so that the result is
# multiplied by Scale at exit; otherwise $3FFE is subtracted first.
Get_Mod:
	cmp.l		%d0,&0x000041FE
	bge.b		No_Scale
Do_Scale:
	mov.w		%d0,R(%a6)
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	mov.l		L_SCR1(%a6),%d6
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	fmov.x		R(%a6),%fp0		# no exception
	mov.b		&1,Sc_Flag(%a6)
	bra.b		ModOrRem
No_Scale:
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	sub.l		&0x3FFE,%d0
	mov.w		%d0,R(%a6)
	mov.l		L_SCR1(%a6),%d6
	sub.l		&0x3FFE,%d6
	mov.l		%d6,L_SCR1(%a6)
	fmov.x		R(%a6),%fp0
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	clr.b		Sc_Flag(%a6)

#
ModOrRem:
	tst.b		Mod_Flag(%a6)
	beq.b		Fix_Sign		# MOD requested: R is the answer

# REM requested: compare R against Y/2 (Step 5 of the algorithm).
	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
	subq.l		&1,%d6			# biased expo(Y/2)
	cmp.l		%d0,%d6
	blt.b		Fix_Sign
	bgt.b		Last_Sub

	cmp.l		%d1,%d4
	bne.b		Not_EQ
	cmp.l		%d2,%d5
	bne.b		Not_EQ
	bra.w		Tie_Case

Not_EQ:
	bcs.b		Fix_Sign

Last_Sub:
#
	fsub.x		Y(%a6),%fp0		# no exceptions
	addq.l		&1,%d3			# Q := Q + 1

#
Fix_Sign:
#..Get sign of X
	mov.w		SignX(%a6),%d6
	bge.b		Get_Q
	fneg.x		%fp0

#..Get Q
#
Get_Q:
	clr.l		%d6
	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
	mov.l		&8,%d7
	lsr.l		%d7,%d6			# move sign from bit 15 to bit 7
	and.l		&0x0000007F,%d3		# 7 bits of Q
	or.l		%d6,%d3			# sign and bits of Q
#	swap		%d3
#	fmov.l		%fpsr,%d6
#	and.l		&0xFF00FFFF,%d6
#	or.l		%d3,%d6
#	fmov.l		%d6,%fpsr		# put Q in fpsr
	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr

#
Restore:
	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
	mov.l		(%sp)+,%d0		# recover saved ctrl bits
	fmov.l		%d0,%fpcr
	tst.b		Sc_Flag(%a6)
	beq.b		Finish
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		Scale(%pc),%fp0		# may cause underflow
	bra		t_catch2
# the '040 package did this apparently to see if the dst operand for the
# preceding fmul was a denorm. but, it better not have been since the
# algorithm just got done playing with fp0 and expected no exceptions
# as a result. trust me...
#	bra		t_avoid_unsupp		# check for denorm as a
#						# ;result of the scaling

Finish:
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		%fp0,%fp0		# capture exceptions & round
	bra		t_catch2

Rem_is_0:
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l		&1,%d3
	cmp.l		%d0,&8			# D0 is j
	bge.b		Q_Big

	lsl.l		%d0,%d3
	bra.b		Set_R_0

Q_Big:
	clr.l		%d3

Set_R_0:
	fmov.s		&0x00000000,%fp0
	clr.b		Sc_Flag(%a6)
	bra.w		Fix_Sign

Tie_Case:
#..Check parity of Q
	mov.l		%d3,%d6
	and.l		&0x00000001,%d6
	tst.l		%d6
	beq.w		Fix_Sign		# Q is even

#..Q is odd, Q := Q + 1, signX := -signX
	addq.l		&1,%d3
	mov.w		SignX(%a6),%d6
	eor.l		&0x00008000,%d6
	mov.w		%d6,SignX(%a6)
	bra.w		Fix_Sign

# default NAN loaded by t_operr() and ssincosi()
qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF ****************************************************************	#
#	t_dz(): Handle DZ exception during transcendental emulation.	#
#		Sets N bit according to sign of source operand.		#
#	t_dz2(): Handle DZ exception during transcendental emulation.	#
#		 Sets N bit always.					#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	- Store properly signed INF into fp0.				#
#	- Set FPSR exception status dz bit, ccode inf bit, and		#
#	  accrued dz bit.						#
#									#
#########################################################################

	global		t_dz
t_dz:
	tst.b		SRC_EX(%a0)		# is src negative?
	bmi.b		t_dz2			# yes

dz_pinf:
	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
	rts

	global		t_dz2
t_dz2:
	fmov.s		&0xff800000,%fp0	# return -INF in fp0
	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
	rts

#################################################################
# OPERR exception:						#
#	- set FPSR exception status operr bit, condition code	#
#	  nan bit; Store default NAN into fp0			#
#################################################################
	global		t_operr
t_operr:
	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
	fmovm.x		qnan(%pc),&0x80		# return default NAN in fp0
	rts

#################################################################
# Extended DENORM:						#
#	- For all functions that have a denormalized input and	#
#	  that f(x)=x, this is the entry point.			#
#	- we only return the EXOP here if either underflow or	#
#	  inexact is enabled.					#
#################################################################

# Entry point for scale w/ extended denorm.
# The function does
# NOT set INEX2/AUNFL/AINEX.
# NOTE(review): when the rounding precision is single or double, the
# xdnrm_sd path below calls unf_sub(), whose first instruction ORs
# unfinx_mask into USER_FPSR. On that path the bits named above do end
# up set even when entering through t_resdnrm -- confirm this is intended.
	global		t_resdnrm
t_resdnrm:
	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
	bra.b		xdnrm_con

	global		t_extdnrm
t_extdnrm:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

xdnrm_con:
	mov.l		%a0,%a1			# make copy of src ptr
	mov.l		%d0,%d1			# make copy of rnd prec,mode
	andi.b		&0xc0,%d1		# extended precision?
	bne.b		xdnrm_sd		# no

# result precision is extended.
	tst.b		LOCAL_EX(%a0)		# is denorm negative?
	bpl.b		xdnrm_exit		# no

	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
	bra.b		xdnrm_exit

# result precision is single or double
xdnrm_sd:
	mov.l		%a1,-(%sp)		# keep src ptr across unf_sub()
	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
	smi.b		%d1			# set d1 accordingly
	bsr.l		unf_sub			# a0 = ptr to default unfl result
	mov.l		(%sp)+,%a1
xdnrm_exit:
	fmovm.x		(%a0),&0x80		# return default result in fp0

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		xdnrm_ena		# yes
	rts

################
# unfl enabled #
################
# we have a DENORM that needs to be converted into an EXOP.
# so, normalize the mantissa, add 0x6000 to the new exponent,
# and return the result in fp1.
# NOTE(review): the scale code at sok_dnrm negates the value norm()
# returns in d0 before using it as an exponent adjustment; here d0 is
# used without negation. Confirm the sign is what is intended.
xdnrm_ena:
	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize mantissa
	addi.l		&0x6000,%d0		# add extra bias
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent

	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# UNFL exception:						#
#	- This routine is for cases where even an EXOP isn't	#
#	  large enough to hold the range of this result.	#
#	  In such a case, the EXOP equals zero.			#
#	- Return the default result to the proper precision	#
#	  with the sign of this result being the same as that	#
#	  of the src operand.					#
#	- t_unfl2() is provided to force the result sign to	#
#	  positive which is the desired result for fetox().	#
#################################################################
	global		t_unfl
t_unfl:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

	tst.b		(%a0)			# is result pos or neg?
	smi.b		%d1			# set d1 accordingly
	bsr.l		unf_sub			# calc default unfl result
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

# t_unfl2 ALWAYS tells unf_sub to create a positive result
	global		t_unfl2
t_unfl2:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

	sf.b		%d1			# set d1 to represent positive
	bsr.l		unf_sub			# calc default unfl result
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x0000000,%fp1		# return EXOP in fp1
	rts

#################################################################
# OVFL exception:						#
#	- This routine is for cases where even an EXOP isn't	#
#	  large enough to hold the range of this result.	#
#	- Return the default result to the proper precision	#
#	  with the sign of this result being the same as that	#
#	  of the src operand.					#
#	- t_ovfl2() is provided to force the result sign to	#
#	  positive which is the desired result for fcosh().	#
#	- t_ovfl_sc() is provided for scale() which only sets	#
#	  the inexact bits if the number is inexact for the	#
#	  precision indicated.					#
#################################################################

	global		t_ovfl_sc
t_ovfl_sc:
	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX

	mov.b		%d0,%d1			# fetch rnd mode/prec
	andi.b		&0xc0,%d1		# extract rnd prec
	beq.b		ovfl_work		# prec is extended

	tst.b		LOCAL_HI(%a0)		# is dst a DENORM?
	bmi.b		ovfl_sc_norm		# no

# dst op is a DENORM. we have to normalize the mantissa to see if the
# result would be inexact for the given precision. make a copy of the
# dst so we don't screw up the version passed to us.
	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
	bsr.l		norm			# normalize mantissa
	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0

# d1 holds the rounding precision field here; the equal case falls
# through to the single-precision check.
ovfl_sc_norm:
	cmpi.b		%d1,&0x40		# is prec sgl?
	bne.b		ovfl_sc_dbl		# no; dbl
ovfl_sc_sgl:
	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
	bne.b		ovfl_sc_inx		# yes
	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
	bne.b		ovfl_sc_inx		# yes
	bra.b		ovfl_work		# don't set INEX2
ovfl_sc_dbl:
	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
	andi.l		&0x7ff,%d1		# dbl mantissa set?
	beq.b		ovfl_work		# no; don't set INEX2
ovfl_sc_inx:
	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
	bra.b		ovfl_work		# continue

	global		t_ovfl
t_ovfl:
	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX

ovfl_work:
	tst.b		LOCAL_EX(%a0)		# what is the sign?
	smi.b		%d1			# set d1 accordingly
	bsr.l		ovf_res			# calc default ovfl result
	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

# t_ovfl2 ALWAYS tells ovf_res to create a positive result
	global		t_ovfl2
t_ovfl2:
	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX

	sf.b		%d1			# clear sign flag for positive
	bsr.l		ovf_res			# calc default ovfl result
	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

#################################################################
# t_catch():							#
#	- the last operation of a transcendental emulation	#
#	  routine may have caused an underflow or overflow.	#
#	  we find out if this occurred by doing an fsave and	#
#	  checking the exception bit. if one did occur, then we	#
#	  jump to fgen_except() which creates the default	#
#	  result and EXOP for us.				#
#################################################################
	global		t_catch
t_catch:

	fsave		-(%sp)
	tst.b		0x2(%sp)		# exception bit set in the fsave frame?
	bmi.b		catch			# yes
	add.l		&0xc,%sp		# no; discard the frame and fall
#						# through to the INEX2 code below

#################################################################
# INEX2 exception:						#
#	- The inex2 and ainex bits are set.			#
#################################################################
	global		t_inx2
t_inx2:
	fblt.w		t_minx2
	fbeq.w		inx2_zero

	global		t_pinx2
t_pinx2:
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
	rts

	global		t_minx2
t_minx2:
	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
	rts

inx2_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
	rts

# an underflow or overflow exception occurred.
# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
catch:
	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)
catch2:
	bsr.l		fgen_except
	add.l		&0xc,%sp		# discard the fsave frame
	rts

# t_catch2(): like t_catch(), but when no exception is pending the
# current FPSR is ORed into USER_FPSR instead of falling into t_inx2.
	global		t_catch2
t_catch2:

	fsave		-(%sp)

	tst.b		0x2(%sp)		# exception bit set in the fsave frame?
	bmi.b		catch2			# yes
	add.l		&0xc,%sp		# no; discard the frame

	fmov.l		%fpsr,%d0
	or.l		%d0,USER_FPSR(%a6)

	rts

#########################################################################

#########################################################################
# unf_sub(): underflow default result calculation for transcendentals	#
#									#
# INPUT:								#
#	d0   : rnd mode,precision					#
#	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
# OUTPUT:								#
#	a0   : points to result (in instruction memory)			#
#########################################################################
unf_sub:
	ori.l		&unfinx_mask,USER_FPSR(%a6)

	andi.w		&0x10,%d1		# keep sign bit in 4th spot

	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
	andi.b		&0xf,%d0		# keep only the prec,mode bits
	or.b		%d1,%d0			# concat {sgn,prec,mode}

	mov.l		%d0,%d1			# make a copy
	lsl.b		&0x1,%d1		# mult index 2 by 2

	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
	rts

# ccode byte for each {sgn,prec,mode} index; one row of four per precision.
tbl_unf_cc:
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4

# default result for each {sgn,prec,mode} index; 16 bytes per entry.
tbl_unf_result:
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext

	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl

	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl

	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0

	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext

	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl

	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl

############################################################

#########################################################################
# src_zero(): Return signed zero according to sign of src operand.	#
#########################################################################
	global		src_zero
src_zero:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_mzero		# if neg, load neg zero

#
# ld_pzero(): return a positive zero.
#
	global		ld_pzero
ld_pzero:
	fmov.s		&0x00000000,%fp0	# load +0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts

# ld_mzero(): return a negative zero.
	global		ld_mzero
ld_mzero:
	fmov.s		&0x80000000,%fp0	# load -0
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
	rts

#########################################################################
# dst_zero(): Return signed zero according to sign of dst operand.	#
#########################################################################
	global		dst_zero
dst_zero:
	tst.b		DST_EX(%a1)		# get sign of dst operand
	bmi.b		ld_mzero		# if neg, load neg zero
	bra.b		ld_pzero		# load positive zero

#########################################################################
# src_inf(): Return signed inf according to sign of src operand.	#
#########################################################################
	global		src_inf
src_inf:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_minf			# if negative branch

#
# ld_pinf(): return a positive infinity.
#
	global		ld_pinf
ld_pinf:
	fmov.s		&0x7f800000,%fp0	# load +INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
	rts

#
# ld_minf():return a negative infinity.
#
	global		ld_minf
ld_minf:
	fmov.s		&0xff800000,%fp0	# load -INF
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# dst_inf(): Return signed inf according to sign of dst operand.	#
#########################################################################
	global		dst_inf
dst_inf:
	tst.b		DST_EX(%a1)		# get sign of dst operand
	bmi.b		ld_minf			# if negative branch
	bra.b		ld_pinf

	global		szr_inf
#################################################################
# szr_inf(): Return +ZERO for a negative src operand or		#
#	     +INF for a positive src operand.			#
#	     Routine used for fetox, ftwotox, and ftentox.	#
#################################################################
szr_inf:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_pzero		# negative: return +ZERO
	bra.b		ld_pinf			# positive: return +INF

#########################################################################
# sopr_inf(): Return +INF for a positive src operand or			#
#	      jump to operand error routine for a negative src operand.	#
#	      Routine used for flogn, flognp1, flog10, and flog2.	#
#########################################################################
	global		sopr_inf
sopr_inf:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.w		t_operr			# negative: operand error
	bra.b		ld_pinf			# positive: return +INF

#################################################################
# setoxm1i(): Return minus one for a negative src operand or	#
#	      positive infinity for a positive src operand.	#
#	      Routine used for fetoxm1.				#
#################################################################
	global		setoxm1i
setoxm1i:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone			# negative: return -1
	bra.b		ld_pinf			# positive: return +INF

#########################################################################
# src_one(): Return signed one according to sign of src operand.	#
#########################################################################
	global		src_one
src_one:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone

#
# ld_pone(): return positive one.
#
	global		ld_pone
ld_pone:
	fmov.s		&0x3f800000,%fp0	# load +1
	clr.b		FPSR_CC(%a6)		# no ccode bits set
	rts

#
# ld_mone(): return negative one.
#
	global		ld_mone
ld_mone:
	fmov.s		&0xbf800000,%fp0	# load -1
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# extended-precision +pi/2 and -pi/2
ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235
mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235

#################################################################
# spi_2(): Return signed PI/2 according to sign of src operand. #
#################################################################
	global		spi_2
spi_2:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mpi2

#
# ld_ppi2(): return positive PI/2.
#
	global		ld_ppi2
ld_ppi2:
	fmov.l		%d0,%fpcr		# load d0 into fpcr before the move
	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
	bra.w		t_pinx2			# set INEX2

#
# ld_mpi2(): return negative PI/2.
#
	global		ld_mpi2
ld_mpi2:
	fmov.l		%d0,%fpcr		# load d0 into fpcr before the move
	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
	bra.w		t_minx2			# set INEX2

####################################################
# The following routines give support for fsincos. #
####################################################

#
# ssincosz(): When the src operand is ZERO, store a one in the
#	      cosine register and return a ZERO in fp0 w/ the same sign
#	      as the src operand.
#
	global		ssincosz
ssincosz:
	fmov.s		&0x3f800000,%fp1	# cosine result is +1
	tst.b		SRC_EX(%a0)		# test sign
	bpl.b		sincoszp
	fmov.s		&0x80000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)
	bra.b		sto_cos			# store cosine result
sincoszp:
	fmov.s		&0x00000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)
	bra.b		sto_cos			# store cosine result

#
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
#	      register and jump to the operand error routine for negative
#	      src operands.
# NOTE(review): the code below branches to t_operr unconditionally; it
# does not test the sign of the src operand.
#
	global		ssincosi
ssincosi:
	fmov.x		qnan(%pc),%fp1		# load NAN
	bsr.l		sto_cos			# store cosine result
	bra.w		t_operr

#
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
#		 register and branch to the src QNAN routine.
#
	global		ssincosqnan
ssincosqnan:
	fmov.x		LOCAL_EX(%a0),%fp1
	bsr.l		sto_cos
	bra.w		src_qnan

#
# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
#		 in the cosine register and branch to the src SNAN routine.
#
	global		ssincossnan
ssincossnan:
	fmov.x		LOCAL_EX(%a0),%fp1
	bsr.l		sto_cos
	bra.w		src_snan

########################################################################

#########################################################################
# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
#	     fp1 holds the result of the cosine portion of ssincos().	#
#	     the value in fp1 will not take any exceptions when moved.	#
#
# INPUT:
#	fp1 : fp value to store
# MODIFIED:
#	d0
#########################################################################
	global	sto_cos
sto_cos:
	mov.b	1+EXC_CMDREG(%a6),%d0	# byte holding the register number
	andi.w	&0x7,%d0		# keep the low three bits (0-7)
	mov.w	(tbl_sto_cos.b,%pc,%d0.w*2),%d0	# fetch table-relative offset
	jmp	(tbl_sto_cos.b,%pc,%d0.w*1)	# dispatch to sto_cos_N

tbl_sto_cos:
	short	sto_cos_0 - tbl_sto_cos
	short	sto_cos_1 - tbl_sto_cos
	short	sto_cos_2 - tbl_sto_cos
	short	sto_cos_3 - tbl_sto_cos
	short	sto_cos_4 - tbl_sto_cos
	short	sto_cos_5 - tbl_sto_cos
	short	sto_cos_6 - tbl_sto_cos
	short	sto_cos_7 - tbl_sto_cos

# registers 0 and 1 are written to their save slots in the exception
# frame (EXC_FP0/EXC_FP1); registers 2-7 are written directly.
sto_cos_0:
	fmovm.x	&0x40,EXC_FP0(%a6)
	rts
sto_cos_1:
	fmovm.x	&0x40,EXC_FP1(%a6)
	rts
sto_cos_2:
	fmov.x	%fp1,%fp2
	rts
sto_cos_3:
	fmov.x	%fp1,%fp3
	rts
sto_cos_4:
	fmov.x	%fp1,%fp4
	rts
sto_cos_5:
	fmov.x	%fp1,%fp5
	rts
sto_cos_6:
	fmov.x	%fp1,%fp6
	rts
sto_cos_7:
	fmov.x	%fp1,%fp7
	rts

##################################################################
# fmod second-level dispatch. Each entry is reached from fmod() for
# one source tag and selects a handler from the destination tag
# DTAG(%a6). A tag of zero takes the first branch.
	global	smod_sdnrm
	global	smod_snorm
smod_sdnrm:
smod_snorm:
	mov.b	DTAG(%a6),%d1
	beq.l	smod
	cmpi.b	%d1,&ZERO
	beq.w	smod_zro
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	smod
	cmpi.b	%d1,&SNAN
	beq.l	dst_snan
	bra.l	dst_qnan

	global	smod_szero
smod_szero:
	mov.b	DTAG(%a6),%d1
	beq.l	t_operr
	cmpi.b	%d1,&ZERO
	beq.l	t_operr
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	t_operr
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

	global	smod_sinf
smod_sinf:
	mov.b	DTAG(%a6),%d1
	beq.l	smod_fpn
	cmpi.b	%d1,&ZERO
	beq.l	smod_zro
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	smod_fpn
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

# zero destination: the quotient-byte sign is src sign XOR dst sign;
# the returned zero takes the sign of the destination.
smod_zro:
srem_zro:
	mov.b	SRC_EX(%a0),%d1		# get src sign
	mov.b	DST_EX(%a1),%d0		# get dst sign
	eor.b	%d0,%d1			# get qbyte sign
	andi.b	&0x80,%d1
	mov.b	%d1,FPSR_QBYTE(%a6)
	tst.b	%d0
	bpl.w	ld_pzero
	bra.w	ld_mzero

# infinite source with a finite destination: the destination operand is
# returned. d0 is saved on the stack while it is used as a scratch
# register, then restored (denorm path) or loaded into the FPCR (norm path).
smod_fpn:
srem_fpn:
	clr.b	FPSR_QBYTE(%a6)
	mov.l	%d0,-(%sp)		# save d0
	mov.b	SRC_EX(%a0),%d1		# get src sign
	mov.b	DST_EX(%a1),%d0		# get dst sign
	eor.b	%d0,%d1			# get qbyte sign
	andi.b	&0x80,%d1
	mov.b	%d1,FPSR_QBYTE(%a6)
	cmpi.b	DTAG(%a6),&DENORM
	bne.b	smod_nrm
	lea	DST(%a1),%a0		# a0 = ptr to dst operand
	mov.l	(%sp)+,%d0		# restore d0
	bra	t_resdnrm
smod_nrm:
	fmov.l	(%sp)+,%fpcr		# saved d0 -> FPCR
	fmov.x	DST(%a1),%fp0		# return the dst operand
	tst.b	DST_EX(%a1)
	bmi.b	smod_nrm_neg
	rts

smod_nrm_neg:
	mov.b	&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
	rts

#########################################################################
# frem second-level dispatch (same scheme as the fmod entries above).
	global	srem_snorm
	global	srem_sdnrm
srem_sdnrm:
srem_snorm:
	mov.b	DTAG(%a6),%d1
	beq.l	srem
	cmpi.b	%d1,&ZERO
	beq.w	srem_zro
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	srem
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

	global	srem_szero
srem_szero:
	mov.b	DTAG(%a6),%d1
	beq.l	t_operr
	cmpi.b	%d1,&ZERO
	beq.l	t_operr
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	t_operr
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

	global	srem_sinf
srem_sinf:
	mov.b	DTAG(%a6),%d1
	beq.w	srem_fpn
	cmpi.b	%d1,&ZERO
	beq.w	srem_zro
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	srem_fpn
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

#########################################################################
# fscale second-level dispatch (same scheme as the fmod entries above).
	global	sscale_snorm
	global	sscale_sdnrm
sscale_snorm:
sscale_sdnrm:
	mov.b	DTAG(%a6),%d1
	beq.l	sscale
	cmpi.b	%d1,&ZERO
	beq.l	dst_zero
	cmpi.b	%d1,&INF
	beq.l	dst_inf
	cmpi.b	%d1,&DENORM
	beq.l	sscale
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

	global	sscale_szero
sscale_szero:
	mov.b	DTAG(%a6),%d1
	beq.l	sscale
	cmpi.b	%d1,&ZERO
	beq.l	dst_zero
	cmpi.b	%d1,&INF
	beq.l	dst_inf
	cmpi.b	%d1,&DENORM
	beq.l	sscale
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	bra.l	dst_snan

	global	sscale_sinf
sscale_sinf:
	mov.b	DTAG(%a6),%d1
	beq.l	t_operr
	cmpi.b	%d1,&QNAN
	beq.l	dst_qnan
	cmpi.b	%d1,&SNAN
	beq.l	dst_snan
	bra.l	t_operr

########################################################################

#
# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
#	       A NAN destination takes priority over the source.
#
	global	sop_sqnan
sop_sqnan:
	mov.b	DTAG(%a6),%d1
	cmpi.b	%d1,&QNAN
	beq.b	dst_qnan
	cmpi.b	%d1,&SNAN
	beq.b	dst_snan
	bra.b	src_qnan

#
# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
#	       A NAN destination takes priority over the source; for a
#	       QNAN destination the SNAN status bits are still recorded.
#
	global	sop_ssnan
sop_ssnan:
	mov.b	DTAG(%a6),%d1
	cmpi.b	%d1,&QNAN
	beq.b	dst_qnan_src_snan
	cmpi.b	%d1,&SNAN
	beq.b	dst_snan
	bra.b	src_snan

dst_qnan_src_snan:
	ori.l	&snaniop_mask,USER_FPSR(%a6)	# set NAN/SNAN/AIOP
	bra.b	dst_qnan

#
# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
#
	global	dst_snan
dst_snan:
	fmov.x	DST(%a1),%fp0		# the fmove sets the SNAN bit
	fmov.l	%fpsr,%d0		# catch resulting status
	or.l	%d0,USER_FPSR(%a6)	# store status
	rts

#
# dst_qnan(): Return the dst QNAN.
#
	global	dst_qnan
dst_qnan:
	fmov.x	DST(%a1),%fp0		# return the non-signalling nan
	tst.b	DST_EX(%a1)		# set ccodes according to QNAN sign
	bmi.b	dst_qnan_m
dst_qnan_p:
	mov.b	&nan_bmask,FPSR_CC(%a6)
	rts
dst_qnan_m:
	mov.b	&neg_bmask+nan_bmask,FPSR_CC(%a6)
	rts

#
# src_snan(): Return the src SNAN w/ the SNAN bit set.
#
	global	src_snan
src_snan:
	fmov.x	SRC(%a0),%fp0		# the fmove sets the SNAN bit
	fmov.l	%fpsr,%d0		# catch resulting status
	or.l	%d0,USER_FPSR(%a6)	# store status
	rts

#
# src_qnan(): Return the src QNAN.
#
	global	src_qnan
src_qnan:
	fmov.x	SRC(%a0),%fp0		# return the non-signalling nan
	tst.b	SRC_EX(%a0)		# set ccodes according to QNAN sign
	bmi.b	src_qnan_m		# use this routine's own negative exit
src_qnan_p:
	mov.b	&nan_bmask,FPSR_CC(%a6)
	rts
src_qnan_m:
	mov.b	&neg_bmask+nan_bmask,FPSR_CC(%a6)
	rts

#
# fkern2.s:
#	These entry points are used by the exception handler
# routines where an instruction is selected by an index into
# a large jump table corresponding to a given instruction which
# has been decoded. Flow continues here where we now decode
# further according to the source operand type.
#
# Every entry below has the same shape: load the source tag STAG(%a6)
# into d1, branch to the main routine when the tag is zero, otherwise
# compare against ZERO, INF, DENORM and QNAN in that order; anything
# left over is treated as an SNAN.
#

	global	fsinh
fsinh:
	mov.b	STAG(%a6),%d1
	beq.l	ssinh
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	src_inf
	cmpi.b	%d1,&DENORM
	beq.l	ssinhd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	flognp1
flognp1:
	mov.b	STAG(%a6),%d1
	beq.l	slognp1
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	sopr_inf
	cmpi.b	%d1,&DENORM
	beq.l	slognp1d
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fetoxm1
fetoxm1:
	mov.b	STAG(%a6),%d1
	beq.l	setoxm1
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	setoxm1i
	cmpi.b	%d1,&DENORM
	beq.l	setoxm1d
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	ftanh
ftanh:
	mov.b	STAG(%a6),%d1
	beq.l	stanh
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	src_one
	cmpi.b	%d1,&DENORM
	beq.l	stanhd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fatan
fatan:
	mov.b	STAG(%a6),%d1
	beq.l	satan
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	spi_2
	cmpi.b	%d1,&DENORM
	beq.l	satand
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fasin
fasin:
	mov.b	STAG(%a6),%d1
	beq.l	sasin
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	sasind
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fatanh
fatanh:
	mov.b	STAG(%a6),%d1
	beq.l	satanh
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	satanhd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fsine
fsine:
	mov.b	STAG(%a6),%d1
	beq.l	ssin
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	ssind
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	ftan
ftan:
	mov.b	STAG(%a6),%d1
	beq.l	stan
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	stand
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fetox
fetox:
	mov.b	STAG(%a6),%d1
	beq.l	setox
	cmpi.b	%d1,&ZERO
	beq.l	ld_pone
	cmpi.b	%d1,&INF
	beq.l	szr_inf
	cmpi.b	%d1,&DENORM
	beq.l	setoxd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	ftwotox
ftwotox:
	mov.b	STAG(%a6),%d1
	beq.l	stwotox
	cmpi.b	%d1,&ZERO
	beq.l	ld_pone
	cmpi.b	%d1,&INF
	beq.l	szr_inf
	cmpi.b	%d1,&DENORM
	beq.l	stwotoxd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	ftentox
ftentox:
	mov.b	STAG(%a6),%d1
	beq.l	stentox
	cmpi.b	%d1,&ZERO
	beq.l	ld_pone
	cmpi.b	%d1,&INF
	beq.l	szr_inf
	cmpi.b	%d1,&DENORM
	beq.l	stentoxd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	flogn
flogn:
	mov.b	STAG(%a6),%d1
	beq.l	slogn
	cmpi.b	%d1,&ZERO
	beq.l	t_dz2
	cmpi.b	%d1,&INF
	beq.l	sopr_inf
	cmpi.b	%d1,&DENORM
	beq.l	slognd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	flog10
flog10:
	mov.b	STAG(%a6),%d1
	beq.l	slog10
	cmpi.b	%d1,&ZERO
	beq.l	t_dz2
	cmpi.b	%d1,&INF
	beq.l	sopr_inf
	cmpi.b	%d1,&DENORM
	beq.l	slog10d
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	flog2
flog2:
	mov.b	STAG(%a6),%d1
	beq.l	slog2
	cmpi.b	%d1,&ZERO
	beq.l	t_dz2
	cmpi.b	%d1,&INF
	beq.l	sopr_inf
	cmpi.b	%d1,&DENORM
	beq.l	slog2d
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fcosh
fcosh:
	mov.b	STAG(%a6),%d1
	beq.l	scosh
	cmpi.b	%d1,&ZERO
	beq.l	ld_pone
	cmpi.b	%d1,&INF
	beq.l	ld_pinf
	cmpi.b	%d1,&DENORM
	beq.l	scoshd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	facos
facos:
	mov.b	STAG(%a6),%d1
	beq.l	sacos
	cmpi.b	%d1,&ZERO
	beq.l	ld_ppi2
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	sacosd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fcos
fcos:
	mov.b	STAG(%a6),%d1
	beq.l	scos
	cmpi.b	%d1,&ZERO
	beq.l	ld_pone
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	scosd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fgetexp
fgetexp:
	mov.b	STAG(%a6),%d1
	beq.l	sgetexp
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	sgetexpd
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fgetman
fgetman:
	mov.b	STAG(%a6),%d1
	beq.l	sgetman
	cmpi.b	%d1,&ZERO
	beq.l	src_zero
	cmpi.b	%d1,&INF
	beq.l	t_operr
	cmpi.b	%d1,&DENORM
	beq.l	sgetmand
	cmpi.b	%d1,&QNAN
	beq.l	src_qnan
	bra.l	src_snan

	global	fsincos
fsincos:
	mov.b	STAG(%a6),%d1
	beq.l	ssincos
	cmpi.b	%d1,&ZERO
	beq.l	ssincosz
	cmpi.b	%d1,&INF
	beq.l	ssincosi
	cmpi.b	%d1,&DENORM
	beq.l	ssincosd
	cmpi.b	%d1,&QNAN
	beq.l	ssincosqnan
	bra.l	ssincossnan

	global	fmod
fmod:
	mov.b	STAG(%a6),%d1
	beq.l	smod_snorm
	cmpi.b	%d1,&ZERO
	beq.l	smod_szero
	cmpi.b	%d1,&INF
	beq.l	smod_sinf
	cmpi.b	%d1,&DENORM
	beq.l	smod_sdnrm
	cmpi.b	%d1,&QNAN
	beq.l	sop_sqnan
	bra.l	sop_ssnan

	global	frem
frem:
	mov.b	STAG(%a6),%d1
	beq.l	srem_snorm
	cmpi.b	%d1,&ZERO
	beq.l	srem_szero
	cmpi.b	%d1,&INF
	beq.l	srem_sinf
	cmpi.b	%d1,&DENORM
	beq.l	srem_sdnrm
	cmpi.b	%d1,&QNAN
	beq.l	sop_sqnan
	bra.l	sop_ssnan

	global	fscale
fscale:
	mov.b	STAG(%a6),%d1
	beq.l	sscale_snorm
	cmpi.b	%d1,&ZERO
	beq.l	sscale_szero
	cmpi.b	%d1,&INF
	beq.l	sscale_sinf
	cmpi.b	%d1,&DENORM
	beq.l	sscale_sdnrm
	cmpi.b	%d1,&QNAN
	beq.l	sop_sqnan
	bra.l	sop_ssnan

#########################################################################
# XDEF ****************************************************************
#	fgen_except(): catch an exception during transcendental
#		       emulation
#
# XREF ****************************************************************
#	fmul() - emulate a multiply instruction
#	fadd() - emulate an add instruction
#	fin() - emulate an fmove instruction
#
# INPUT ***************************************************************
#	fp0 = destination operand
#	d1  = type of instruction that took exception
#	      (the routine compares d1 against FMOV_OP and FADD_OP)
#	fsave frame = source operand
#
# OUTPUT **************************************************************
#	fp0 = result
#	fp1 = EXOP
#
# ALGORITHM ***********************************************************
#	An exception occurred on the last instruction of the
# transcendental emulation. hopefully, this won't be happening much
# because it will be VERY slow.
#
#	The only exceptions capable of passing through here are
# Overflow, Underflow, and Unsupported Data Type.
#
#########################################################################

	global	fgen_except
fgen_except:
	cmpi.b	0x3(%sp),&0x7		# is exception UNSUPP?
	beq.b	fge_unsupp		# yes; src is tagged DENORM instead

	mov.b	&NORM,STAG(%a6)		# src tag = NORM

fge_cont:
	mov.b	&NORM,DTAG(%a6)		# dst tag = NORM

# ok, I have a problem with putting the dst op at FP_DST. the emulation
# routines aren't supposed to alter the operands but we've just squashed
# FP_DST here...

# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
# than a potential bug. to begin with, only the dyadic functions
# frem,fmod, and fscale would get the dst trashed here. But, for
# the 060SP, the FP_DST is never used again anyways.
	fmovm.x	&0x80,FP_DST(%a6)	# dst op is in fp0

	lea	0x4(%sp),%a0		# pass: ptr to src op
	lea	FP_DST(%a6),%a1		# pass: ptr to dst op

# select the emulation routine from the instruction type in d1;
# anything other than FMOV_OP or FADD_OP is handled as a multiply.
	cmpi.b	%d1,&FMOV_OP
	beq.b	fge_fin			# it was an "fmov"
	cmpi.b	%d1,&FADD_OP
	beq.b	fge_fadd		# it was an "fadd"
fge_fmul:
	bsr.l	fmul
	rts
fge_fadd:
	bsr.l	fadd
	rts
fge_fin:
	bsr.l	fin
	rts

fge_unsupp:
	mov.b	&DENORM,STAG(%a6)	# src tag = DENORM
	bra.b	fge_cont

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table.
# Included are
# routines like fadd/fmul/fabs as well as the transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
#
# Slots with no routine hold a zero offset (tbl_unsupp - tbl_unsupp).
# The table has 0x6d (109) longword entries, matching the swbeg count.
#

	swbeg	&109
tbl_unsupp:
	long	fin - tbl_unsupp	# 00: fmove
	long	fint - tbl_unsupp	# 01: fint
	long	fsinh - tbl_unsupp	# 02: fsinh
	long	fintrz - tbl_unsupp	# 03: fintrz
	long	fsqrt - tbl_unsupp	# 04: fsqrt
	long	tbl_unsupp - tbl_unsupp	# 05: (none)
	long	flognp1 - tbl_unsupp	# 06: flognp1
	long	tbl_unsupp - tbl_unsupp	# 07: (none)
	long	fetoxm1 - tbl_unsupp	# 08: fetoxm1
	long	ftanh - tbl_unsupp	# 09: ftanh
	long	fatan - tbl_unsupp	# 0a: fatan
	long	tbl_unsupp - tbl_unsupp	# 0b: (none)
	long	fasin - tbl_unsupp	# 0c: fasin
	long	fatanh - tbl_unsupp	# 0d: fatanh
	long	fsine - tbl_unsupp	# 0e: fsin
	long	ftan - tbl_unsupp	# 0f: ftan
	long	fetox - tbl_unsupp	# 10: fetox
	long	ftwotox - tbl_unsupp	# 11: ftwotox
	long	ftentox - tbl_unsupp	# 12: ftentox
	long	tbl_unsupp - tbl_unsupp	# 13: (none)
	long	flogn - tbl_unsupp	# 14: flogn
	long	flog10 - tbl_unsupp	# 15: flog10
	long	flog2 - tbl_unsupp	# 16: flog2
	long	tbl_unsupp - tbl_unsupp	# 17: (none)
	long	fabs - tbl_unsupp	# 18: fabs
	long	fcosh - tbl_unsupp	# 19: fcosh
	long	fneg - tbl_unsupp	# 1a: fneg
	long	tbl_unsupp - tbl_unsupp	# 1b: (none)
	long	facos - tbl_unsupp	# 1c: facos
	long	fcos - tbl_unsupp	# 1d: fcos
	long	fgetexp - tbl_unsupp	# 1e: fgetexp
	long	fgetman - tbl_unsupp	# 1f: fgetman
	long	fdiv - tbl_unsupp	# 20: fdiv
	long	fmod - tbl_unsupp	# 21: fmod
	long	fadd - tbl_unsupp	# 22: fadd
	long	fmul - tbl_unsupp	# 23: fmul
	long	fsgldiv - tbl_unsupp	# 24: fsgldiv
	long	frem - tbl_unsupp	# 25: frem
	long	fscale - tbl_unsupp	# 26: fscale
	long	fsglmul - tbl_unsupp	# 27: fsglmul
	long	fsub - tbl_unsupp	# 28: fsub
	long	tbl_unsupp - tbl_unsupp	# 29: (none)
	long	tbl_unsupp - tbl_unsupp	# 2a: (none)
	long	tbl_unsupp - tbl_unsupp	# 2b: (none)
	long	tbl_unsupp - tbl_unsupp	# 2c: (none)
	long	tbl_unsupp - tbl_unsupp	# 2d: (none)
	long	tbl_unsupp - tbl_unsupp	# 2e: (none)
	long	tbl_unsupp - tbl_unsupp	# 2f: (none)
	long	fsincos - tbl_unsupp	# 30: fsincos
	long	fsincos - tbl_unsupp	# 31: fsincos
	long	fsincos - tbl_unsupp	# 32: fsincos
	long	fsincos - tbl_unsupp	# 33: fsincos
	long	fsincos - tbl_unsupp	# 34: fsincos
	long	fsincos - tbl_unsupp	# 35: fsincos
	long	fsincos - tbl_unsupp	# 36: fsincos
	long	fsincos - tbl_unsupp	# 37: fsincos
	long	fcmp - tbl_unsupp	# 38: fcmp
	long	tbl_unsupp - tbl_unsupp	# 39: (none)
	long	ftst - tbl_unsupp	# 3a: ftst
	long	tbl_unsupp - tbl_unsupp	# 3b: (none)
	long	tbl_unsupp - tbl_unsupp	# 3c: (none)
	long	tbl_unsupp - tbl_unsupp	# 3d: (none)
	long	tbl_unsupp - tbl_unsupp	# 3e: (none)
	long	tbl_unsupp - tbl_unsupp	# 3f: (none)
	long	fsin - tbl_unsupp	# 40: fsmove
	long	fssqrt - tbl_unsupp	# 41: fssqrt
	long	tbl_unsupp - tbl_unsupp	# 42: (none)
	long	tbl_unsupp - tbl_unsupp	# 43: (none)
	long	fdin - tbl_unsupp	# 44: fdmove
	long	fdsqrt - tbl_unsupp	# 45: fdsqrt
	long	tbl_unsupp - tbl_unsupp	# 46: (none)
	long	tbl_unsupp - tbl_unsupp	# 47: (none)
	long	tbl_unsupp - tbl_unsupp	# 48: (none)
	long	tbl_unsupp - tbl_unsupp	# 49: (none)
	long	tbl_unsupp - tbl_unsupp	# 4a: (none)
	long	tbl_unsupp - tbl_unsupp	# 4b: (none)
	long	tbl_unsupp - tbl_unsupp	# 4c: (none)
	long	tbl_unsupp - tbl_unsupp	# 4d: (none)
	long	tbl_unsupp - tbl_unsupp	# 4e: (none)
	long	tbl_unsupp - tbl_unsupp	# 4f: (none)
	long	tbl_unsupp - tbl_unsupp	# 50: (none)
	long	tbl_unsupp - tbl_unsupp	# 51: (none)
	long	tbl_unsupp - tbl_unsupp	# 52: (none)
	long	tbl_unsupp - tbl_unsupp	# 53: (none)
	long	tbl_unsupp - tbl_unsupp	# 54: (none)
	long	tbl_unsupp - tbl_unsupp	# 55: (none)
	long	tbl_unsupp - tbl_unsupp	# 56: (none)
	long	tbl_unsupp - tbl_unsupp	# 57: (none)
	long	fsabs - tbl_unsupp	# 58: fsabs
	long	tbl_unsupp - tbl_unsupp	# 59: (none)
	long	fsneg - tbl_unsupp	# 5a: fsneg
	long	tbl_unsupp - tbl_unsupp	# 5b: (none)
	long	fdabs - tbl_unsupp	# 5c: fdabs
	long	tbl_unsupp - tbl_unsupp	# 5d: (none)
	long	fdneg - tbl_unsupp	# 5e: fdneg
	long	tbl_unsupp - tbl_unsupp	# 5f: (none)
	long	fsdiv - tbl_unsupp	# 60: fsdiv
	long	tbl_unsupp - tbl_unsupp	# 61: (none)
	long	fsadd - tbl_unsupp	# 62: fsadd
	long	fsmul - tbl_unsupp	# 63: fsmul
	long	fddiv - tbl_unsupp	# 64: fddiv
	long	tbl_unsupp - tbl_unsupp	# 65: (none)
	long	fdadd - tbl_unsupp	# 66: fdadd
	long	fdmul - tbl_unsupp	# 67: fdmul
	long	fssub - tbl_unsupp	# 68: fssub
	long	tbl_unsupp - tbl_unsupp	# 69: (none)
	long	tbl_unsupp - tbl_unsupp	# 6a: (none)
	long	tbl_unsupp - tbl_unsupp	# 6b: (none)
	long	fdsub - tbl_unsupp	# 6c: fdsub

#########################################################################
# XDEF ****************************************************************
#	fmul(): emulates the fmul instruction
#	fsmul(): emulates the fsmul instruction
#	fdmul(): emulates the fdmul instruction
#
# XREF ****************************************************************
#	scale_to_zero_src() - scale src exponent to zero
#	scale_to_zero_dst() - scale dst exponent to zero
#	unf_res() - return default underflow result
#	ovf_res() - return default overflow result
#	res_qnan() - return QNAN result
#	res_snan() - return SNAN result
#
# INPUT ***************************************************************
#	a0 = pointer to extended precision source operand
#	a1 = pointer to extended precision destination operand
#	d0  rnd prec,mode
#
# OUTPUT **************************************************************
#	fp0 = result
#	fp1 = EXOP (if exception occurred)
#
# ALGORITHM ***********************************************************
#	Handle NANs, infinities, and zeroes as special cases. Divide
# norms/denorms into ext/sgl/dbl precision.
#	For norms/denorms, scale the exponents such that a multiply
# instruction won't cause an exception. Use the regular fmul to
# compute a result. Check if the regular operands would have taken
# an exception. If so, return the default overflow/underflow result
# and return the EXOP if exceptions are enabled. Else, scale the
# result operand to the proper exponent.
#
#
#########################################################################

# scale-factor thresholds, indexed by rounding precision (ext, sgl, dbl).
# fmul_norm compares the combined scale factor against these to decide
# between the normal, may-overflow/underflow and will-overflow/underflow
# paths.
	align	0x10
tbl_fmul_ovfl:
	long	0x3fff - 0x7ffe		# ext_max
	long	0x3fff - 0x407e		# sgl_max
	long	0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long	0x3fff + 0x0001		# ext_unfl
	long	0x3fff - 0x3f80		# sgl_unfl
	long	0x3fff - 0x3c00		# dbl_unfl

# fsmul(): force single rounding precision, then join fmul.
	global	fsmul
fsmul:
	andi.b	&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl prec
	bra.b	fmul

# fdmul(): force double rounding precision, then fall into fmul.
	global	fdmul
fdmul:
	andi.b	&0x30,%d0		# keep rnd mode; clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl prec

	global	fmul
fmul:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info

# build a table index of (DTAG << 3) | STAG; zero means both tags are zero
	clr.w	%d1
	mov.b	DTAG(%a6),%d1
	lsl.b	&0x3,%d1
	or.b	STAG(%a6),%d1		# combine src tags
	bne.w	fmul_not_norm		# optimize on non-norm input

fmul_norm:
# copy both operands to scratch space so they can be scaled
	mov.w	DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_to_zero_src	# scale src exponent
	mov.l	%d0,-(%sp)		# save scale factor 1

	bsr.l	scale_to_zero_dst	# scale dst exponent

	add.l	%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

	mov.w	2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b	&0x6,%d1		# shift to lo bits
	mov.l	(%sp)+,%d0		# load S.F.
	cmp.l	%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4)	# would result ovfl?
	beq.w	fmul_may_ovfl		# result may rnd to overflow
	blt.w	fmul_ovfl		# result will overflow

	cmp.l	%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4)	# would result unfl?
	beq.w	fmul_may_unfl		# result may rnd to no unfl
	bgt.w	fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor.
# if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 = scale factor on entry; d2 is preserved across this sequence.
fmul_normal_exit:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# apply scale factor (subtracted)
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode.
# the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_ovfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6)	# set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fmul_ovfl_ena		# yes; build the EXOP first

# calculate the default result
fmul_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000.
# if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# test the rnd prec
	bne.b	fmul_ovfl_ena_sd	# it's sgl or dbl

# fp0 = extended-precision product; d0 = scale factor
fmul_ovfl_ena_cont:
	fmovm.x	&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# apply scale factor (subtracted)
	subi.l	&0x6000,%d1		# subtract bias
	andi.w	&0x7fff,%d1		# clear sign bit
	andi.w	&0x8000,%d2		# keep old sign
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b	fmul_ovfl_dis		# then produce the default result

# sgl/dbl: redo the multiply with the rounding mode only (precision
# bits cleared) to get the extended-precision result for the EXOP.
fmul_ovfl_ena_sd:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# keep rnd mode only
	fmov.l	%d1,%fpcr		# set FPCR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR
	bra.b	fmul_ovfl_ena_cont

#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x2		# is |result| >= 2.b?
	fbge.w	fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w	fmul_normal_exit

#
# UNDERFLOW:
# - the
# result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if underflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent biased by +0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6)	# set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fmul_unfl_ena		# yes; build the EXOP first

fmul_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we
# used RZ for all.
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

# fp1 = dst operand; FPCR already set by the caller; d0 = scale factor
fmul_unfl_ena_cont:
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR

	fmovm.x	&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# apply scale factor (subtracted)
	addi.l	&0x6000,%d1		# add bias
	andi.w	&0x7fff,%d1		# clear sign bit
	or.w	%d2,%d1			# concat old sign,new exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w	fmul_unfl_dis		# then produce the default result

# sgl/dbl: use the rounding mode only (precision bits cleared)
fmul_unfl_ena_sd:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# use only rnd mode
	fmov.l	%d1,%fpcr		# set FPCR

	bra.b	fmul_unfl_ena_cont

# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x2		# compare |result| with 2
	fbgt.w	fmul_normal_exit	# greater; no underflow occurred
	fblt.w	fmul_unfl		# less; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2.
# so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst operand into fp1

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# keep rnd prec
	ori.b	&rz_mode*0x10,%d1	# insert RZ

	fmov.l	%d1,%fpcr		# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmul.x	FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l	&0x0,%fpcr		# clear FPCR
	fabs.x	%fp1			# make absolute value
	fcmp.b	%fp1,&0x2		# is |result| < 2.b?
	fbge.w	fmul_normal_exit	# no; no underflow occurred
	bra.w	fmul_unfl		# yes, underflow occurred

################################################################################

#
# Multiply: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG on entry, so each group of eight entries below
# is one destination tag and the position inside the group is the source
# tag. Entry comments read "DST x SRC". The last two slots of every group
# are unused and hold a zero offset.
#
fmul_not_norm:
	mov.w	(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp	(tbl_fmul_op.b,%pc,%d1.w)

	swbeg	&48
tbl_fmul_op:
	short	fmul_norm - tbl_fmul_op		# NORM x NORM
	short	fmul_zero - tbl_fmul_op		# NORM x ZERO
	short	fmul_inf_src - tbl_fmul_op	# NORM x INF
	short	fmul_res_qnan - tbl_fmul_op	# NORM x QNAN
	short	fmul_norm - tbl_fmul_op		# NORM x DENORM
	short	fmul_res_snan - tbl_fmul_op	# NORM x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

	short	fmul_zero - tbl_fmul_op		# ZERO x NORM
	short	fmul_zero - tbl_fmul_op		# ZERO x ZERO
	short	fmul_res_operr - tbl_fmul_op	# ZERO x INF
	short	fmul_res_qnan - tbl_fmul_op	# ZERO x QNAN
	short	fmul_zero - tbl_fmul_op		# ZERO x DENORM
	short	fmul_res_snan - tbl_fmul_op	# ZERO x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

	short	fmul_inf_dst - tbl_fmul_op	# INF x NORM
	short	fmul_res_operr - tbl_fmul_op	# INF x ZERO
	short	fmul_inf_dst - tbl_fmul_op	# INF x INF
	short	fmul_res_qnan - tbl_fmul_op	# INF x QNAN
	short	fmul_inf_dst - tbl_fmul_op	# INF x DENORM
	short	fmul_res_snan - tbl_fmul_op	# INF x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

	short	fmul_res_qnan - tbl_fmul_op	# QNAN x NORM
	short	fmul_res_qnan - tbl_fmul_op	# QNAN x ZERO
	short	fmul_res_qnan - tbl_fmul_op	# QNAN x INF
	short	fmul_res_qnan - tbl_fmul_op	# QNAN x QNAN
	short	fmul_res_qnan - tbl_fmul_op	# QNAN x DENORM
	short	fmul_res_snan - tbl_fmul_op	# QNAN x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

	short	fmul_norm - tbl_fmul_op		# DENORM x NORM
	short	fmul_zero - tbl_fmul_op		# DENORM x ZERO
	short	fmul_inf_src - tbl_fmul_op	# DENORM x INF
	short	fmul_res_qnan - tbl_fmul_op	# DENORM x QNAN
	short	fmul_norm - tbl_fmul_op		# DENORM x DENORM
	short	fmul_res_snan - tbl_fmul_op	# DENORM x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

	short	fmul_res_snan - tbl_fmul_op	# SNAN x NORM
	short	fmul_res_snan - tbl_fmul_op	# SNAN x ZERO
	short	fmul_res_snan - tbl_fmul_op	# SNAN x INF
	short	fmul_res_snan - tbl_fmul_op	# SNAN x QNAN
	short	fmul_res_snan - tbl_fmul_op	# SNAN x DENORM
	short	fmul_res_snan - tbl_fmul_op	# SNAN x SNAN
	short	tbl_fmul_op - tbl_fmul_op	#
	short	tbl_fmul_op - tbl_fmul_op	#

# long-branch trampolines so the 16-bit table offsets can reach the
# shared result routines
fmul_res_operr:
	bra.l	res_operr
fmul_res_snan:
	bra.l	res_snan
fmul_res_qnan:
	bra.l	res_qnan

#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
#
	global	fmul_zero		# global for fsglmul
fmul_zero:
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d0,%d1
	bpl.b	fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s	&0x80000000,%fp0	# load -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
	rts
fmul_zero_p:
	fmov.s	&0x00000000,%fp0	# load +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.
#
# fmul_inf_dst returns the destination INF; fmul_inf_src returns the source
# INF and then shares the sign/ccode tail of fmul_inf_dst. In both cases the
# result sign is the exclusive-or of the two operand signs.
#
	global		fmul_inf_dst		# global for fsglmul
fmul_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1			# bit 7 = result sign
	bpl.b		fmul_inf_dst_p		# result INF is pos.
fmul_inf_dst_n:
	fabs.x		%fp0			# clear result sign
	fneg.x		%fp0			# set result sign
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x		%fp0			# clear result sign
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

	global		fmul_inf_src		# global for fsglmul
fmul_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1			# bit 7 = result sign
	bpl.b		fmul_inf_dst_p		# result INF is pos.
	bra.b		fmul_inf_dst_n		# result INF is neg.

#########################################################################
# XDEF ****************************************************************	#
#	fin(): emulates the fmove instruction				#
#	fsin(): emulates the fsmove instruction				#
#	fdin(): emulates the fdmove instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa for EXOP on denorm			#
#	scale_to_zero_src() - scale src exponent to zero		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Norms can be emulated w/ a regular fmove instruction. For	#
# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
# if the result would have overflowed/underflowed. If so, use unf_res()	#
# or ovf_res() to return the default result. Also return EXOP if	#
# exception is enabled. If no exception, return the default result.	#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

# fsin/fdin force the rounding precision field of d0 to single/double and
# then share the fin body; only the rounding mode bits (0x30) are kept.
	global		fsin
fsin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fin

	global		fdin
fdin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
						# fall through into fin

	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		STAG(%a6),%d1		# fetch src optype tag
	bne.w		fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_norm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 must still hold the scale factor here; it is subtracted from the
# exponent of the scaled result to restore the true exponent.
fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# unscale: subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
#
# The EXOP is built in FP_SCR1 so that FP_SCR0 is left intact for the
# default-result path at fin_sd_unfl_dis.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# unscale: subtract scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fdiv(): emulates the fdiv instruction				#
#	fsdiv(): emulates the fsdiv instruction				#
#	fddiv(): emulates the fddiv instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT
# ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fdiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#									#
#########################################################################

# Scale-factor thresholds, one long per rounding precision, indexed by the
# precision field (ext, sgl, dbl) at scale 4.
	align		0x10
tbl_fdiv_unfl:
	long		0x3fff - 0x0000		# ext_unfl
	long		0x3fff - 0x3f81		# sgl_unfl
	long		0x3fff - 0x3c01		# dbl_unfl

tbl_fdiv_ovfl:
	long		0x3fff - 0x7ffe		# ext overflow exponent
	long		0x3fff - 0x407e		# sgl overflow exponent
	long		0x3fff - 0x43fe		# dbl overflow exponent

# fsdiv/fddiv force the rounding precision field of d0 to single/double and
# then share the fdiv body; only the rounding mode bits (0x30) are kept.
	global		fsdiv
fsdiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fdiv

	global		fddiv
fddiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# fall through into fdiv

	global		fdiv
fdiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1		# d1 = DTAG * 8
	or.b		STAG(%a6),%d1		# combine dst and src tags

	bne.w		fdiv_not_norm		# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fdiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	neg.l		(%sp)			# combined S.F. = -(src S.F.) ...
	add.l		%d0,(%sp)		# ... + (dst S.F.)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w		fdiv_may_ovfl		# maybe; go check

	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w		fdiv_may_unfl		# maybe
	bgt.w		fdiv_unfl		# yes; go handle underflow

fdiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 must hold the combined scale factor here; it is subtracted from the
# exponent of the scaled quotient to restore the true exponent.
fdiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# store d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# unscale: subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

# unscaled-exponent limits per rounding precision (ext, sgl, dbl); an
# unscaled exponent at or above the limit is treated as overflow.
tbl_fdiv_ovfl2:
	long		0x7fff
	long		0x407f
	long		0x43ff

fdiv_no_ovfl:
	mov.l		(%sp)+,%d0		# restore scale factor
	bra.b		fdiv_normal_exit

fdiv_may_ovfl:
	mov.l		%d0,-(%sp)		# save scale factor

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d0		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d0,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d0		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d0		# strip sign
	sub.l		(%sp),%d0		# unscale: subtract scale factor
	# d1 still holds the precision index loaded in fdiv_norm
	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
	blt.b		fdiv_no_ovfl		# below the limit; no overflow
	mov.l		(%sp)+,%d0		# restore scale factor

fdiv_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fdiv_ovfl_ena		# yes

fdiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

fdiv_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl

fdiv_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# unscale: subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fdiv_ovfl_dis

# sgl/dbl: redo the divide with only the rounding mode in the FPCR (the
# precision field is cleared) before building the EXOP.
fdiv_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fdiv_ovfl_ena_cont

fdiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fdiv_unfl_ena		# yes

fdiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fdiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fdiv_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# unscale: subtract scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fdiv_unfl_dis

fdiv_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fdiv_unfl_ena_cont

#
# the divide operation MAY underflow:
#
fdiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# compare |result| with 1
	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fdiv_normal_exit	# no; no underflow occurred
	bra.w		fdiv_unfl		# yes; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG indexes tbl_fdiv_op. Each entry is a 16-bit offset
# from tbl_fdiv_op to the handler.
#
fdiv_not_norm:
	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)	# dispatch to handler

#
# Rows are "dst / src". The last two slots of every group of eight are
# unused and hold a zero offset.
#
	swbeg		&48
tbl_fdiv_op:
	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

# long-branch trampolines so the 16-bit table offsets can reach the shared
# result routines.
fdiv_res_qnan:
	bra.l		res_qnan
fdiv_res_snan:
	bra.l		res_snan
fdiv_res_operr:
	bra.l		res_operr

#
# Return a ZERO in fp0 whose sign is the exclusive-or of the operand signs,
# and write the matching Z (and N) bits to FPSR_CC.
#
	global		fdiv_zero_load		# global for fsgldiv
fdiv_zero_load:
	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
	mov.b		DST_EX(%a1),%d1		# or of input signs.
	eor.b		%d0,%d1			# bit 7 = result sign
	bpl.b		fdiv_zero_load_p	# result is positive
	fmov.s		&0x80000000,%fp0	# load a -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
	rts
fdiv_zero_load_p:
	fmov.s		&0x00000000,%fp0	# load a +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# The destination was In Range and the source was a ZERO. The result,
# Therefore, is an INF w/ the proper sign.
# So, determine the sign and return a new INF (w/ the j-bit cleared).
#
	global		fdiv_inf_load		# global for fsgldiv
fdiv_inf_load:
	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
	mov.b		SRC_EX(%a0),%d0		# load both signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1			# bit 7 = result sign
	bpl.b		fdiv_inf_load_p		# result is positive
	fmov.s		&0xff800000,%fp0	# make result -INF
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fdiv_inf_load_p:
	fmov.s		&0x7f800000,%fp0	# make result +INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#
# The destination was an INF w/ an In Range or ZERO source, the result is
# an INF w/ the proper sign.
# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
# dst INF is set, then the j-bit of the result INF is also set).
#
	global		fdiv_inf_dst		# global for fsgldiv
fdiv_inf_dst:
	mov.b		DST_EX(%a1),%d0		# load both signs
	mov.b		SRC_EX(%a0),%d1
	eor.b		%d0,%d1			# bit 7 = result sign
	bpl.b		fdiv_inf_dst_p		# result is positive

	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# clear sign bit
	fneg.x		%fp0			# set sign bit
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fdiv_inf_dst_p:
	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# return positive INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fneg(): emulates the fneg instruction				#
#	fsneg(): emulates the fsneg instruction				#
#	fdneg(): emulates the fdneg instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize a denorm to provide EXOP			#
#	scale_to_zero_src() - scale sgl/dbl source exponent		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
# and an actual fneg performed to see if overflow/underflow would have	#
# occurred. If so, return default underflow/overflow result. Else,	#
# scale the result exponent and return result. FPSR gets set based on	#
# the result value.
#									#
#########################################################################

#
# fsneg/fdneg/fneg entry points.
#   in : a0 = pointer to extended precision source operand
#        d0 = rnd prec,mode (fsneg/fdneg force the precision field)
#   out: fp0 = result, fp1 = EXOP (if an enabled exception occurred)
#
	global		fsneg
fsneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg

	global		fdneg
fdneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fneg
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fneg_not_norm		# optimize on non-norm input

#
# NEGATE SIGN : norms and denorms ONLY!
#
fneg_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fneg_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# result is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# negative; set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fneg_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fneg_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# d0 still holds the scale factor; put the real exponent back into the result.
#
fneg_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fneg_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fneg_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign (in memory)
	bpl.b		fneg_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
# FP_SCR0 already carries the negated sign here (see fneg_sd_unfl).
#
fneg_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis

#
# operand WILL overflow.
#
fneg_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL (or INEX) is enabled, so an EXOP is returned in fp1.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# FP_SCR0 still holds the scaled, un-negated source at this point (the fneg.x
# above wrote only fp0), so the sign copied into the EXOP must be inverted
# here, just as fneg_sd_unfl inverts it before building the underflow EXOP.
#
fneg_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch source {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# isolate source sign
	eori.w		&0x8000,%d2		# EXOP sign = inverted source sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fneg_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fneg_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fneg_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF ****************************************************************	#
#	ftst(): emulates the ftst instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT **************************************************************	#
#	none								#
#									#
# ALGORITHM ***********************************************************	#
#	Check
#	the source operand tag (STAG) and set the FPSR according	#
#	to the operand type and sign.					#
#									#
#########################################################################

#
# ftst: a0 = pointer to extended precision source operand.
# Only FPSR_CC(%a6) is written; no fp register is modified here.
# NOTE(review): the positive NORM/DENORM paths return without touching
# FPSR_CC, so the caller has presumably cleared it beforehand - confirm.
#
	global		ftst
ftst:
	mov.b		STAG(%a6),%d1
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# Denorm:
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fint(): emulates the fint instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT
# ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
#	here. For norms, load the rounding mode/prec, execute a "fint", then	#
#	store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
#	norms. Denorms are so low that the answer will either be a zero or a	#
#	one.								#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.							#
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
fint_norm:
	andi.b		&0x30,%d0		# set prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# what remains is a QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
# Only the sign/exponent word and the top mantissa byte of FP_SCR0 are
# written below; the remaining mantissa bytes keep whatever FP_SCR0 held.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0
	bra.b		fint_norm

#
# Zero:
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
#	here. For norms, clear the FPSR, execute a "fintrz", then store	#
#	the resulting FPSR bits (no rounding mode needs to be loaded).	#
#	For denorms, force the j-bit to a one and do the same as for	#
#	norms. Denorms are so low that the answer will always be a	#
#	zero.								#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.							#
#									#
#########################################################################

#
# fintrz: a0 = pointer to extended precision source operand.
# Result is returned in fp0; exception/ccode bits are merged into USER_FPSR.
#
	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# what remains is a QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0
	bra.b		fintrz_norm

#
# Zero:
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fabs(): emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
#	gets an EXOP created for it since it's an underflow.		#
#	Double and single precision can overflow and underflow. First,	#
#	scale the operand such that the exponent is zero. Perform an "fabs"	#
#	using the correct rnd mode/prec. Check to see if the original	#
#	exponent would take an exception. If so, use unf_res() or ovf_res()	#
#	to calculate the default result. Also, create the EXOP for the	#
#	exceptional case. If no exception should occur, insert the correct	#
#	result exponent and return.					#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

#
# fsabs/fdabs/fabs entry points.
#   in : a0 = pointer to extended precision source operand
#        d0 = rnd precision/mode (fsabs/fdabs force the precision field)
#   out: fp0 = result, fp1 = EXOP (if an enabled exception occurred)
#
	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# d0 still holds the scale factor; put the real exponent back into the result.
#
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
# FP_SCR0 already has its sign cleared here (see fabs_sd_unfl).
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis

#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL (or INEX) is enabled, so an EXOP is returned in fp1.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# FP_SCR0 still holds the scaled source with its ORIGINAL sign at this point
# (the fabs.x above wrote only fp0). The EXOP of an absolute value must be
# positive, so the source sign is dropped rather than copied, matching the
# bclr done in fabs_sd_unfl for the underflow EXOP.
#
fabs_sd_ovfl_ena:
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch source {sgn,exp}
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# keep exponent; sign stays clear
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent, sign = +
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fabs_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

# only ZERO and INF are left
	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fcmp(): fp compare op routine					#
#									#
# XREF ****************************************************************	#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs and denorms as special cases.
#	For everything else,						#
#	just use the actual fcmp instruction to produce the correct condition	#
#	codes.								#
#									#
#########################################################################

#
# fcmp: a0 = pointer to source operand, a1 = pointer to destination operand.
# Only FPSR_CC(%a6) is produced. d1 is built as (DTAG << 3) | STAG and is
# used to index tbl_fcmp_op when either operand is not a NORM.
#
	global		fcmp
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts

#
# fcmp: inputs are not both normalized; what are they?
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

#
# Each row is "dst type - src type"; eight entries per destination type,
# the last two of every group being unused filler.
#
	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op	# NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op	# NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op	# NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op	# NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op	# NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

	short		fcmp_norm	- tbl_fcmp_op	# ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op	# ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op	# ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op	# ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op	# ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

	short		fcmp_norm	- tbl_fcmp_op	# INF - NORM
	short		fcmp_norm	- tbl_fcmp_op	# INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op	# INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op	# INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op	# INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

	short		fcmp_res_qnan	- tbl_fcmp_op	# QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op	# QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op	# QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op	# QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op	# QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

	short		fcmp_dnrm_nrm	- tbl_fcmp_op	# DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op	# DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op	# DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op	# DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op	# DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op	# SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op	#
	short		tbl_fcmp_op	- tbl_fcmp_op	#

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts

#
# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
#

# src is a DENORM: compare against a copy of it with the j-bit forced on.
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

# dst is a DENORM: compare using a copy of it with the j-bit forced on.
# Only the 16-bit sign/exponent word is copied, as in the sibling routines.
fcmp_dnrm_d:
	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1
	bra.w		fcmp_norm

# both operands are DENORMs: dst copy goes to FP_SCR1, src copy to FP_SCR0.
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

# dst is a NORM, src is a DENORM.
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s		# unlike signs; let fcmp decide

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# dst is a DENORM, src is a NORM.
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d		# unlike signs; let fcmp decide

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsglmul():
#	(fsglmul, continued) emulates the fsglmul instruction		#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#	res_operr() - branched to for ZERO x INF and INF x ZERO		#
#	fmul_zero(), fmul_inf_src(), fmul_inf_dst() - branched to for	#
#		the ZERO and INF operand combinations			#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fsglmul to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#									#
#########################################################################

	global		fsglmul
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG

	bne.w		fsglmul_not_norm	# optimize on non-norm input

# working copies: dst -> FP_SCR1, src -> FP_SCR0
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow

fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 still holds the scale factor here; it is subtracted from the result exponent.
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX is enabled: build the EXOP in fp1, then fall back to the
# default-result path above. d0 (scale factor) must be consumed before the
# branch because fsglmul_ovfl_dis reloads d0.
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis

fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit

fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis

fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG on entry. In the table below the left-hand type
# is selected by DTAG (group of eight) and the right-hand type by STAG.
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

# Long-branch stubs: the table holds 16-bit offsets, so each special case
# lands on a nearby stub that forwards with bra.l.
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fsgldiv(): emulates the fsgldiv instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fsgldiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled.
# (fsgldiv ALGORITHM, continued) Else, scale the			#
# result operand to the proper exponent.				#
#									#
#########################################################################

	global		fsgldiv
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
# working copies: dst -> FP_SCR1, src -> FP_SCR0
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale2 - scale1
	add.l		%d0,(%sp)		#      = (dst scale) - (src scale)

# NOTE(review): d1 loaded below is overwritten (fmov.l %fpsr,%d1) on every
# path before it is read; the pair of instructions appears to be unused here.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# could result overflow?
	ble.w		fsgldiv_may_ovfl	# maybe; divide and inspect the exponent

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 still holds the scale factor here; it is subtracted from the result exponent.
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit	# no; finish as a normal result

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX is enabled: build the EXOP in fp1, then fall back to the
# default-result path above. d0 (scale factor) must be consumed before the
# branch because fsgldiv_ovfl_dis reloads d0.
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp -= scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

#
# the divide operation MAY underflow:
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
	bra.w		fsgldiv_unfl		# yes; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG on entry. In the table below the left-hand type
# is selected by DTAG (group of eight) and the right-hand type by STAG.
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

# Long-branch stubs: the table holds 16-bit offsets, so each special case
# lands on a nearby stub that forwards with bra.l.
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do addition after scaling exponents such that exception won't	#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.
#14631# #14632#########################################################################1463314634global fsadd14635fsadd:14636andi.b &0x30,%d0 # clear rnd prec14637ori.b &s_mode*0x10,%d0 # insert sgl prec14638bra.b fadd1463914640global fdadd14641fdadd:14642andi.b &0x30,%d0 # clear rnd prec14643ori.b &d_mode*0x10,%d0 # insert dbl prec1464414645global fadd14646fadd:14647mov.l %d0,L_SCR3(%a6) # store rnd info1464814649clr.w %d114650mov.b DTAG(%a6),%d114651lsl.b &0x3,%d114652or.b STAG(%a6),%d1 # combine src tags1465314654bne.w fadd_not_norm # optimize on non-norm input1465514656#14657# ADD: norms and denorms14658#14659fadd_norm:14660bsr.l addsub_scaler2 # scale exponents1466114662fadd_zero_entry:14663fmovm.x FP_SCR1(%a6),&0x80 # load dst op1466414665fmov.l &0x0,%fpsr # clear FPSR14666fmov.l L_SCR3(%a6),%fpcr # set FPCR1466714668fadd.x FP_SCR0(%a6),%fp0 # execute add1466914670fmov.l &0x0,%fpcr # clear FPCR14671fmov.l %fpsr,%d1 # fetch INEX2,N,Z1467214673or.l %d1,USER_FPSR(%a6) # save exc and ccode bits1467414675fbeq.w fadd_zero_exit # if result is zero, end now1467614677mov.l %d2,-(%sp) # save d21467814679fmovm.x &0x01,-(%sp) # save result to stack1468014681mov.w 2+L_SCR3(%a6),%d114682lsr.b &0x6,%d11468314684mov.w (%sp),%d2 # fetch new sign, exp14685andi.l &0x7fff,%d2 # strip sign14686sub.l %d0,%d2 # add scale factor1468714688cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?14689bge.b fadd_ovfl # yes1469014691cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?14692blt.w fadd_unfl # yes14693beq.w fadd_may_unfl # maybe; go find out1469414695fadd_normal:14696mov.w (%sp),%d114697andi.w &0x8000,%d1 # keep sign14698or.w %d2,%d1 # concat sign,new exp14699mov.w %d1,(%sp) # insert new exponent1470014701fmovm.x (%sp)+,&0x80 # return result in fp01470214703mov.l (%sp)+,%d2 # restore d214704rts1470514706fadd_zero_exit:14707# fmov.s &0x00000000,%fp0 # return zero in fp014708rts1470914710tbl_fadd_ovfl:14711long 0x7fff # ext ovfl14712long 0x407f # sgl ovfl14713long 
0x43ff # dbl ovfl1471414715tbl_fadd_unfl:14716long 0x0000 # ext unfl14717long 0x3f81 # sgl unfl14718long 0x3c01 # dbl unfl1471914720fadd_ovfl:14721or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex1472214723mov.b FPCR_ENABLE(%a6),%d114724andi.b &0x13,%d1 # is OVFL or INEX enabled?14725bne.b fadd_ovfl_ena # yes1472614727add.l &0xc,%sp14728fadd_ovfl_dis:14729btst &neg_bit,FPSR_CC(%a6) # is result negative?14730sne %d1 # set sign param accordingly14731mov.l L_SCR3(%a6),%d0 # pass prec:rnd14732bsr.l ovf_res # calculate default result14733or.b %d0,FPSR_CC(%a6) # set INF,N if applicable14734fmovm.x (%a0),&0x80 # return default result in fp014735mov.l (%sp)+,%d2 # restore d214736rts1473714738fadd_ovfl_ena:14739mov.b L_SCR3(%a6),%d114740andi.b &0xc0,%d1 # is precision extended?14741bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl1474214743fadd_ovfl_ena_cont:14744mov.w (%sp),%d114745andi.w &0x8000,%d1 # keep sign14746subi.l &0x6000,%d2 # add extra bias14747andi.w &0x7fff,%d214748or.w %d2,%d1 # concat sign,new exp14749mov.w %d1,(%sp) # insert new exponent1475014751fmovm.x (%sp)+,&0x40 # return EXOP in fp114752bra.b fadd_ovfl_dis1475314754fadd_ovfl_ena_sd:14755fmovm.x FP_SCR1(%a6),&0x80 # load dst op1475614757mov.l L_SCR3(%a6),%d114758andi.b &0x30,%d1 # keep rnd mode14759fmov.l %d1,%fpcr # set FPCR1476014761fadd.x FP_SCR0(%a6),%fp0 # execute add1476214763fmov.l &0x0,%fpcr # clear FPCR1476414765add.l &0xc,%sp14766fmovm.x &0x01,-(%sp)14767bra.b fadd_ovfl_ena_cont1476814769fadd_unfl:14770bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit1477114772add.l &0xc,%sp1477314774fmovm.x FP_SCR1(%a6),&0x80 # load dst op1477514776fmov.l &rz_mode*0x10,%fpcr # set FPCR14777fmov.l &0x0,%fpsr # clear FPSR1477814779fadd.x FP_SCR0(%a6),%fp0 # execute add1478014781fmov.l &0x0,%fpcr # clear FPCR14782fmov.l %fpsr,%d1 # save status1478314784or.l %d1,USER_FPSR(%a6) # save INEX,N1478514786mov.b FPCR_ENABLE(%a6),%d114787andi.b &0x0b,%d1 # is UNFL or INEX enabled?14788bne.b fadd_unfl_ena # 
yes1478914790fadd_unfl_dis:14791fmovm.x &0x80,FP_SCR0(%a6) # store out result1479214793lea FP_SCR0(%a6),%a0 # pass: result addr14794mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode14795bsr.l unf_res # calculate default result14796or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set14797fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp014798mov.l (%sp)+,%d2 # restore d214799rts1480014801fadd_unfl_ena:14802fmovm.x FP_SCR1(%a6),&0x40 # load dst op1480314804mov.l L_SCR3(%a6),%d114805andi.b &0xc0,%d1 # is precision extended?14806bne.b fadd_unfl_ena_sd # no; sgl or dbl1480714808fmov.l L_SCR3(%a6),%fpcr # set FPCR1480914810fadd_unfl_ena_cont:14811fmov.l &0x0,%fpsr # clear FPSR1481214813fadd.x FP_SCR0(%a6),%fp1 # execute multiply1481414815fmov.l &0x0,%fpcr # clear FPCR1481614817fmovm.x &0x40,FP_SCR0(%a6) # save result to stack14818mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}14819mov.l %d1,%d2 # make a copy14820andi.l &0x7fff,%d1 # strip sign14821andi.w &0x8000,%d2 # keep old sign14822sub.l %d0,%d1 # add scale factor14823addi.l &0x6000,%d1 # add new bias14824andi.w &0x7fff,%d1 # clear top bit14825or.w %d2,%d1 # concat sign,new exp14826mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent14827fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp114828bra.w fadd_unfl_dis1482914830fadd_unfl_ena_sd:14831mov.l L_SCR3(%a6),%d114832andi.b &0x30,%d1 # use only rnd mode14833fmov.l %d1,%fpcr # set FPCR1483414835bra.b fadd_unfl_ena_cont1483614837#14838# result is equal to the smallest normalized number in the selected precision14839# if the precision is extended, this result could not have come from an14840# underflow that rounded up.14841#14842fadd_may_unfl:14843mov.l L_SCR3(%a6),%d114844andi.b &0xc0,%d114845beq.w fadd_normal # yes; no underflow occurred1484614847mov.l 0x4(%sp),%d1 # extract hi(man)14848cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?14849bne.w fadd_normal # no; no underflow occurred1485014851tst.l 0x8(%sp) # is lo(man) = 0x0?14852bne.w fadd_normal # no; no underflow 
occurred1485314854btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?14855beq.w fadd_normal # no; no underflow occurred1485614857#14858# ok, so now the result has a exponent equal to the smallest normalized14859# exponent for the selected precision. also, the mantissa is equal to14860# 0x8000000000000000 and this mantissa is the result of rounding non-zero14861# g,r,s.14862# now, we must determine whether the pre-rounded result was an underflow14863# rounded "up" or a normalized number rounded "down".14864# so, we do this be re-executing the add using RZ as the rounding mode and14865# seeing if the new result is smaller or equal to the current result.14866#14867fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp11486814869mov.l L_SCR3(%a6),%d114870andi.b &0xc0,%d1 # keep rnd prec14871ori.b &rz_mode*0x10,%d1 # insert rnd mode14872fmov.l %d1,%fpcr # set FPCR14873fmov.l &0x0,%fpsr # clear FPSR1487414875fadd.x FP_SCR0(%a6),%fp1 # execute add1487614877fmov.l &0x0,%fpcr # clear FPCR1487814879fabs.x %fp0 # compare absolute values14880fabs.x %fp114881fcmp.x %fp0,%fp1 # is first result > second?1488214883fbgt.w fadd_unfl # yes; it's an underflow14884bra.w fadd_normal # no; it's not an underflow1488514886##########################################################################1488714888#14889# Add: inputs are not both normalized; what are they?14890#14891fadd_not_norm:14892mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d114893jmp (tbl_fadd_op.b,%pc,%d1.w*1)1489414895swbeg &4814896tbl_fadd_op:14897short fadd_norm - tbl_fadd_op # NORM + NORM14898short fadd_zero_src - tbl_fadd_op # NORM + ZERO14899short fadd_inf_src - tbl_fadd_op # NORM + INF14900short fadd_res_qnan - tbl_fadd_op # NORM + QNAN14901short fadd_norm - tbl_fadd_op # NORM + DENORM14902short fadd_res_snan - tbl_fadd_op # NORM + SNAN14903short tbl_fadd_op - tbl_fadd_op #14904short tbl_fadd_op - tbl_fadd_op #1490514906short fadd_zero_dst - tbl_fadd_op # ZERO + NORM14907short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO14908short 
fadd_inf_src - tbl_fadd_op # ZERO + INF14909short fadd_res_qnan - tbl_fadd_op # NORM + QNAN14910short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM14911short fadd_res_snan - tbl_fadd_op # NORM + SNAN14912short tbl_fadd_op - tbl_fadd_op #14913short tbl_fadd_op - tbl_fadd_op #1491414915short fadd_inf_dst - tbl_fadd_op # INF + NORM14916short fadd_inf_dst - tbl_fadd_op # INF + ZERO14917short fadd_inf_2 - tbl_fadd_op # INF + INF14918short fadd_res_qnan - tbl_fadd_op # NORM + QNAN14919short fadd_inf_dst - tbl_fadd_op # INF + DENORM14920short fadd_res_snan - tbl_fadd_op # NORM + SNAN14921short tbl_fadd_op - tbl_fadd_op #14922short tbl_fadd_op - tbl_fadd_op #1492314924short fadd_res_qnan - tbl_fadd_op # QNAN + NORM14925short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO14926short fadd_res_qnan - tbl_fadd_op # QNAN + INF14927short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN14928short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM14929short fadd_res_snan - tbl_fadd_op # QNAN + SNAN14930short tbl_fadd_op - tbl_fadd_op #14931short tbl_fadd_op - tbl_fadd_op #1493214933short fadd_norm - tbl_fadd_op # DENORM + NORM14934short fadd_zero_src - tbl_fadd_op # DENORM + ZERO14935short fadd_inf_src - tbl_fadd_op # DENORM + INF14936short fadd_res_qnan - tbl_fadd_op # NORM + QNAN14937short fadd_norm - tbl_fadd_op # DENORM + DENORM14938short fadd_res_snan - tbl_fadd_op # NORM + SNAN14939short tbl_fadd_op - tbl_fadd_op #14940short tbl_fadd_op - tbl_fadd_op #1494114942short fadd_res_snan - tbl_fadd_op # SNAN + NORM14943short fadd_res_snan - tbl_fadd_op # SNAN + ZERO14944short fadd_res_snan - tbl_fadd_op # SNAN + INF14945short fadd_res_snan - tbl_fadd_op # SNAN + QNAN14946short fadd_res_snan - tbl_fadd_op # SNAN + DENORM14947short fadd_res_snan - tbl_fadd_op # SNAN + SNAN14948short tbl_fadd_op - tbl_fadd_op #14949short tbl_fadd_op - tbl_fadd_op #1495014951fadd_res_qnan:14952bra.l res_qnan14953fadd_res_snan:14954bra.l res_snan1495514956#14957# both operands are ZEROes14958#14959fadd_zero_2:14960mov.b 
SRC_EX(%a0),%d0 # are the signs opposite14961mov.b DST_EX(%a1),%d114962eor.b %d0,%d114963bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)1496414965# the signs are the same. so determine whether they are positive or negative14966# and return the appropriately signed zero.14967tst.b %d0 # are ZEROes positive or negative?14968bmi.b fadd_zero_rm # negative14969fmov.s &0x00000000,%fp0 # return +ZERO14970mov.b &z_bmask,FPSR_CC(%a6) # set Z14971rts1497214973#14974# the ZEROes have opposite signs:14975# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.14976# - -ZERO is returned in the case of RM.14977#14978fadd_zero_2_chk_rm:14979mov.b 3+L_SCR3(%a6),%d114980andi.b &0x30,%d1 # extract rnd mode14981cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?14982beq.b fadd_zero_rm # yes14983fmov.s &0x00000000,%fp0 # return +ZERO14984mov.b &z_bmask,FPSR_CC(%a6) # set Z14985rts1498614987fadd_zero_rm:14988fmov.s &0x80000000,%fp0 # return -ZERO14989mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z14990rts1499114992#14993# one operand is a ZERO and the other is a DENORM or NORM. scale14994# the DENORM or NORM and jump to the regular fadd routine.14995#14996fadd_zero_dst:14997mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)14998mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)14999mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)15000bsr.l scale_to_zero_src # scale the operand15001clr.w FP_SCR1_EX(%a6)15002clr.l FP_SCR1_HI(%a6)15003clr.l FP_SCR1_LO(%a6)15004bra.w fadd_zero_entry # go execute fadd1500515006fadd_zero_src:15007mov.w DST_EX(%a1),FP_SCR1_EX(%a6)15008mov.l DST_HI(%a1),FP_SCR1_HI(%a6)15009mov.l DST_LO(%a1),FP_SCR1_LO(%a6)15010bsr.l scale_to_zero_dst # scale the operand15011clr.w FP_SCR0_EX(%a6)15012clr.l FP_SCR0_HI(%a6)15013clr.l FP_SCR0_LO(%a6)15014bra.w fadd_zero_entry # go execute fadd1501515016#15017# both operands are INFs. an OPERR will result if the INFs have15018# different signs. 
else, an INF of the same sign is returned15019#15020fadd_inf_2:15021mov.b SRC_EX(%a0),%d0 # exclusive or the signs15022mov.b DST_EX(%a1),%d115023eor.b %d1,%d015024bmi.l res_operr # weed out (-INF)+(+INF)1502515026# ok, so it's not an OPERR. but, we do have to remember to return the15027# src INF since that's where the 881/882 gets the j-bit from...1502815029#15030# operands are INF and one of {ZERO, INF, DENORM, NORM}15031#15032fadd_inf_src:15033fmovm.x SRC(%a0),&0x80 # return src INF15034tst.b SRC_EX(%a0) # is INF positive?15035bpl.b fadd_inf_done # yes; we're done15036mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG15037rts1503815039#15040# operands are INF and one of {ZERO, INF, DENORM, NORM}15041#15042fadd_inf_dst:15043fmovm.x DST(%a1),&0x80 # return dst INF15044tst.b DST_EX(%a1) # is INF positive?15045bpl.b fadd_inf_done # yes; we're done15046mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG15047rts1504815049fadd_inf_done:15050mov.b &inf_bmask,FPSR_CC(%a6) # set INF15051rts1505215053#########################################################################15054# XDEF **************************************************************** #15055# fsub(): emulates the fsub instruction #15056# fssub(): emulates the fssub instruction #15057# fdsub(): emulates the fdsub instruction #15058# #15059# XREF **************************************************************** #15060# addsub_scaler2() - scale the operands so they won't take exc #15061# ovf_res() - return default overflow result #15062# unf_res() - return default underflow result #15063# res_qnan() - set QNAN result #15064# res_snan() - set SNAN result #15065# res_operr() - set OPERR result #15066# scale_to_zero_src() - set src operand exponent equal to zero #15067# scale_to_zero_dst() - set dst operand exponent equal to zero #15068# #15069# INPUT *************************************************************** #15070# a0 = pointer to extended precision source operand #15071# a1 = pointer to extended precision 
destination operand		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do subtraction after scaling exponents such that exception won't#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

	global	fssub
fssub:
	andi.b	&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b	&s_mode*0x10,%d0	# insert sgl prec
	bra.b	fsub

	global	fdsub
fdsub:
	andi.b	&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b	&d_mode*0x10,%d0	# insert dbl prec
					# fall through into fsub

	global	fsub
fsub:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info

	clr.w	%d1
	mov.b	DTAG(%a6),%d1
	lsl.b	&0x3,%d1
	or.b	STAG(%a6),%d1		# combine dst,src tags: (DTAG<<3)|STAG

	bne.w	fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l	addsub_scaler2		# scale exponents
					# d0 is used below as the scale factor
					# (presumably returned by addsub_scaler2)

fsub_zero_entry:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch INEX2, N, Z

	or.l	%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w	fsub_zero_exit		# if result zero, end now

	mov.l	%d2,-(%sp)		# save d2

	fmovm.x	&0x01,-(%sp)		# save result to stack

	mov.w	2+L_SCR3(%a6),%d1
	lsr.b	&0x6,%d1		# d1 = rnd prec, used as table index

	mov.w	(%sp),%d2		# fetch new exponent
	andi.l	&0x7fff,%d2		# strip sign
	sub.l	%d0,%d2			# add scale factor

	cmp.l	%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b	fsub_ovfl		# yes

	cmp.l	%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w	fsub_unfl		# yes
	beq.w	fsub_may_unfl		# maybe; go find out

fsub_normal:
	mov.w	(%sp),%d1
	andi.w	&0x8000,%d1		# keep sign
	or.w	%d2,%d1			# concat sign,new exp
	mov.w	%d1,(%sp)		# insert new exponent

	fmovm.x	(%sp)+,&0x80		# return result in fp0

	mov.l	(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s	&0x00000000,%fp0	# return zero in fp0
	rts

# exponent thresholds, indexed by rnd prec (ext, sgl, dbl)
tbl_fsub_ovfl:
	long	0x7fff			# ext ovfl
	long	0x407f			# sgl ovfl
	long	0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long	0x0000			# ext unfl
	long	0x3f81			# sgl unfl
	long	0x3c01			# dbl unfl

fsub_ovfl:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fsub_ovfl_ena		# yes

	add.l	&0xc,%sp		# discard saved result (12 bytes)
fsub_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	mov.l	(%sp)+,%d2		# restore d2
	rts

fsub_ovfl_ena:
# The rnd prec,mode byte is the LOW byte of the L_SCR3 longword (see the
# "3+L_SCR3" and "2+L_SCR3" accesses elsewhere in this routine), so it must
# be fetched with a longword load; a byte load at offset 0 reads the high
# byte instead. d1 is fully reloaded on both paths below.
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w	(%sp),%d1		# fetch {sgn,exp}
	andi.w	&0x8000,%d1		# keep sign
	subi.l	&0x6000,%d2		# subtract new bias
	andi.w	&0x7fff,%d2		# clear top bit
	or.w	%d2,%d1			# concat sign,exp
	mov.w	%d1,(%sp)		# insert new exponent

	fmovm.x	(%sp)+,&0x40		# return EXOP in fp1
	bra.b	fsub_ovfl_dis

fsub_ovfl_ena_sd:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# clear rnd prec
	fmov.l	%d1,%fpcr		# set FPCR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	add.l	&0xc,%sp		# replace saved result on the stack
	fmovm.x	&0x01,-(%sp)
	bra.b	fsub_ovfl_ena_cont

fsub_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l	&0xc,%sp		# discard saved result (12 bytes)

	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save status

	or.l	%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l	(%sp)+,%d2		# restore d2
	rts

fsub_unfl_ena:
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fsub_unfl_ena_sd	# no

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	fmovm.x	&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# add scale factor
	addi.l	&0x6000,%d1		# add new bias
	andi.w	&0x7fff,%d1		# clear top bit
	or.w	%d2,%d1			# concat sgn,exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w	fsub_unfl_dis

fsub_unfl_ena_sd:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# clear rnd prec
	fmov.l	%d1,%fpcr		# set FPCR

	bra.b	fsub_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fsub_may_unfl:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	beq.w	fsub_normal		# yes; no underflow occurred

	mov.l	0x4(%sp),%d1		# extract hi(man)
	cmpi.l	%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w	fsub_normal		# no; no underflow occurred

	tst.l	0x8(%sp)		# is lo(man) = 0x0?
	bne.w	fsub_normal		# no; no underflow occurred

	btst	&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w	fsub_normal		# no; no underflow occurred

#
# ok, so now the result has a exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the subtract using RZ as the rounding mode
# and seeing if the new result is smaller or equal to the current result.
#
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# keep rnd prec
	ori.b	&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l	%d1,%fpcr		# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	fabs.x	%fp0			# compare absolute values
	fabs.x	%fp1
	fcmp.x	%fp0,%fp1		# is first result > second?

	fbgt.w	fsub_unfl		# yes; it's an underflow
	bra.w	fsub_normal		# no; it's not an underflow

##########################################################################

#
# Sub: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG indexes the table of 16-bit offsets below.
#
fsub_not_norm:
	mov.w	(tbl_fsub_op.b,%pc,%d1.w*2),%d1
	jmp	(tbl_fsub_op.b,%pc,%d1.w*1)

	swbeg	&48
tbl_fsub_op:
	short	fsub_norm	- tbl_fsub_op # NORM - NORM
	short	fsub_zero_src	- tbl_fsub_op # NORM - ZERO
	short	fsub_inf_src	- tbl_fsub_op # NORM - INF
	short	fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
	short	fsub_norm	- tbl_fsub_op # NORM - DENORM
	short	fsub_res_snan	- tbl_fsub_op # NORM - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

	short	fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
	short	fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
	short	fsub_inf_src	- tbl_fsub_op # ZERO - INF
	short	fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
	short	fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
	short	fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

	short	fsub_inf_dst	- tbl_fsub_op # INF - NORM
	short	fsub_inf_dst	- tbl_fsub_op # INF - ZERO
	short	fsub_inf_2	- tbl_fsub_op # INF - INF
	short	fsub_res_qnan	- tbl_fsub_op # INF - QNAN
	short	fsub_inf_dst	- tbl_fsub_op # INF - DENORM
	short	fsub_res_snan	- tbl_fsub_op # INF - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

	short	fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
	short	fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
	short	fsub_res_qnan	- tbl_fsub_op # QNAN - INF
	short	fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
	short	fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
	short	fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

	short	fsub_norm	- tbl_fsub_op # DENORM - NORM
	short	fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
	short	fsub_inf_src	- tbl_fsub_op # DENORM - INF
	short	fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
	short	fsub_norm	- tbl_fsub_op # DENORM - DENORM
	short	fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

	short	fsub_res_snan	- tbl_fsub_op # SNAN - NORM
	short	fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
	short	fsub_res_snan	- tbl_fsub_op # SNAN - INF
	short	fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
	short	fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
	short	fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
	short	tbl_fsub_op	- tbl_fsub_op #
	short	tbl_fsub_op	- tbl_fsub_op #

fsub_res_qnan:
	bra.l	res_qnan
fsub_res_snan:
	bra.l	res_snan

#
# both operands are ZEROes
#
fsub_zero_2:
	mov.b	SRC_EX(%a0),%d0
	mov.b	DST_EX(%a1),%d1
	eor.b	%d1,%d0			# d0 = src^dst; d1 still holds dst sign
	bpl.b	fsub_zero_2_chk_rm	# signs are the same

# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
# d0 was overwritten by the eor above (and is always negative here), so the
# dst sign must be tested in d1.
	tst.b	%d1			# is dst negative?
	bmi.b	fsub_zero_2_rm		# yes
	fmov.s	&0x00000000,%fp0	# no; return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have the same signs:
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
# - -ZERO is returned in the case of RM.
#
fsub_zero_2_chk_rm:
	mov.b	3+L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# extract rnd mode
	cmpi.b	%d1,&rm_mode*0x10	# is rnd mode = RM?
	beq.b	fsub_zero_2_rm		# yes
	fmov.s	&0x00000000,%fp0	# no; return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fsub_zero_2_rm:
	fmov.s	&0x80000000,%fp0	# return -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
	rts

#
# one operand is a ZERO and the other is a DENORM or a NORM.
# scale the DENORM or NORM and jump to the regular fsub routine.
#
fsub_zero_dst:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l	scale_to_zero_src	# scale the operand
	clr.w	FP_SCR1_EX(%a6)		# dst operand = zero
	clr.l	FP_SCR1_HI(%a6)
	clr.l	FP_SCR1_LO(%a6)
	bra.w	fsub_zero_entry		# go execute fsub

fsub_zero_src:
	mov.w	DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l	scale_to_zero_dst	# scale the operand
	clr.w	FP_SCR0_EX(%a6)		# src operand = zero
	clr.l	FP_SCR0_HI(%a6)
	clr.l	FP_SCR0_LO(%a6)
	bra.w	fsub_zero_entry		# go execute fsub

#
# both operands are INFs. an OPERR will result if the INFs have the
# same signs. else, the src INF with its sign inverted is returned.
#
fsub_inf_2:
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d1,%d0
	bpl.l	res_operr		# weed out (+INF)-(+INF), (-INF)-(-INF)

# ok, so it's not an OPERR. but we do have to remember to return
# the src INF since that's where the 881/882 gets the j-bit.

fsub_inf_src:
	fmovm.x	SRC(%a0),&0x80		# return src INF
	fneg.x	%fp0			# invert sign
	fbge.w	fsub_inf_done		# sign is now positive
	mov.b	&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_dst:
	fmovm.x	DST(%a1),&0x80		# return dst INF
	tst.b	DST_EX(%a1)		# is INF negative?
	bpl.b	fsub_inf_done		# no
	mov.b	&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_done:
	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF **************************************************************** #
#	fsqrt(): emulates the fsqrt instruction				#
#	fssqrt(): emulates the fssqrt instruction			#
#	fdsqrt(): emulates the fdsqrt instruction			#
#									#
# XREF **************************************************************** #
#	scale_sqrt() - scale the source operand				#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended
precision source operand			#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a sqrt		#
# instruction won't cause an exception. Use the regular fsqrt to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#									#
#########################################################################

	global	fssqrt
fssqrt:
	andi.b	&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b	&s_mode*0x10,%d0	# insert sgl precision
	bra.b	fsqrt

	global	fdsqrt
fdsqrt:
	andi.b	&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b	&d_mode*0x10,%d0	# insert dbl precision
					# fall through into fsqrt

	global	fsqrt
fsqrt:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info
	clr.w	%d1
	mov.b	STAG(%a6),%d1
	bne.w	fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b	SRC_EX(%a0)		# is operand negative?
	bmi.l	res_operr		# yes

	andi.b	&0xc0,%d0		# is precision extended?
	bne.b	fsqrt_not_ext		# no; go handle sgl or dbl

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsqrt.x	(%a0),%fp0		# execute square root

	fmov.l	%fpsr,%d1
	or.l	%d1,USER_FPSR(%a6)	# set N,INEX

	rts

fsqrt_denorm:
	tst.b	SRC_EX(%a0)		# is operand negative?
	bmi.l	res_operr		# yes

	andi.b	&0xc0,%d0		# is precision extended?
	bne.b	fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	bra.w	fsqrt_sd_normal

#
# operand is either single or double
#
fsqrt_not_ext:
	cmpi.b	%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.w	fsqrt_dbl

#
# operand is to be rounded to single precision
#
fsqrt_sgl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3f81	# will move in underflow?
	beq.w	fsqrt_sd_may_unfl	# maybe; go check
	bgt.w	fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x407f	# will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	# maybe; go check
	blt.w	fsqrt_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fsqrt_sd_normal:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	%fpsr,%d1		# save FPSR
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_normal_exit:
	mov.l	%d2,-(%sp)		# save d2
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
	mov.w	FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# add scale factor
	andi.w	&0x8000,%d2		# keep old sign
	or.w	%d1,%d2			# concat old sign,new exp
	mov.w	%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fsqrt_dbl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3c01	# will move in underflow?
	beq.w	fsqrt_sd_may_unfl	# maybe; go check
	bgt.b	fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x43ff	# will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	# maybe; go check
	blt.w	fsqrt_sd_ovfl		# yes; go handle overflow
	bra.w	fsqrt_sd_normal		# no; go handle normalized op

# we're on the line here and the distinguishing characteristic is the low
# bit of the scaled exponent in FP_SCR0: if it is set (the inline comment
# below reads this as "exponent is 0x3fff"), the result is treated as safe;
# otherwise fall through to underflow.
fsqrt_sd_may_unfl:
	btst	&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w	fsqrt_sd_normal		# yes, so no underflow

#
# operand WILL underflow when moved in to the fp register file
#
fsqrt_sd_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsqrt.x	FP_SCR0(%a6),%fp0	# execute square root

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fsqrt_sd_unfl_ena	# yes

fsqrt_sd_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
# NOTE(review): FP_SCR0 still holds the scaled source operand at this point
# (the fsqrt result is only in fp0), so the EXOP built below is derived from
# the operand, not from the square root -- confirm this is intended.
#
fsqrt_sd_unfl_ena:
	mov.l	FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l	FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w	FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l	%d2,-(%sp)		# save d2
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# subtract scale factor
	addi.l	&0x6000,%d1		# add new bias
	andi.w	&0x7fff,%d1
	or.w	%d2,%d1			# concat new sign,new exp
	mov.w	%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x	FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l	(%sp)+,%d2		# restore d2
	bra.b	fsqrt_sd_unfl_dis

#
# operand WILL overflow.
#
fsqrt_sd_ovfl:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save FPSR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_ovfl_tst:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# NOTE(review): as in fsqrt_sd_unfl_ena, FP_SCR0 has not been overwritten
# with the fsqrt result on this path, so the EXOP mantissa comes from the
# scaled operand -- confirm this is intended.
#
fsqrt_sd_ovfl_ena:
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# add scale factor
	subi.l	&0x6000,%d1		# subtract bias
	andi.w	&0x7fff,%d1
	or.w	%d2,%d1			# concat sign,exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l	(%sp)+,%d2		# restore d2
	bra.b	fsqrt_sd_ovfl_dis

#
# the result MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst	&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w	fsqrt_sd_ovfl		# yes, so overflow

	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fmov.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x1		# is |result| >= 1.b?
	fbge.w	fsqrt_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w	fsqrt_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fsqrt_not_norm:
	cmpi.b	%d1,&DENORM		# weed out DENORM
	beq.w	fsqrt_denorm
	cmpi.b	%d1,&ZERO		# weed out ZERO
	beq.b	fsqrt_zero
	cmpi.b	%d1,&INF		# weed out INF
	beq.b	fsqrt_inf
	cmpi.b	%d1,&SNAN		# weed out SNAN
	beq.l	res_snan_1op
	bra.l	res_qnan_1op		# anything else is handled as QNAN

#
#	fsqrt(+0) = +0
#	fsqrt(-0) = -0
#	fsqrt(+INF) = +INF
#	fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b	SRC_EX(%a0)		# is ZERO positive or negative?
	bmi.b	fsqrt_zero_m		# negative
fsqrt_zero_p:
	fmov.s	&0x00000000,%fp0	# return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fsqrt_zero_m:
	fmov.s	&0x80000000,%fp0	# return -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

fsqrt_inf:
	tst.b	SRC_EX(%a0)		# is INF positive or negative?
	bmi.l	res_operr		# negative
fsqrt_inf_p:
	fmovm.x	SRC(%a0),&0x80		# return +INF in fp0
	mov.b	&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

##########################################################################

#########################################################################
# XDEF
# **************************************************************** #
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF **************************************************************** #
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT *************************************************************** #
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT ************************************************************** #
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0 = scale amount						#
#									#
# NOTE: the code below fetches the operands through a0 (src) and	#
# a1 (dst), works on copies held in FP_SCR0(a6) (src) and		#
# FP_SCR1(a6) (dst), and leaves the scaled values in those copies.	#
# L_SCR1(a6) holds the src exponent (word 0) and the dst exponent	#
# (word 1) while the routine runs.					#
#									#
# ALGORITHM *********************************************************** #
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
# equal to 0x3fff and scale the SRC exponent by the value that the	#
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
# do the opposite. Return this scale factor in d0.			#
#	If the two exponents differ by > the number of mantissa bits	#
# plus two, then set the smallest exponent to a very small value as a	#
# quick shortcut.							#
#									#
#########################################################################

	global	addsub_scaler2
addsub_scaler2:
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w	SRC_EX(%a0),%d0
	mov.w	DST_EX(%a1),%d1
	mov.w	%d0,FP_SCR0_EX(%a6)
	mov.w	%d1,FP_SCR1_EX(%a6)

	andi.w	&0x7fff,%d0
	andi.w	&0x7fff,%d1
	mov.w	%d0,L_SCR1(%a6)	# store src exponent
	mov.w	%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w	%d0, %d1	# is src exp >= dst exp?
	bge.l	src_exp_ge2

# dst exp is > src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l	scale_to_zero_dst
	mov.l	%d0,-(%sp)	# save scale factor

	cmpi.b	STAG(%a6),&DENORM	# is src denormalized?
	bne.b	cmpexp12

	lea	FP_SCR0(%a6),%a0	# pass ptr to src copy
	bsr.l	norm	# normalize the denorm; result is new exp
	neg.w	%d0	# new exp = -(shft val)
	mov.w	%d0,L_SCR1(%a6)	# insert new src exp

cmpexp12:
	mov.w	2+L_SCR1(%a6),%d0
	subi.w	&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w	%d0,L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b	quick_scale12

	mov.w	L_SCR1(%a6),%d0
	add.w	0x2(%sp),%d0	# scale src exponent by scale factor (low word of saved d0)
	mov.w	FP_SCR0_EX(%a6),%d1
	and.w	&0x8000,%d1
	or.w	%d1,%d0	# concat {sgn,new exp}
	mov.w	%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l	(%sp)+,%d0	# return SCALE factor
	rts

quick_scale12:
	andi.w	&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset	&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l	(%sp)+,%d0	# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l	scale_to_zero_src
	mov.l	%d0,-(%sp)	# save scale factor

	cmpi.b	DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b	cmpexp22
	lea	FP_SCR1(%a6),%a0	# pass ptr to dst copy
	bsr.l	norm	# normalize the denorm; result is new exp
	neg.w	%d0	# new exp = -(shft val)
	mov.w	%d0,2+L_SCR1(%a6)	# insert new dst exp

cmpexp22:
	mov.w	L_SCR1(%a6),%d0
	subi.w	&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w	%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b	quick_scale22

	mov.w	2+L_SCR1(%a6),%d0
	add.w	0x2(%sp),%d0	# scale dst exponent by scale factor (low word of saved d0)
	mov.w	FP_SCR1_EX(%a6),%d1
	andi.w	&0x8000,%d1
	or.w	%d1,%d0	# concat {sgn,new exp}
	mov.w	%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l	(%sp)+,%d0	# return SCALE factor
	rts

quick_scale22:
	andi.w	&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset	&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l	(%sp)+,%d0	# return SCALE factor
	rts

##########################################################################

#########################################################################
# XDEF **************************************************************** #
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).			#
#									#
# XREF **************************************************************** #
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT *************************************************************** #
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM *********************************************************** #
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.
#
#									#
#########################################################################

# scale_to_zero_src: force the exponent of FP_SCR0(a6) to 0x3fff (sign
# kept) and return d0 = 0x3fff - exp. For a DENORM (per STAG) the operand
# is normalized first and -(shift value from norm()) is used as exp.
# Clobbers d1, and a0 on the DENORM path.
	global	scale_to_zero_src
scale_to_zero_src:
	mov.w	FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w	%d1,%d0	# make a copy

	andi.l	&0x7fff,%d1	# extract operand's exponent

	andi.w	&0x8000,%d0	# extract operand's sgn
	or.w	&0x3fff,%d0	# insert new operand's exponent(=0)

	mov.w	%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b	STAG(%a6),&DENORM	# is operand normalized?
	beq.b	stzs_denorm	# normalize the DENORM

stzs_norm:
	mov.l	&0x3fff,%d0
	sub.l	%d1,%d0	# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea	FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l	norm	# normalize denorm
	neg.l	%d0	# new exponent = -(shft val)
	mov.l	%d0,%d1	# d1 = exponent used by stzs_norm
	bra.b	stzs_norm	# finish scaling

###

#########################################################################
# XDEF **************************************************************** #
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF **************************************************************** #
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT *************************************************************** #
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM *********************************************************** #
#	If the input operand is a DENORM, normalize it.			#
#	If the exponent of the input operand is even, set the exponent	#
# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(0x3fff-exp)/2".				#
#	For a DENORM the value returned by norm() in d0 takes the	#
# place of "-exp": the scale factor is "(d0+0x3ffe)/2" when d0 is	#
# even and "(d0+0x3fff)/2" when d0 is odd.				#
#									#
#########################################################################

	global	scale_sqrt
scale_sqrt:
	cmpi.b	STAG(%a6),&DENORM	# is operand normalized?
	beq.b	ss_denorm	# normalize the DENORM

	mov.w	FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l	&0x7fff,%d1	# extract operand's exponent

	andi.w	&0x8000,FP_SCR0_EX(%a6)	# keep only the operand's sgn

	btst	&0x0,%d1	# is exp even or odd?
	beq.b	ss_norm_even

	ori.w	&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3fff)

	mov.l	&0x3fff,%d0
	sub.l	%d1,%d0	# scale = BIAS + (-exp)
	asr.l	&0x1,%d0	# divide scale factor by 2
	rts

ss_norm_even:
	ori.w	&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3ffe)

	mov.l	&0x3ffe,%d0
	sub.l	%d1,%d0	# scale = 0x3ffe + (-exp)
	asr.l	&0x1,%d0	# divide scale factor by 2
	rts

ss_denorm:
	lea	FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l	norm	# normalize denorm

	btst	&0x0,%d0	# is norm()'s return value even or odd?
	beq.b	ss_denorm_even

	ori.w	&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3fff)

	add.l	&0x3fff,%d0
	asr.l	&0x1,%d0	# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w	&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent (0x3ffe)

	add.l	&0x3ffe,%d0
	asr.l	&0x1,%d0	# divide scale factor by 2
	rts

###

#########################################################################
# XDEF **************************************************************** #
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at FP_SCR1(a6).			#
#									#
# XREF **************************************************************** #
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT *************************************************************** #
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR1(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM *********************************************************** #
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.			#
#									#
#########################################################################

	global	scale_to_zero_dst
scale_to_zero_dst:
	mov.w	FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w	%d1,%d0	# make a copy

	andi.l	&0x7fff,%d1	# extract operand's exponent

	andi.w	&0x8000,%d0	# extract operand's sgn
	or.w	&0x3fff,%d0	# insert new operand's exponent(=0)

	mov.w	%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b	DTAG(%a6),&DENORM	# is operand normalized?
	beq.b	stzd_denorm	# normalize the DENORM

stzd_norm:
	mov.l	&0x3fff,%d0
	sub.l	%d1,%d0	# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea	FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l	norm	# normalize denorm
	neg.l	%d0	# new exponent = -(shft val)
	mov.l	%d0,%d1	# d1 = exponent used by stzd_norm
	bra.b	stzd_norm	# finish scaling

##########################################################################

#########################################################################
# XDEF **************************************************************** #
#	res_qnan(): return default result
# w/ QNAN operand for dyadic						#
#	res_snan(): return default result w/ SNAN operand for dyadic	#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	FP_SRC(a6) = pointer to extended precision src operand		#
#	FP_DST(a6) = pointer to extended precision dst operand		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default result						#
#									#
# ALGORITHM *********************************************************** #
#	If either operand (but not both operands) of an operation is a	#
# nonsignalling NAN, then that NAN is returned as the result. If both	#
# operands are nonsignalling NANs, then the destination operand		#
# nonsignalling NAN is returned as the result.				#
#	If either operand to an operation is a signalling NAN (SNAN),	#
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
# enable bit is set in the FPCR, then the trap is taken and the		#
# destination is not modified. If the SNAN trap enable bit is not set,	#
# then the SNAN is converted to a nonsignalling NAN (by setting the	#
# SNAN bit in the operand to one), and the operation continues as	#
# described in the preceding paragraph, for nonsignalling NANs.		#
#	Make sure the appropriate FPSR bits are set before exiting.	#
#									#
#########################################################################

# The dyadic entry points test the dst tag first, so a NAN destination
# takes priority over the source. The monadic entry points (res_snan_1op,
# res_qnan_1op) share the source-operand paths and never look at DTAG.
	global	res_qnan
	global	res_snan
res_qnan:
res_snan:
	cmp.b	DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b	dst_snan2
	cmp.b	DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b	dst_qnan2
src_nan:
	cmp.b	STAG(%a6), &QNAN	# is the src a QNAN?
	beq.b	src_qnan2	# yes; otherwise fall into the SNAN case
	global	res_snan_1op
res_snan_1op:
src_snan2:
	bset	&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l	&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea	FP_SRC(%a6), %a0	# result = (quieted) src
	bra.b	nan_comp
	global	res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l	&nan_mask, USER_FPSR(%a6)
	lea	FP_SRC(%a6), %a0	# result = src
	bra.b	nan_comp
dst_snan2:
	or.l	&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset	&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea	FP_DST(%a6), %a0	# result = (quieted) dst
	bra.b	nan_comp
dst_qnan2:
	lea	FP_DST(%a6), %a0	# result = dst
	cmp.b	STAG(%a6), &SNAN	# is the src an SNAN?
	bne	nan_done	# no; only the NAN ccode is needed
	or.l	&aiop_mask+snan_mask, USER_FPSR(%a6)	# yes; still flag SNAN,AIOP
nan_done:
	or.l	&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst	&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b	nan_not_neg
	or.l	&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x	(%a0), &0x80	# return the selected NAN in fp0
	rts

#########################################################################
# XDEF **************************************************************** #
#	res_operr(): return default result during operand error		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default operand error result				#
#									#
# ALGORITHM *********************************************************** #
#	A nonsignalling NAN is returned as the default result when	#
# an operand error occurs for the following cases:			#
#									#
#	Multiply: (Infinity x
# Zero)									#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#									#
#########################################################################

# res_operr: set NAN, OPERR and AIOP in USER_FPSR(a6) and load the
# constant at nan_return into fp0.
	global	res_operr
res_operr:
	or.l	&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x	nan_return(%pc), &0x80
	rts

# default operand-error result: three longwords loaded as one extended value
nan_return:
	long	0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# fdbcc(): routine to emulate the fdbcc instruction			#
#									#
# XDEF **************************************************************** #
#	_fdbcc()							#
#									#
# XREF **************************************************************** #
#	fetch_dreg() - fetch Dn value					#
#	store_dreg_l() - store updated Dn value				#
#									#
# INPUT *************************************************************** #
#	d0 = displacement						#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	This routine checks which conditional predicate is specified by	#
# the stacked fdbcc instruction opcode and then branches to a routine	#
# for that predicate. The corresponding fbcc instruction is then used	#
# to see whether the condition (specified by the stacked FPSR) is true	#
# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged and the predicate is false, then	#
# Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
# the displacement value to the stacked PC so that when an "rte" is	#
# finally executed, the branch occurs.
#16275# #16276#########################################################################16277global _fdbcc16278_fdbcc:16279mov.l %d0,L_SCR1(%a6) # save displacement1628016281mov.w EXC_CMDREG(%a6),%d0 # fetch predicate1628216283clr.l %d1 # clear scratch reg16284mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes16285ror.l &0x8,%d1 # rotate to top byte16286fmov.l %d1,%fpsr # insert into FPSR1628716288mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table16289jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine1629016291tbl_fdbcc:16292short fdbcc_f - tbl_fdbcc # 0016293short fdbcc_eq - tbl_fdbcc # 0116294short fdbcc_ogt - tbl_fdbcc # 0216295short fdbcc_oge - tbl_fdbcc # 0316296short fdbcc_olt - tbl_fdbcc # 0416297short fdbcc_ole - tbl_fdbcc # 0516298short fdbcc_ogl - tbl_fdbcc # 0616299short fdbcc_or - tbl_fdbcc # 0716300short fdbcc_un - tbl_fdbcc # 0816301short fdbcc_ueq - tbl_fdbcc # 0916302short fdbcc_ugt - tbl_fdbcc # 1016303short fdbcc_uge - tbl_fdbcc # 1116304short fdbcc_ult - tbl_fdbcc # 1216305short fdbcc_ule - tbl_fdbcc # 1316306short fdbcc_neq - tbl_fdbcc # 1416307short fdbcc_t - tbl_fdbcc # 1516308short fdbcc_sf - tbl_fdbcc # 1616309short fdbcc_seq - tbl_fdbcc # 1716310short fdbcc_gt - tbl_fdbcc # 1816311short fdbcc_ge - tbl_fdbcc # 1916312short fdbcc_lt - tbl_fdbcc # 2016313short fdbcc_le - tbl_fdbcc # 2116314short fdbcc_gl - tbl_fdbcc # 2216315short fdbcc_gle - tbl_fdbcc # 2316316short fdbcc_ngle - tbl_fdbcc # 2416317short fdbcc_ngl - tbl_fdbcc # 2516318short fdbcc_nle - tbl_fdbcc # 2616319short fdbcc_nlt - tbl_fdbcc # 2716320short fdbcc_nge - tbl_fdbcc # 2816321short fdbcc_ngt - tbl_fdbcc # 2916322short fdbcc_sneq - tbl_fdbcc # 3016323short fdbcc_st - tbl_fdbcc # 311632416325#########################################################################16326# #16327# IEEE Nonaware tests #16328# #16329# For the IEEE nonaware tests, only the false branch changes the #16330# counter. 
# However, the true branch may set bsun so we check to see		#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#									#
# The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
# Typically, only one of the two possible branch directions could	#
# have the NAN bit set.							#
# (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
# is preserved.)							#
#									#
#########################################################################

# Shape shared by every routine in this group: the fbcc tests the
# predicate against the condition codes loaded into the FPSR by _fdbcc.
# Predicate true  -> rts (counter untouched).
# Predicate false -> fdbcc_false (handle the counter).
# On whichever path NAN can be set, BSUN and AIOP are or-ed into
# USER_FPSR(a6) and, if BSUN is enabled in FPCR_ENABLE(a6), control goes
# to fdbcc_bsun instead.

#
# equal:
#
#	Z
#
fdbcc_eq:
	fbeq.w	fdbcc_eq_yes	# equal?
fdbcc_eq_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_eq_yes:
	rts

#
# not equal:
#	_
#	Z
#
fdbcc_neq:
	fbneq.w	fdbcc_neq_yes	# not equal?
fdbcc_neq_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_neq_yes:
	rts

#
# greater than:
#	_______
#	NANvZvN
#
fdbcc_gt:
	fbgt.w	fdbcc_gt_yes	# greater than?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false	# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_gt_yes:
	rts	# do nothing

#
# not greater than:
#
#	NANvZvN
#
fdbcc_ngt:
	fbngt.w	fdbcc_ngt_yes	# not greater than?
fdbcc_ngt_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_ngt_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ngt_done	# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_ngt_done:
	rts	# no; do nothing

#
# greater than or equal:
#	   _____
#	Zv(NANvN)
#
fdbcc_ge:
	fbge.w	fdbcc_ge_yes	# greater than or equal?
fdbcc_ge_no:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false	# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_ge_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ge_yes_done	# no;go do nothing
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_ge_yes_done:
	rts	# do nothing

#
# not (greater than or equal):
#	       _
#	NANv(N^Z)
#
fdbcc_nge:
	fbnge.w	fdbcc_nge_yes	# not (greater than or equal)?
fdbcc_nge_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_nge_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_nge_done	# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_nge_done:
	rts	# no; do nothing

#
# less than:
#	   _____
#	N^(NANvZ)
#
fdbcc_lt:
	fblt.w	fdbcc_lt_yes	# less than?
fdbcc_lt_no:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false	# no; go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_lt_yes:
	rts	# do nothing

#
# not less than:
#	       _
#	NANv(ZvN)
#
fdbcc_nlt:
	fbnlt.w	fdbcc_nlt_yes	# not less than?
fdbcc_nlt_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_nlt_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_nlt_done	# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_nlt_done:
	rts	# no; do nothing

#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
fdbcc_le:
	fble.w	fdbcc_le_yes	# less than or equal?
fdbcc_le_no:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false	# no; go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_le_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_le_yes_done	# no; go do nothing
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_le_yes_done:
	rts	# do nothing

#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
fdbcc_nle:
	fbnle.w	fdbcc_nle_yes	# not (less than or equal)?
fdbcc_nle_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_nle_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_nle_done	# no; go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_nle_done:
	rts	# no; do nothing

#
# greater or less than:
#	_____
#	NANvZ
#
fdbcc_gl:
	fbgl.w	fdbcc_gl_yes	# greater or less than?
fdbcc_gl_no:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false	# no; handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_gl_yes:
	rts	# do nothing

#
# not (greater or less than):
#
#	NANvZ
#
fdbcc_ngl:
	fbngl.w	fdbcc_ngl_yes	# not (greater or less than)?
fdbcc_ngl_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_ngl_yes:
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ngl_done	# no; go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
fdbcc_ngl_done:
	rts	# no; do nothing

#
# greater, less, or equal:
#	___
#	NAN
#
# (no NAN test is needed on the false path: the predicate is false
# exactly when NAN is set)
#
fdbcc_gle:
	fbgle.w	fdbcc_gle_yes	# greater, less, or equal?
fdbcc_gle_no:
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_gle_yes:
	rts	# do nothing

#
# not (greater, less, or equal):
#
#	NAN
#
# (no NAN test is needed on the true path: the predicate is true
# exactly when NAN is set)
#
fdbcc_ngle:
	fbngle.w	fdbcc_ngle_yes	# not (greater, less, or equal)?
fdbcc_ngle_no:
	bra.w	fdbcc_false	# no; go handle counter
fdbcc_ngle_yes:
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6)	# set BSUN exc bit (and AIOP)
	btst	&bsun_bit, FPCR_ENABLE(%a6)	# is BSUN enabled?
	bne.w	fdbcc_bsun	# yes; we have an exception
	rts	# no; do nothing

#########################################################################
#									#
#	Miscellaneous tests						#
#									#
# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun.
#									#
#########################################################################

#
# false:
#
#	False
#
fdbcc_f:					# no bsun possible
	bra.w		fdbcc_false		# go handle counter

#
# true:
#
#	True
#
fdbcc_t:					# no bsun possible
	rts					# do nothing

#
# signalling false:
#
#	False
#
fdbcc_sf:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		fdbcc_false		# no;go handle counter
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
	bra.w		fdbcc_false		# go handle counter

#
# signalling true:
#
#	True
#
fdbcc_st:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.b		fdbcc_st_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_st_done:
	rts					# do nothing

#
# signalling equal:
#
#	Z
#
fdbcc_seq:
	fbseq.w		fdbcc_seq_yes		# signalling equal?
fdbcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		fdbcc_false		# no;go handle counter
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
	bra.w		fdbcc_false		# go handle counter
fdbcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.b		fdbcc_seq_yes_done	# no;go do nothing
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_seq_yes_done:
	rts					# yes; do nothing

#
# signalling not equal:
#	_
#	Z
#
fdbcc_sneq:
	fbsneq.w	fdbcc_sneq_yes		# signalling not equal?
fdbcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		fdbcc_false		# no;go handle counter
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
	bra.w		fdbcc_false		# go handle counter
fdbcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		fdbcc_sneq_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fdbcc_bsun		# yes; we have an exception
fdbcc_sneq_done:
	rts					# do nothing

#########################################################################
#									#
#			IEEE Aware tests				#
#									#
# For the IEEE aware tests, action is only taken if the result is false.#
# Therefore, the opposite branch type is used to jump to the decrement	#
# routine.								#
# The BSUN exception will not be set for any of these tests.		#
#									#
#########################################################################

#
# ordered greater than:
#	_______
#	NANvZvN
#
fdbcc_ogt:
	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
fdbcc_ogt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ogt_yes:
	rts					# yes; do nothing

#
# unordered or less or equal:
#
#	NANvZvN
#
fdbcc_ule:
	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
fdbcc_ule_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ule_yes:
	rts					# yes; do nothing

#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
fdbcc_oge:
	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
fdbcc_oge_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_oge_yes:
	rts					# yes; do nothing

#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
fdbcc_ult:
	fbult.w		fdbcc_ult_yes		# unordered or less than?
fdbcc_ult_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ult_yes:
	rts					# yes; do nothing

#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
fdbcc_olt:
	fbolt.w		fdbcc_olt_yes		# ordered less than?
fdbcc_olt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_olt_yes:
	rts					# yes; do nothing

#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
fdbcc_uge:
	fbuge.w		fdbcc_uge_yes		# unordered or greater or equal?
fdbcc_uge_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_uge_yes:
	rts					# yes; do nothing

#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
fdbcc_ole:
	fbole.w		fdbcc_ole_yes		# ordered less than or equal?
fdbcc_ole_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ole_yes:
	rts					# yes; do nothing

#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
fdbcc_ugt:
	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
fdbcc_ugt_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ugt_yes:
	rts					# yes; do nothing

#
# ordered greater or less than:
#	_____
#	NANvZ
#
fdbcc_ogl:
	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
fdbcc_ogl_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ogl_yes:
	rts					# yes; do nothing

#
# unordered or equal:
#
#	NANvZ
#
fdbcc_ueq:
	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
fdbcc_ueq_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_ueq_yes:
	rts					# yes; do nothing

#
# ordered:
#	___
#	NAN
#
fdbcc_or:
	fbor.w		fdbcc_or_yes		# ordered?
fdbcc_or_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_or_yes:
	rts					# yes; do nothing

#
# unordered:
#
#	NAN
#
fdbcc_un:
	fbun.w		fdbcc_un_yes		# unordered?
fdbcc_un_no:
	bra.w		fdbcc_false		# no; go handle counter
fdbcc_un_yes:
	rts					# yes; do nothing

#######################################################################

#
# the bsun exception bit was not set.
#
# (1) subtract 1 from the count register
# (2) if (cr == -1) then
#	pc = pc of next instruction
#     else
#	pc += sign_ext(16-bit displacement)
#
# The count register number is the low 3 bits of the low opword byte.
# Only the low word of the count is decremented and compared against -1;
# the value is then written back with store_dreg_l.
#
fdbcc_false:
	mov.b		1+EXC_OPWORD(%a6), %d1	# fetch lo opword
	andi.w		&0x7, %d1		# extract count register

	bsr.l		fetch_dreg		# fetch count value
# make sure that d0 isn't corrupted between calls...

	subq.w		&0x1, %d0		# Dn - 1 -> Dn

	bsr.l		store_dreg_l		# store new count value

	cmpi.w		%d0, &-0x1		# is (Dn == -1)?
	bne.b		fdbcc_false_cont	# no;
	rts					# yes; EXC_PC is left untouched

fdbcc_false_cont:
	mov.l		L_SCR1(%a6),%d0		# fetch displacement
	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
	addq.l		&0x4,%d0		# add instruction length
	mov.l		%d0,EXC_PC(%a6)		# set new PC
	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
fdbcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts

#########################################################################
# ftrapcc(): routine to emulate the ftrapcc instruction			#
#									#
# XDEF **************************************************************** #
#	_ftrapcc()							#
#									#
# XREF **************************************************************** #
#	none								#
#									#
# INPUT *************************************************************** #
#	none								#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	This routine checks which conditional predicate is specified by	#
# the stacked ftrapcc instruction opcode and then branches to a routine #
# for that predicate. The corresponding fbcc instruction is then used	#
# to see whether the condition (specified by the stacked FPSR) is true	#
# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged and the predicate is true, then	#
# the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
# flags indicate to the calling routine to emulate the exceptional	#
# condition.								#
#									#
#########################################################################

	global		_ftrapcc
_ftrapcc:
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

# load the stacked fp condition codes into the live FPSR so that the
# fbcc instructions in the handlers below test them.
	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

# tbl_ftrapcc holds 16-bit offsets relative to tbl_ftrapcc, indexed by
# the predicate in d0.
	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine

tbl_ftrapcc:
	short		ftrapcc_f	-	tbl_ftrapcc	# 00
	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
	short		ftrapcc_or	-	tbl_ftrapcc	# 07
	short		ftrapcc_un	-	tbl_ftrapcc	# 08
	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
	short		ftrapcc_t	-	tbl_ftrapcc	# 15
	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
	short		ftrapcc_le	-	tbl_ftrapcc	# 21
	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
	short		ftrapcc_st	-	tbl_ftrapcc	# 31

#########################################################################
#									#
#			IEEE Nonaware tests				#
#									#
# For the IEEE nonaware tests, we set the result based on the		#
# floating point condition codes. In addition, we check to see		#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#									#
# The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
# Typically, only one of the two possible branch directions could	#
# have the NAN bit set.							#
#									#
#########################################################################

#
# equal:
#
#	Z
#
ftrapcc_eq:
	fbeq.w		ftrapcc_trap		# equal?
ftrapcc_eq_no:
	rts					# do nothing

#
# not equal:
#	_
#	Z
#
ftrapcc_neq:
	fbneq.w		ftrapcc_trap		# not equal?
ftrapcc_neq_no:
	rts					# do nothing

#
# greater than:
#	_______
#	NANvZvN
#
ftrapcc_gt:
	fbgt.w		ftrapcc_trap		# greater than?
ftrapcc_gt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gt_done		# no
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_gt_done:
	rts					# no; do nothing

#
# not greater than:
#
#	NANvZvN
#
ftrapcc_ngt:
	fbngt.w		ftrapcc_ngt_yes		# not greater than?
ftrapcc_ngt_no:
	rts					# do nothing
ftrapcc_ngt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# greater than or equal:
#	   _____
#	Zv(NANvN)
#
ftrapcc_ge:
	fbge.w		ftrapcc_ge_yes		# greater than or equal?
ftrapcc_ge_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_ge_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_ge_done:
	rts					# no; do nothing
ftrapcc_ge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# not (greater than or equal):
#	       _
#	NANv(N^Z)
#
ftrapcc_nge:
	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
ftrapcc_nge_no:
	rts					# do nothing
ftrapcc_nge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# less than:
#	   _____
#	N^(NANvZ)
#
ftrapcc_lt:
	fblt.w		ftrapcc_trap		# less than?
ftrapcc_lt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_lt_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_lt_done:
	rts					# no; do nothing

#
# not less than:
#	       _
#	NANv(ZvN)
#
ftrapcc_nlt:
	fbnlt.w		ftrapcc_nlt_yes		# not less than?
ftrapcc_nlt_no:
	rts					# do nothing
ftrapcc_nlt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
ftrapcc_le:
	fble.w		ftrapcc_le_yes		# less than or equal?
ftrapcc_le_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_le_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_le_done:
	rts					# no; do nothing
ftrapcc_le_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
ftrapcc_nle:
	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
ftrapcc_nle_no:
	rts					# do nothing
ftrapcc_nle_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# greater or less than:
#	_____
#	NANvZ
#
ftrapcc_gl:
	fbgl.w		ftrapcc_trap		# greater or less than?
ftrapcc_gl_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gl_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_gl_done:
	rts					# no; do nothing

#
# not (greater or less than):
#
#	NANvZ
#
ftrapcc_ngl:
	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
ftrapcc_ngl_no:
	rts					# do nothing
ftrapcc_ngl_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# greater, less, or equal:
#	___
#	NAN
#
ftrapcc_gle:
	fbgle.w		ftrapcc_trap		# greater, less, or equal?
ftrapcc_gle_no:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	rts					# no; do nothing

#
# not (greater, less, or equal):
#
#	NAN
#
ftrapcc_ngle:
	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
ftrapcc_ngle_no:
	rts					# do nothing
ftrapcc_ngle_yes:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#########################################################################
#									#
#			Miscellaneous tests				#
#									#
# For the IEEE miscellaneous tests, all but ftrapf and ftrapt can set	#
# bsun.									#
#									#
#########################################################################

#
# false:
#
#	False
#
ftrapcc_f:
	rts					# do nothing

#
# true:
#
#	True
#
ftrapcc_t:
	bra.w		ftrapcc_trap		# go take trap

#
# signalling false:
#
#	False
#
ftrapcc_sf:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.b		ftrapcc_sf_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_sf_done:
	rts					# no; do nothing

#
# signalling true:
#
#	True
#
ftrapcc_st:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# signalling equal:
#
#	Z
#
ftrapcc_seq:
	fbseq.w		ftrapcc_seq_yes		# signalling equal?
ftrapcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		ftrapcc_seq_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_seq_done:
	rts					# no; do nothing
ftrapcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# signalling not equal:
#	_
#	Z
#
ftrapcc_sneq:
	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
ftrapcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		ftrapcc_sneq_no_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_sneq_no_done:
	rts					# do nothing
ftrapcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#########################################################################
#									#
#			IEEE Aware tests				#
#									#
# For the IEEE aware tests, we only have to set the result based on the #
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#									#
#########################################################################

#
# ordered greater than:
#	_______
#	NANvZvN
#
ftrapcc_ogt:
	fbogt.w		ftrapcc_trap		# ordered greater than?
ftrapcc_ogt_no:
	rts					# do nothing

#
# unordered or less or equal:
#
#	NANvZvN
#
ftrapcc_ule:
	fbule.w		ftrapcc_trap		# unordered or less or equal?
ftrapcc_ule_no:
	rts					# do nothing

#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
ftrapcc_oge:
	fboge.w		ftrapcc_trap		# ordered greater than or equal?
ftrapcc_oge_no:
	rts					# do nothing

#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
ftrapcc_ult:
	fbult.w		ftrapcc_trap		# unordered or less than?
ftrapcc_ult_no:
	rts					# do nothing

#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
ftrapcc_olt:
	fbolt.w		ftrapcc_trap		# ordered less than?
ftrapcc_olt_no:
	rts					# do nothing

#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
ftrapcc_uge:
	fbuge.w		ftrapcc_trap		# unordered or greater or equal?
ftrapcc_uge_no:
	rts					# do nothing

#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
ftrapcc_ole:
	fbole.w		ftrapcc_trap		# ordered less than or equal?
ftrapcc_ole_no:
	rts					# do nothing

#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
ftrapcc_ugt:
	fbugt.w		ftrapcc_trap		# unordered or greater than?
ftrapcc_ugt_no:
	rts					# do nothing

#
# ordered greater or less than:
#	_____
#	NANvZ
#
ftrapcc_ogl:
	fbogl.w		ftrapcc_trap		# ordered greater or less than?
ftrapcc_ogl_no:
	rts					# do nothing

#
# unordered or equal:
#
#	NANvZ
#
ftrapcc_ueq:
	fbueq.w		ftrapcc_trap		# unordered or equal?
ftrapcc_ueq_no:
	rts					# do nothing

#
# ordered:
#	___
#	NAN
#
ftrapcc_or:
	fbor.w		ftrapcc_trap		# ordered?
ftrapcc_or_no:
	rts					# do nothing

#
# unordered:
#
#	NAN
#
ftrapcc_un:
	fbun.w		ftrapcc_trap		# unordered?
ftrapcc_un_no:
	rts					# do nothing

#######################################################################

# the bsun exception bit was not set.
# we will need to jump to the ftrapcc vector. the stack frame
# is the same size as that of the fp unimp instruction. the
# only difference is that the <ea> field should hold the PC
# of the ftrapcc instruction and the vector offset field
# should denote the ftrapcc trap.
ftrapcc_trap:
	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)
	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
ftrapcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts

#########################################################################
# fscc(): routine to emulate the fscc instruction			#
#									#
# XDEF **************************************************************** #
#	_fscc()								#
#									#
# XREF **************************************************************** #
#	store_dreg_b() - store result to data register file		#
#	dec_areg() - decrement an areg for -(an) mode			#
#	inc_areg() - increment an areg for (an)+ mode			#
#	_dmem_write_byte() - store result to memory			#
#									#
# INPUT *************************************************************** #
#	none								#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	This routine checks which conditional predicate is specified by	#
# the stacked fscc instruction opcode and then branches to a routine	#
# for that predicate. The corresponding fbcc instruction is then used	#
# to see whether the condition (specified by the stacked FPSR) is true	#
# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack.		#
If an #17472# enabled BSUN should not be flagged and the predicate is true, then #17473# the result is stored to the data register file or memory #17474# #17475#########################################################################1747617477global _fscc17478_fscc:17479mov.w EXC_CMDREG(%a6),%d0 # fetch predicate1748017481clr.l %d1 # clear scratch reg17482mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes17483ror.l &0x8,%d1 # rotate to top byte17484fmov.l %d1,%fpsr # insert into FPSR1748517486mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table17487jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine1748817489tbl_fscc:17490short fscc_f - tbl_fscc # 0017491short fscc_eq - tbl_fscc # 0117492short fscc_ogt - tbl_fscc # 0217493short fscc_oge - tbl_fscc # 0317494short fscc_olt - tbl_fscc # 0417495short fscc_ole - tbl_fscc # 0517496short fscc_ogl - tbl_fscc # 0617497short fscc_or - tbl_fscc # 0717498short fscc_un - tbl_fscc # 0817499short fscc_ueq - tbl_fscc # 0917500short fscc_ugt - tbl_fscc # 1017501short fscc_uge - tbl_fscc # 1117502short fscc_ult - tbl_fscc # 1217503short fscc_ule - tbl_fscc # 1317504short fscc_neq - tbl_fscc # 1417505short fscc_t - tbl_fscc # 1517506short fscc_sf - tbl_fscc # 1617507short fscc_seq - tbl_fscc # 1717508short fscc_gt - tbl_fscc # 1817509short fscc_ge - tbl_fscc # 1917510short fscc_lt - tbl_fscc # 2017511short fscc_le - tbl_fscc # 2117512short fscc_gl - tbl_fscc # 2217513short fscc_gle - tbl_fscc # 2317514short fscc_ngle - tbl_fscc # 2417515short fscc_ngl - tbl_fscc # 2517516short fscc_nle - tbl_fscc # 2617517short fscc_nlt - tbl_fscc # 2717518short fscc_nge - tbl_fscc # 2817519short fscc_ngt - tbl_fscc # 2917520short fscc_sneq - tbl_fscc # 3017521short fscc_st - tbl_fscc # 311752217523#########################################################################17524# #17525# IEEE Nonaware tests #17526# #17527# For the IEEE nonaware tests, we set the result based on the #17528# floating point condition codes. 
In addition, we check to see #17529# if the NAN bit is set, in which case BSUN and AIOP will be set. #17530# #17531# The cases EQ and NE are shared by the Aware and Nonaware groups #17532# and are incapable of setting the BSUN exception bit. #17533# #17534# Typically, only one of the two possible branch directions could #17535# have the NAN bit set. #17536# #17537#########################################################################1753817539#17540# equal:17541#17542# Z17543#17544fscc_eq:17545fbeq.w fscc_eq_yes # equal?17546fscc_eq_no:17547clr.b %d0 # set false17548bra.w fscc_done # go finish17549fscc_eq_yes:17550st %d0 # set true17551bra.w fscc_done # go finish1755217553#17554# not equal:17555# _17556# Z17557#17558fscc_neq:17559fbneq.w fscc_neq_yes # not equal?17560fscc_neq_no:17561clr.b %d0 # set false17562bra.w fscc_done # go finish17563fscc_neq_yes:17564st %d0 # set true17565bra.w fscc_done # go finish1756617567#17568# greater than:17569# _______17570# NANvZvN17571#17572fscc_gt:17573fbgt.w fscc_gt_yes # greater than?17574fscc_gt_no:17575clr.b %d0 # set false17576btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17577beq.w fscc_done # no;go finish17578ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17579bra.w fscc_chk_bsun # go finish17580fscc_gt_yes:17581st %d0 # set true17582bra.w fscc_done # go finish1758317584#17585# not greater than:17586#17587# NANvZvN17588#17589fscc_ngt:17590fbngt.w fscc_ngt_yes # not greater than?17591fscc_ngt_no:17592clr.b %d0 # set false17593bra.w fscc_done # go finish17594fscc_ngt_yes:17595st %d0 # set true17596btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17597beq.w fscc_done # no;go finish17598ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17599bra.w fscc_chk_bsun # go finish1760017601#17602# greater than or equal:17603# _____17604# Zv(NANvN)17605#17606fscc_ge:17607fbge.w fscc_ge_yes # greater than or equal?17608fscc_ge_no:17609clr.b %d0 # set false17610btst &nan_bit, FPSR_CC(%a6) # is NAN set in 
cc?17611beq.w fscc_done # no;go finish17612ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17613bra.w fscc_chk_bsun # go finish17614fscc_ge_yes:17615st %d0 # set true17616btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17617beq.w fscc_done # no;go finish17618ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17619bra.w fscc_chk_bsun # go finish1762017621#17622# not (greater than or equal):17623# _17624# NANv(N^Z)17625#17626fscc_nge:17627fbnge.w fscc_nge_yes # not (greater than or equal)?17628fscc_nge_no:17629clr.b %d0 # set false17630bra.w fscc_done # go finish17631fscc_nge_yes:17632st %d0 # set true17633btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17634beq.w fscc_done # no;go finish17635ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17636bra.w fscc_chk_bsun # go finish1763717638#17639# less than:17640# _____17641# N^(NANvZ)17642#17643fscc_lt:17644fblt.w fscc_lt_yes # less than?17645fscc_lt_no:17646clr.b %d0 # set false17647btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17648beq.w fscc_done # no;go finish17649ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17650bra.w fscc_chk_bsun # go finish17651fscc_lt_yes:17652st %d0 # set true17653bra.w fscc_done # go finish1765417655#17656# not less than:17657# _17658# NANv(ZvN)17659#17660fscc_nlt:17661fbnlt.w fscc_nlt_yes # not less than?17662fscc_nlt_no:17663clr.b %d0 # set false17664bra.w fscc_done # go finish17665fscc_nlt_yes:17666st %d0 # set true17667btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17668beq.w fscc_done # no;go finish17669ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17670bra.w fscc_chk_bsun # go finish1767117672#17673# less than or equal:17674# ___17675# Zv(N^NAN)17676#17677fscc_le:17678fble.w fscc_le_yes # less than or equal?17679fscc_le_no:17680clr.b %d0 # set false17681btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17682beq.w fscc_done # no;go finish17683ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17684bra.w fscc_chk_bsun 
# go finish17685fscc_le_yes:17686st %d0 # set true17687btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17688beq.w fscc_done # no;go finish17689ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17690bra.w fscc_chk_bsun # go finish1769117692#17693# not (less than or equal):17694# ___17695# NANv(NvZ)17696#17697fscc_nle:17698fbnle.w fscc_nle_yes # not (less than or equal)?17699fscc_nle_no:17700clr.b %d0 # set false17701bra.w fscc_done # go finish17702fscc_nle_yes:17703st %d0 # set true17704btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17705beq.w fscc_done # no;go finish17706ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17707bra.w fscc_chk_bsun # go finish1770817709#17710# greater or less than:17711# _____17712# NANvZ17713#17714fscc_gl:17715fbgl.w fscc_gl_yes # greater or less than?17716fscc_gl_no:17717clr.b %d0 # set false17718btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17719beq.w fscc_done # no;go finish17720ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17721bra.w fscc_chk_bsun # go finish17722fscc_gl_yes:17723st %d0 # set true17724bra.w fscc_done # go finish1772517726#17727# not (greater or less than):17728#17729# NANvZ17730#17731fscc_ngl:17732fbngl.w fscc_ngl_yes # not (greater or less than)?17733fscc_ngl_no:17734clr.b %d0 # set false17735bra.w fscc_done # go finish17736fscc_ngl_yes:17737st %d0 # set true17738btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?17739beq.w fscc_done # no;go finish17740ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17741bra.w fscc_chk_bsun # go finish1774217743#17744# greater, less, or equal:17745# ___17746# NAN17747#17748fscc_gle:17749fbgle.w fscc_gle_yes # greater, less, or equal?17750fscc_gle_no:17751clr.b %d0 # set false17752ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit17753bra.w fscc_chk_bsun # go finish17754fscc_gle_yes:17755st %d0 # set true17756bra.w fscc_done # go finish1775717758#17759# not (greater, less, or equal):17760#17761# 
#
# fscc_ngle plus the miscellaneous and first IEEE aware condition tests.
# Each entry sets d0.b with st (true) or clr.b (false) and leaves through
# fscc_done, or through fscc_chk_bsun after setting BSUN+AIOP in
# USER_FPSR. Formula notation used below: v = OR, ^ = AND, ~ = NOT.
#
fscc_ngle:
	fbngle.w	fscc_ngle_yes	# not (greater, less, or equal)?
fscc_ngle_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ngle_yes:
	st	%d0			# set true
# the test is NAN, so a true result means NAN is set: no btst needed.
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish

#########################################################################
#									#
# Miscellaneous tests							#
#									#
# The result is based on the floating point condition codes. The f and	#
# t tests never set BSUN. The signalling tests (sf, st, seq and sneq)	#
# set the BSUN exception bit when the NAN bit is set in the cc.	#
#									#
#########################################################################

#
# false:
#
#	False
#
fscc_f:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish

#
# true:
#
#	True
#
fscc_t:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# signalling false:
#
#	False
#
fscc_sf:
	clr.b	%d0			# set false
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish

#
# signalling true:
#
#	True
#
fscc_st:
	st	%d0			# set true
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish

#
# signalling equal:
#
#	Z
#
fscc_seq:
	fbseq.w	fscc_seq_yes		# signalling equal?
fscc_seq_no:
	clr.b	%d0			# set false
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish
fscc_seq_yes:
	st	%d0			# set true
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish

#
# signalling not equal:
#	~Z
#
fscc_sneq:
	fbsneq.w	fscc_sneq_yes	# signalling not equal?
fscc_sneq_no:
	clr.b	%d0			# set false
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish
fscc_sneq_yes:
	st	%d0			# set true
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fscc_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w	fscc_chk_bsun		# go finish

#########################################################################
#									#
# IEEE Aware tests							#
#									#
# For the IEEE aware tests, we only have to set the result based on the	#
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#									#
#########################################################################

#
# ordered greater than:
#	~(NAN v Z v N)
#
fscc_ogt:
	fbogt.w	fscc_ogt_yes		# ordered greater than?
fscc_ogt_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ogt_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered or less or equal:
#	NAN v Z v N
#
fscc_ule:
	fbule.w	fscc_ule_yes		# unordered or less or equal?
fscc_ule_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ule_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# ordered greater than or equal:
#	Z v ~(NAN v N)
#
fscc_oge:
	fboge.w	fscc_oge_yes		# ordered greater than or equal?
fscc_oge_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_oge_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered or less than:
#	NAN v (N ^ ~Z)
#
# Remaining IEEE aware condition tests, followed by the common exit code
# (fscc_chk_bsun / fscc_done / fscc_mem_op). Each test sets d0.b with st
# (true) or clr.b (false) and branches to fscc_done; none of them sets
# BSUN. Formula notation used below: v = OR, ^ = AND, ~ = NOT.
#
fscc_ult:
	fbult.w	fscc_ult_yes		# unordered or less than?
fscc_ult_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ult_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# ordered less than:
#	N ^ ~(NAN v Z)
#
fscc_olt:
	fbolt.w	fscc_olt_yes		# ordered less than?
fscc_olt_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_olt_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered or greater or equal:
#	NAN v Z v ~N
#
fscc_uge:
	fbuge.w	fscc_uge_yes		# unordered or greater or equal?
fscc_uge_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_uge_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# ordered less than or equal:
#	Z v (N ^ ~NAN)
#
fscc_ole:
	fbole.w	fscc_ole_yes		# ordered less than or equal?
fscc_ole_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ole_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered or greater than:
#	NAN v ~(N v Z)
#
fscc_ugt:
	fbugt.w	fscc_ugt_yes		# unordered or greater than?
fscc_ugt_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ugt_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# ordered greater or less than:
#	~(NAN v Z)
#
fscc_ogl:
	fbogl.w	fscc_ogl_yes		# ordered greater or less than?
fscc_ogl_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ogl_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered or equal:
#	NAN v Z
#
fscc_ueq:
	fbueq.w	fscc_ueq_yes		# unordered or equal?
fscc_ueq_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_ueq_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# ordered:
#	~NAN
#
fscc_or:
	fbor.w	fscc_or_yes		# ordered?
fscc_or_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_or_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#
# unordered:
#	NAN
#
fscc_un:
	fbun.w	fscc_un_yes		# unordered?
fscc_un_no:
	clr.b	%d0			# set false
	bra.w	fscc_done		# go finish
fscc_un_yes:
	st	%d0			# set true
	bra.w	fscc_done		# go finish

#######################################################################

#
# the bsun exception bit was set. now, check to see if BSUN
# is enabled. if so, don't store result and correct stack frame
# for a bsun exception.
#
fscc_chk_bsun:
	btst	&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fscc_bsun		# yes

#
# either the bsun exception bit was not set, or BSUN is not enabled.
# the result has been selected.
# now, check to see if the result is to be stored in the data register
# file or in memory.
#
fscc_done:
	mov.l	%d0,%a0			# save result for a moment

	mov.b	1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	mov.l	%d1,%d0			# make a copy
	andi.b	&0x38,%d1		# extract <ea> mode

	bne.b	fscc_mem_op		# it's a memory operation

	mov.l	%d0,%d1
	andi.w	&0x7,%d1		# pass index in d1
	mov.l	%a0,%d0			# pass result in d0
	bsr.l	store_dreg_b		# save result in regfile
	rts

#
# the stacked <ea> is correct with the exception of:
#	-> Dn : <ea> is garbage
#
# if the addressing mode is post-increment or pre-decrement,
# then the address registers have not been updated.
#
# d1.b holds the opword mode field (bits 5-3, still in place) here.
#
fscc_mem_op:
	cmpi.b	%d1,&0x18		# is <ea> (An)+ ?
	beq.b	fscc_mem_inc		# yes
	cmpi.b	%d1,&0x20		# is <ea> -(An) ?
	beq.b	fscc_mem_dec		# yes

	mov.l	%a0,%d0			# pass result in d0
	mov.l	EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l	_dmem_write_byte	# write result byte

	tst.l	%d1			# did dstore fail?
	bne.w	fscc_err		# yes

	rts

# addressing
# mode is post-increment. write the result byte. if the write
# fails then don't update the address register. if write passes then
# call inc_areg() to update the address register.
# on entry a0 holds the result selected by the condition test.
fscc_mem_inc:
	mov.l	%a0,%d0			# pass result in d0
	mov.l	EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l	_dmem_write_byte	# write result byte

	tst.l	%d1			# did dstore fail?
	bne.w	fscc_err		# yes

	mov.b	0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	andi.w	&0x7,%d1		# pass index in d1
	movq.l	&0x1,%d0		# pass amt to inc by
	bsr.l	inc_areg		# increment address register

	rts

# addressing mode is pre-decrement. write the result byte. if the write
# fails then don't update the address register. if the write passes then
# call dec_areg() to update the address register.
fscc_mem_dec:
	mov.l	%a0,%d0			# pass result in d0
	mov.l	EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l	_dmem_write_byte	# write result byte

	tst.l	%d1			# did dstore fail?
	bne.w	fscc_err		# yes

	mov.b	0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	andi.w	&0x7,%d1		# pass index in d1
	movq.l	&0x1,%d0		# pass amt to dec by
	bsr.l	dec_areg		# decrement address register

	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine only records the condition in SPCOND_FLG and returns.
fscc_bsun:
	mov.b	&fbsun_flg,SPCOND_FLG(%a6)
	rts

# the byte write to memory has failed.
# pass the failing effective address
# and a FSLW to funimp_dacc().
# NOTE(review): the code below only stores 0x00a1 in EXC_VOFF and
# branches to facc_finish; funimp_dacc() is not referenced here, so the
# comment above may be stale -- confirm against facc_finish.
fscc_err:
	mov.w	&0x00a1,EXC_VOFF(%a6)
	bra.l	facc_finish

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM ***********************************************************	#
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# use the local fmovm_calc_ea() routine. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special	#
# handler.								#
#	The data register is determined and its value loaded to get the	#
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes	#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
# DYNAMIC:								#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	<WORD 1>		<WORD2>					#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask		#
#									#
# NOTES:								#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#									#
#########################################################################

	global	fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
	mov.b	1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w	&0x70,%d1		# extract reg bits
	lsr.b	&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l	fetch_dreg		# fetch reg string

	andi.l	&0x000000ff,%d0		# keep only lo byte

	mov.l	%d0,-(%sp)		# save strg
	mov.b	(tbl_fmovm_size.w,%pc,%d0),%d0	# size = table[strg]
	mov.l	%d0,-(%sp)		# save size
	bsr.l	fmovm_calc_ea		# calculate <ea>
	mov.l	(%sp)+,%d0		# restore size
	mov.l	(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
# (the Z flag tested here was set by the "restore strg" move above.)
	beq.w	fmovm_data_done

# separate move ins from move outs...
	btst	&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w	fmovm_data_in		# it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst	&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w	fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ...
# bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b	(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst	&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b	fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b	SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b	fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
# (d0 = size, d1 = converted bit string at this point.)
	rts

############################
# move out: build the register image on the supervisor stack and copy it
# to the destination with _dmem_write(). on entry d0 = size of dump,
# d1.b = bit string with bit 7 selecting FP0, a0 = destination <ea>.
# FP0 and FP1 are copied from EXC_FP0/EXC_FP1 in the exception frame;
# FP2-FP7 are stored with fmovm.x. the bit string is shifted left one
# bit per register so the next select bit is always tested as the sign.
fmovm_out_ctrl:
	mov.l	%a0,%a1			# move <ea> to a1

	sub.l	%d0,%sp			# subtract size of dump
	lea	(%sp),%a0

	tst.b	%d1			# should FP0 be moved?
	bpl.b	fmovm_out_ctrl_fp1	# no

	mov.l	0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l	0x4+EXC_FP0(%a6),(%a0)+
	mov.l	0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b	&0x1,%d1		# should FP1 be moved?
	bpl.b	fmovm_out_ctrl_fp2	# no

	mov.l	0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l	0x4+EXC_FP1(%a6),(%a0)+
	mov.l	0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b	&0x1,%d1		# should FP2 be moved?
	bpl.b	fmovm_out_ctrl_fp3	# no

	fmovm.x	&0x20,(%a0)		# yes
	add.l	&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b	&0x1,%d1		# should FP3 be moved?
	bpl.b	fmovm_out_ctrl_fp4	# no

	fmovm.x	&0x10,(%a0)		# yes
	add.l	&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b	&0x1,%d1		# should FP4 be moved?
	bpl.b	fmovm_out_ctrl_fp5	# no

	fmovm.x	&0x08,(%a0)		# yes
	add.l	&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b	&0x1,%d1		# should FP5 be moved?
	bpl.b	fmovm_out_ctrl_fp6	# no

	fmovm.x	&0x04,(%a0)		# yes
	add.l	&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b	&0x1,%d1		# should FP6 be moved?
	bpl.b	fmovm_out_ctrl_fp7	# no

	fmovm.x	&0x02,(%a0)		# yes
	add.l	&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b	&0x1,%d1		# should FP7 be moved?
	bpl.b	fmovm_out_ctrl_done	# no

	fmovm.x	&0x01,(%a0)		# yes
# (tail of fmovm_out_ctrl_fp7: step past the FP7 image just stored)
	add.l	&0xc,%a0

# all selected registers are now on the supervisor stack. copy the image
# to the destination saved in a1, then release the stack space.
fmovm_out_ctrl_done:
	mov.l	%a1,L_SCR1(%a6)

	lea	(%sp),%a0		# pass: supervisor src
	mov.l	%d0,-(%sp)		# save size
	bsr.l	_dmem_write		# copy data to user mem

	mov.l	(%sp)+,%d0
	add.l	%d0,%sp			# clear fpreg data from stack

	tst.l	%d1			# did dstore err?
	bne.w	fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
# read the register image from the source <ea> (a0) into a buffer on the
# supervisor stack with _dmem_read(), then load the selected registers.
# on entry d0 = size in bytes, d1.b = bit string (bit 7 selects FP0).
# FP0 and FP1 are written to EXC_FP0/EXC_FP1 in the exception frame;
# FP2-FP7 are loaded with fmovm.x.
fmovm_data_in:
	mov.l	%a0,L_SCR1(%a6)

	sub.l	%d0,%sp			# make room for fpregs
	lea	(%sp),%a1

	mov.l	%d1,-(%sp)		# save bit string for later
	mov.l	%d0,-(%sp)		# save # of bytes

	bsr.l	_dmem_read		# copy data from user mem

	mov.l	(%sp)+,%d0		# retrieve # of bytes

	tst.l	%d1			# did dfetch fail?
	bne.w	fmovm_in_err		# yes

	mov.l	(%sp)+,%d1		# load bit string

	lea	(%sp),%a0		# addr of stack

	tst.b	%d1			# should FP0 be moved?
	bpl.b	fmovm_data_in_fp1	# no

	mov.l	(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l	(%a0)+,0x4+EXC_FP0(%a6)
	mov.l	(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b	&0x1,%d1		# should FP1 be moved?
	bpl.b	fmovm_data_in_fp2	# no

	mov.l	(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l	(%a0)+,0x4+EXC_FP1(%a6)
	mov.l	(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b	&0x1,%d1		# should FP2 be moved?
	bpl.b	fmovm_data_in_fp3	# no

	fmovm.x	(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b	&0x1,%d1		# should FP3 be moved?
	bpl.b	fmovm_data_in_fp4	# no

	fmovm.x	(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b	&0x1,%d1		# should FP4 be moved?
	bpl.b	fmovm_data_in_fp5	# no

	fmovm.x	(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b	&0x1,%d1		# should FP5 be moved?
	bpl.b	fmovm_data_in_fp6	# no

	fmovm.x	(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b	&0x1,%d1		# should FP6 be moved?
	bpl.b	fmovm_data_in_fp7	# no

	fmovm.x	(%a0)+,&0x02		# yes
# end of the fmovm move-in path, the two fmovm lookup tables, and the
# effective address dispatcher with its first (An) handlers.

fmovm_data_in_fp7:
	lsl.b	&0x1,%d1		# should FP7 be moved?
	bpl.b	fmovm_data_in_done	# no

	fmovm.x	(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l	%d0,%sp			# remove fpregs from stack
	rts

#####################################

fmovm_data_done:
	rts

##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
#
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
#
tbl_fmovm_size:
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60

#
# table to convert a pre-decrement bit string into a post-increment
# or control bit string (i.e. the bit order of the byte is reversed).
# ex:	0x00	==>	0x00
#	0x01	==>	0x80
#	0x02	==>	0x40
#		.
#		.
#	0xfd	==>	0xbf
#	0xfe	==>	0x7f
#	0xff	==>	0xff
#
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff

	global	fmovm_calc_ea
##############################################
# fmovm_calc_ea: calculate effective address #
##############################################
# on entry d0 = number of bytes to be moved; it is passed to the mode
# handler in a0 (the (An)+ and -(An) handlers adjust An by it). the
# handler is selected by the low 6 bits of EXC_OPWORD through
# tbl_fea_mode and returns the <ea> in a0. d1 = register field.
fmovm_calc_ea:
	mov.l	%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w	EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w	%d0,%d1			# make a copy

	andi.w	&0x3f,%d0		# extract mode field
	andi.l	&0x7,%d1		# extract reg field

# jump to the corresponding function for each {MODE,REG} pair.
	mov.w	(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp	(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode

# 64 word offsets, relative to tbl_fea_mode, one per {MODE,REG} pair.
# entries of "tbl_fea_mode - tbl_fea_mode" (offset 0) have no handler.
	swbeg	&64
tbl_fea_mode:
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode

	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode

	short	faddr_ind_a0 - tbl_fea_mode
	short	faddr_ind_a1 - tbl_fea_mode
	short	faddr_ind_a2 - tbl_fea_mode
	short	faddr_ind_a3 - tbl_fea_mode
	short	faddr_ind_a4 - tbl_fea_mode
	short	faddr_ind_a5 - tbl_fea_mode
	short	faddr_ind_a6 - tbl_fea_mode
	short	faddr_ind_a7 - tbl_fea_mode

	short	faddr_ind_p_a0 - tbl_fea_mode
	short	faddr_ind_p_a1 - tbl_fea_mode
	short	faddr_ind_p_a2 - tbl_fea_mode
	short	faddr_ind_p_a3 - tbl_fea_mode
	short	faddr_ind_p_a4 - tbl_fea_mode
	short	faddr_ind_p_a5 - tbl_fea_mode
	short	faddr_ind_p_a6 - tbl_fea_mode
	short	faddr_ind_p_a7 - tbl_fea_mode

	short	faddr_ind_m_a0 - tbl_fea_mode
	short	faddr_ind_m_a1 - tbl_fea_mode
	short	faddr_ind_m_a2 - tbl_fea_mode
	short	faddr_ind_m_a3 - tbl_fea_mode
	short	faddr_ind_m_a4 - tbl_fea_mode
	short	faddr_ind_m_a5 - tbl_fea_mode
	short	faddr_ind_m_a6 - tbl_fea_mode
	short	faddr_ind_m_a7 - tbl_fea_mode

	short	faddr_ind_disp_a0 - tbl_fea_mode
	short	faddr_ind_disp_a1 - tbl_fea_mode
	short	faddr_ind_disp_a2 - tbl_fea_mode
	short	faddr_ind_disp_a3 - tbl_fea_mode
	short	faddr_ind_disp_a4 - tbl_fea_mode
	short	faddr_ind_disp_a5 - tbl_fea_mode
	short	faddr_ind_disp_a6 - tbl_fea_mode
	short	faddr_ind_disp_a7 - tbl_fea_mode

	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode
	short	faddr_ind_ext - tbl_fea_mode

	short	fabs_short - tbl_fea_mode
	short	fabs_long - tbl_fea_mode
	short	fpc_ind - tbl_fea_mode
	short	fpc_ind_ext - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode
	short	tbl_fea_mode - tbl_fea_mode

###################################
# Address register indirect: (An) #
###################################
# a0 and a1 are read from EXC_DREGS in the exception frame and a6 from
# (%a6); a2-a5 are read directly from the live registers.
faddr_ind_a0:
	mov.l	EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l	EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l	%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l	%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l	%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l	%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l	(%a6),%a0		# Get current a6
# (tail of faddr_ind_a6: the <ea> is already in a0)
	rts

faddr_ind_a7:
	mov.l	EXC_A7(%a6),%a0		# Get current a7
	rts

#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
# on entry a0 = number of bytes to be moved. each handler returns the
# current An in a0 and stores An + size back to where An is kept
# (EXC_DREGS for a0/a1, the live register for a2-a5, (%a6) for a6,
# EXC_A7 for a7).
faddr_ind_p_a0:
	mov.l	EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a1:
	mov.l	EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l	%a2,%d0			# Get current a2
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,%a2			# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l	%a3,%d0			# Get current a3
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,%a3			# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l	%a4,%d0			# Get current a4
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,%a4			# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l	%a5,%d0			# Get current a5
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,%a5			# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l	(%a6),%d0		# Get current a6
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,(%a6)		# Save incr value
	mov.l	%d0,%a0
	rts

faddr_ind_p_a7:
	mov.b	&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l	EXC_A7(%a6),%d0		# Get current a7
	mov.l	%d0,%d1
	add.l	%a0,%d1			# Increment
	mov.l	%d1,EXC_A7(%a6)		# Save incr value
	mov.l	%d0,%a0
	rts

####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
faddr_ind_m_a0:
	mov.l	EXC_DREGS+0x8(%a6),%d0	# Get current a0
# (body of faddr_ind_m_a0; d0 = current a0, a0 = number of bytes.)
# each -(An) handler subtracts the byte count from An, stores the
# decremented value back to where An is kept, and returns it in a0.
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l	EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l	%a2,%d0			# Get current a2
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,%a2			# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l	%a3,%d0			# Get current a3
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,%a3			# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l	%a4,%d0			# Get current a4
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,%a4			# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l	%a5,%d0			# Get current a5
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,%a5			# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l	(%a6),%d0		# Get current a6
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,(%a6)		# Save decr value
	mov.l	%d0,%a0
	rts

faddr_ind_m_a7:
	mov.b	&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l	EXC_A7(%a6),%d0		# Get current a7
	sub.l	%a0,%d0			# Decrement
	mov.l	%d0,EXC_A7(%a6)		# Save decr value
	mov.l	%d0,%a0
	rts

########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
# each handler reads the 16-bit displacement word at EXC_EXTWPTR with
# _imem_read_word() (advancing EXC_EXTWPTR by 2 first), branches to
# iea_iacc if d1 is non-zero after the read, and returns An + d16 in a0.
faddr_ind_disp_a0:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	%d1			# did ifetch fail?
# (tail of faddr_ind_disp_a7: d1 = status of the displacement fetch)
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	EXC_A7(%a6),%a0		# a7 + d16
	rts

########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
# Memory indirect postindexed: ([bd, An], Xn, od)                      #
# Memory indirect preindexed: ([bd, An, Xn], od)                       #
########################################################################
# on entry d1 = base register number (0-7). the brief-format extension
# word (bit 8 clear) is handled here; full-format words (bit 8 set) are
# passed to fcalc_mem_ind with the base register value in a0.
faddr_ind_ext:
	addq.l	&0x8,%d1		# address regs are indices 8-15
	bsr.l	fetch_dreg		# fetch base areg
	mov.l	%d0,-(%sp)

	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word		# fetch extword in d0

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.l	(%sp)+,%a0		# a0 = base areg value

	btst	&0x8,%d0		# full-format extension word?
	bne.w	fcalc_mem_ind		# yes

	mov.l	%d0,L_SCR1(%a6)		# hold extword

	mov.l	%d0,%d1
	rol.w	&0x4,%d1
	andi.w	&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l	fetch_dreg		# fetch index

	mov.l	%d2,-(%sp)		# save d2
	mov.l	L_SCR1(%a6),%d2		# fetch extword

	btst	&0xb,%d2		# is it word or long?
	bne.b	faii8_long
	ext.l	%d0			# sign extend word index
faii8_long:
	mov.l	%d2,%d1
	rol.w	&0x7,%d1
	andi.l	&0x3,%d1		# extract scale value

	lsl.l	%d1,%d0			# shift index by scale

	extb.l	%d2			# sign extend displacement
	add.l	%d2,%d0			# index + disp
	add.l	%d0,%a0			# An + (index + disp)

	mov.l	(%sp)+,%d2		# restore old d2
	rts

###########################
# Absolute short: (XXX).W #
###########################
fabs_short:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word		# fetch short address

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# return <ea> in a0
	rts

##########################
# Absolute long: (XXX).L #
##########################
fabs_long:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long		# fetch long address

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.l	%d0,%a0			# return <ea> in a0
	rts

#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
fpc_ind:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word		# fetch word displacement

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.w	%d0,%a0			# sign extend displacement

	add.l	EXC_EXTWPTR(%a6),%a0	# pc + d16

# EXC_EXTWPTR was advanced by 2 above, before the displacement word was
# read. need to adjust here.
	subq.l	&0x2,%a0		# adjust <ea>
	rts

##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
fpc_ind_ext:
	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_word		# fetch ext word

	tst.l	%d1			# did ifetch fail?
	bne.l	iea_iacc		# yes

	mov.l	EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l	&0x2,%a0		# adjust base

	btst	&0x8,%d0		# is disp only 8 bits?
	bne.w	fcalc_mem_ind		# calc memory indirect

	mov.l	%d0,L_SCR1(%a6)		# store extword

	mov.l	%d0,%d1			# make extword copy
	rol.w	&0x4,%d1		# rotate reg num into place
	andi.w	&0xf,%d1		#
extract register number1900719008# count on fetch_dreg() not to alter a0...19009bsr.l fetch_dreg # fetch index1901019011mov.l %d2,-(%sp) # save d219012mov.l L_SCR1(%a6),%d2 # fetch opword1901319014btst &0xb,%d2 # is index word or long?19015bne.b fpii8_long # long19016ext.l %d0 # sign extend word index19017fpii8_long:19018mov.l %d2,%d119019rol.w &0x7,%d1 # rotate scale value into place19020andi.l &0x3,%d1 # extract scale value1902119022lsl.l %d1,%d0 # shift index by scale1902319024extb.l %d2 # sign extend displacement19025add.l %d2,%d0 # disp + index19026add.l %d0,%a0 # An + (index + disp)1902719028mov.l (%sp)+,%d2 # restore temp register19029rts1903019031# d2 = index19032# d3 = base19033# d4 = od19034# d5 = extword19035fcalc_mem_ind:19036btst &0x6,%d0 # is the index suppressed?19037beq.b fcalc_index1903819039movm.l &0x3c00,-(%sp) # save d2-d51904019041mov.l %d0,%d5 # put extword in d519042mov.l %a0,%d3 # put base in d31904319044clr.l %d2 # yes, so index = 019045bra.b fbase_supp_ck1904619047# index:19048fcalc_index:19049mov.l %d0,L_SCR1(%a6) # save d0 (opword)19050bfextu %d0{&16:&4},%d1 # fetch dreg index19051bsr.l fetch_dreg1905219053movm.l &0x3c00,-(%sp) # save d2-d519054mov.l %d0,%d2 # put index in d219055mov.l L_SCR1(%a6),%d519056mov.l %a0,%d31905719058btst &0xb,%d5 # is index word or long?19059bne.b fno_ext19060ext.l %d21906119062fno_ext:19063bfextu %d5{&21:&2},%d019064lsl.l %d0,%d21906519066# base address (passed as parameter in d3):19067# we clear the value here if it should actually be suppressed.19068fbase_supp_ck:19069btst &0x7,%d5 # is the bd suppressed?19070beq.b fno_base_sup19071clr.l %d31907219073# base displacement:19074fno_base_sup:19075bfextu %d5{&26:&2},%d0 # get bd size19076# beq.l fmovm_error # if (size == 0) it's reserved1907719078cmpi.b %d0,&0x219079blt.b fno_bd19080beq.b fget_word_bd1908119082mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr19083addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr19084bsr.l _imem_read_long1908519086tst.l %d1 
# did ifetch fail?19087bne.l fcea_iacc # yes1908819089bra.b fchk_ind1909019091fget_word_bd:19092mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr19093addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr19094bsr.l _imem_read_word1909519096tst.l %d1 # did ifetch fail?19097bne.l fcea_iacc # yes1909819099ext.l %d0 # sign extend bd1910019101fchk_ind:19102add.l %d0,%d3 # base += bd1910319104# outer displacement:19105fno_bd:19106bfextu %d5{&30:&2},%d0 # is od suppressed?19107beq.w faii_bd1910819109cmpi.b %d0,&0x219110blt.b fnull_od19111beq.b fword_od1911219113mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr19114addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr19115bsr.l _imem_read_long1911619117tst.l %d1 # did ifetch fail?19118bne.l fcea_iacc # yes1911919120bra.b fadd_them1912119122fword_od:19123mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr19124addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr19125bsr.l _imem_read_word1912619127tst.l %d1 # did ifetch fail?19128bne.l fcea_iacc # yes1912919130ext.l %d0 # sign extend od19131bra.b fadd_them1913219133fnull_od:19134clr.l %d01913519136fadd_them:19137mov.l %d0,%d41913819139btst &0x2,%d5 # pre or post indexing?19140beq.b fpre_indexed1914119142mov.l %d3,%a019143bsr.l _dmem_read_long1914419145tst.l %d1 # did dfetch fail?19146bne.w fcea_err # yes1914719148add.l %d2,%d0 # <ea> += index19149add.l %d4,%d0 # <ea> += od19150bra.b fdone_ea1915119152fpre_indexed:19153add.l %d2,%d3 # preindexing19154mov.l %d3,%a019155bsr.l _dmem_read_long1915619157tst.l %d1 # did dfetch fail?19158bne.w fcea_err # yes1915919160add.l %d4,%d0 # ea += od19161bra.b fdone_ea1916219163faii_bd:19164add.l %d2,%d3 # ea = (base + bd) + index19165mov.l %d3,%d019166fdone_ea:19167mov.l %d0,%a01916819169movm.l (%sp)+,&0x003c # restore d2-d519170rts1917119172#########################################################19173fcea_err:19174mov.l %d3,%a01917519176movm.l (%sp)+,&0x003c # restore d2-d519177mov.w &0x0101,%d019178bra.l 
iea_dacc1917919180fcea_iacc:19181movm.l (%sp)+,&0x003c # restore d2-d519182bra.l iea_iacc1918319184fmovm_out_err:19185bsr.l restore19186mov.w &0x00e1,%d019187bra.b fmovm_err1918819189fmovm_in_err:19190bsr.l restore19191mov.w &0x0161,%d01919219193fmovm_err:19194mov.l L_SCR1(%a6),%a019195bra.l iea_dacc1919619197#########################################################################19198# XDEF **************************************************************** #19199# fmovm_ctrl(): emulate fmovm.l of control registers instr #19200# #19201# XREF **************************************************************** #19202# _imem_read_long() - read longword from memory #19203# iea_iacc() - _imem_read_long() failed; error recovery #19204# #19205# INPUT *************************************************************** #19206# None #19207# #19208# OUTPUT ************************************************************** #19209# If _imem_read_long() doesn't fail: #19210# USER_FPCR(a6) = new FPCR value #19211# USER_FPSR(a6) = new FPSR value #19212# USER_FPIAR(a6) = new FPIAR value #19213# #19214# ALGORITHM *********************************************************** #19215# Decode the instruction type by looking at the extension word #19216# in order to see how many control registers to fetch from memory. #19217# Fetch them using _imem_read_long(). If this fetch fails, exit through #19218# the special access error exit handler iea_iacc(). 
#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

# Dispatch on the high byte of the extension word. Three register
# combinations are matched explicitly; every other value falls through
# to fctrl_in_3 (FPSR then FPIAR). Each case fetches its immediate
# longwords in the order FPCR, FPSR, FPIAR and advances EXC_EXTWPTR by 4
# per fetch.
	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

#########################################################################
# XDEF **************************************************************** #
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF **************************************************************** #
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT *************************************************************** #
#	d0 = number
#	of bytes to adjust <ea> by					#
#									#
# OUTPUT ************************************************************** #
#	a0 = <ea> to use for the operand access				#
#	For (An)+ and -(An), An is updated through inc_areg()/dec_areg().#
#	For #<data>, SPCOND_FLG(a6) is set to immed_flg.		#
#									#
# ALGORITHM *********************************************************** #
#	"Dummy" CALCulate Effective Address:				#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch low byte of opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = (longword at USER_FPIAR(a6)) + 4
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
# (d1 = register number from the opword; d0 = byte count)
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

# NOTE(review): the compare below reuses the byte count in d0, so it
# relies on dec_areg() returning with d0 unchanged - confirm.
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF **************************************************************** #
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#									#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the address register (An) itself must	#
# get updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.
#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# Dispatch on the register number in d1 to the handler that adds 12 to
# that address register (the saved copy for a0, a1, a6, a7; the live
# register for a2-a5).
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # d1 = handler offset
	mov.l		EXC_EA(%a6),%a0		# return stacked <ea> in a0
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
	addi.l		&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# Both a0 and the stacked EXC_EA are corrected by -8; the handler
# selected by d1 then writes the corrected <ea> into An.
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # d1 = handler offset
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0
	sub.l		&0x8,EXC_EA(%a6)
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
	rts

#########################################################################
# XDEF **************************************************************** #
#	_load_fop(): load operand for unimplemented FP exception	#
#									#
# XREF **************************************************************** #
#	set_tag_x() - determine ext prec optype tag			#
#	set_tag_s() - determine sgl prec optype tag			#
#	set_tag_d() - determine dbl prec optype tag			#
#	unnorm_fix() - convert normalized number to denorm or zero	#
#	norm() - normalize a denormalized number			#
#	get_packed() - fetch a packed operand from memory		#
#	_dcalc_ea() - calculate <ea>, fixing An in process		#
#									#
#	_imem_read_{word,long}() - read from instruction memory		#
#	_dmem_read() - read from data memory				#
#	_dmem_read_{byte,word,long}() - read from data memory		#
#									#
#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If memory access doesn't fail:					#
#		FP_SRC(a6) = source operand in extended precision	#
#		FP_DST(a6) = destination operand in extended precision	#
#									#
# ALGORITHM *********************************************************** #
#	This is called from the Unimplemented FP exception
# handler in								#
# order to load the source and maybe destination operand into		#
# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
# the source and destination from the FP register file. Set the optype	#
# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
# convert it to a DENORM or a ZERO.					#
#	If the instruction is opclass two (memory->reg), then fetch	#
# the destination from the register file and the source operand from	#
# memory. Tag and fix both as above w/ opclass zero instructions.	#
#	If the source operand is byte,word,long, or single, it may be	#
# in the data register file. If it's actually out in memory, use one of	#
# the mem_read() routines to fetch it. If the mem_read() access returns	#
# a failing value, exit through the special facc_in() routine which	#
# will create an access error exception frame from the current exception #
# frame.								#
#	Immediate data and regular data accesses are separated because	#
# if an immediate data access fails, the resulting fault status		#
# longword stacked for the access error exception must have the		#
# instruction bit set.							#
#									#
#########################################################################

	global		_load_fop
_load_fop:

#  15     13 12 10  9 7  6       0
# /        \ /   \ /  \ /         \
# ---------------------------------
# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
# ---------------------------------
#

#	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
#	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
#	beq.w		op010			# handle <ea> -> fpn
#	bgt.w		op011			# handle fpn -> <ea>

# we're not using op011 for now...
# so a single bit decides: bit 6 of the first CMDREG byte (bit 14 of the
# word in the diagram above) set => opclass '010.
	btst		&0x6,EXC_CMDREG(%a6)
	bne.b		op010

############################
# OPCLASS '000: reg -> reg #
############################
op000:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
	btst		&0x5,%d0		# testing extension bits
	beq.b		op000_src		# (bit 5 == 0) => monadic
	btst		&0x4,%d0		# (bit 5 == 1)
	beq.b		op000_dst		# (bit 4 == 0) => dyadic
	and.w		&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b		op000_src		# no; load the src only

op000_dst:
	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST

	bsr.l		set_tag_x		# get dst optype tag

	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b		op000_dst_unnorm	# yes
op000_dst_cont:
	mov.b		%d0, DTAG(%a6)		# store the dst optype tag

op000_src:
	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC

	bsr.l		set_tag_x		# get src optype tag

	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
	beq.b		op000_src_unnorm	# yes
op000_src_cont:
	mov.b		%d0, STAG(%a6)		# store the src optype tag
	rts

op000_dst_unnorm:
	bsr.l		unnorm_fix		# fix the dst UNNORM
	bra.b		op000_dst_cont
op000_src_unnorm:
	bsr.l		unnorm_fix		# fix the src UNNORM
	bra.b		op000_src_cont

#############################
# OPCLASS '010: <ea> -> reg #
#############################
op010:
	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
	btst		&0x5,%d0		# testing extension bits
	beq.b		op010_src		# (bit 5 == 0) => monadic
	btst		&0x4,%d0		# (bit 5 == 1)
	beq.b		op010_dst		# (bit 4 == 0) => dyadic
	and.w		&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b		op010_src		# no; load the src only

op010_dst:
	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l		load_fpn2		# fetch dst fpreg ptr

	bsr.l		set_tag_x		# get dst type tag

	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b		op010_dst_unnorm	# yes
op010_dst_cont:
	mov.b		%d0, DTAG(%a6)		# store the dst optype tag

op010_src:
	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field

	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
	bne.w		fetch_from_mem		# mode != 0: src op is in memory

# mode == 0: the source is a data register. d1 = register number for
# fetch_dreg(), d0 = source type used to index the table below.
op010_dreg:
	clr.b		STAG(%a6)		# either NORM or ZERO
	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field

	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg

op010_dst_unnorm:
	bsr.l		unnorm_fix		# fix the dst UNNORM
	bra.b		op010_dst_cont

# entries 2, 3, 5 and 7 hold offset zero, i.e. they point back at the
# table itself.
# NOTE(review): presumably those source types never reach this table
# with a data-register <ea> - confirm.
	swbeg		&0x8
tbl_op010_dreg:
	short		opd_long	- tbl_op010_dreg
	short		opd_sgl		- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		opd_word	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg
	short		opd_byte	- tbl_op010_dreg
	short		tbl_op010_dreg	- tbl_op010_dreg

#
# LONG: can be either NORM or ZERO...
#
opd_long:
	bsr.l		fetch_dreg		# fetch long in d0
	fmov.l		%d0, %fp0		# load a long
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w		opd_long_zero		# long is a ZERO
	rts
opd_long_zero:
	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# WORD: can be either NORM or ZERO...
#
opd_word:
	bsr.l		fetch_dreg		# fetch word in d0
	fmov.w		%d0, %fp0		# load a word
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w		opd_word_zero		# WORD is a ZERO
	rts
opd_word_zero:
	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# BYTE: can be either NORM or ZERO...
#
opd_byte:
	bsr.l		fetch_dreg		# fetch byte in d0
	fmov.b		%d0, %fp0		# load a byte
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w		opd_byte_zero		# byte is a ZERO
	rts
opd_byte_zero:
	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
#
# separate SNANs and DENORMs so they can be loaded w/ special care.
# all others can simply be moved "in" using fmove.
#
opd_sgl:
	bsr.l		fetch_dreg		# fetch sgl in d0
	mov.l		%d0,L_SCR1(%a6)		# keep a copy that a0 can point at

	lea		L_SCR1(%a6), %a0	# pass: ptr to the sgl
	bsr.l		set_tag_s		# determine sgl type
	mov.b		%d0, STAG(%a6)		# save the src tag

	cmpi.b		%d0, &SNAN		# is it an SNAN?
	beq.w		get_sgl_snan		# yes

	cmpi.b		%d0, &DENORM		# is it a DENORM?
	beq.w		get_sgl_denorm		# yes

	fmov.s		(%a0), %fp0		# no, so can load it regular
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

##############################################################################

#########################################################################
# fetch_from_mem():							#
# - src is out in memory. must:						#
#	(1) calc ea - must read AFTER you know the src type since	#
#		      if the ea is -() or ()+, need to know # of bytes.
#
#	(2) read it in from either user or supervisor space		#
#	(3) if (b || w || l) then simply read in			#
#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
#	    if (packed) then punt for now				#
# INPUT:								#
#	%d0 : src type field						#
#########################################################################
fetch_from_mem:
	clr.b		STAG(%a6)		# either NORM or ZERO

	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
	jmp		(tbl_fp_type.b,%pc,%d0.w*1)

# one entry per src type value 0-7; the last entry holds offset zero
# (it points back at the table itself).
	swbeg		&0x8
tbl_fp_type:
	short		load_long	- tbl_fp_type
	short		load_sgl	- tbl_fp_type
	short		load_ext	- tbl_fp_type
	short		load_packed	- tbl_fp_type
	short		load_word	- tbl_fp_type
	short		load_dbl	- tbl_fp_type
	short		load_byte	- tbl_fp_type
	short		tbl_fp_type	- tbl_fp_type

#########################################
# load a LONG into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 4 bytes into %d0	#
#	(3) fmov.l into %fp0		#
#########################################
load_long:
	movq.l		&0x4, %d0		# pass: 4 (bytes)
	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
	beq.b		load_long_immed		# yes; read from instruction stream

	bsr.l		_dmem_read_long		# fetch src operand from memory

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_l		# yes

load_long_cont:
	fmov.l		%d0, %fp0		# read into %fp0;convert to xprec
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC

	fbeq.w		load_long_zero		# src op is a ZERO
	rts
load_long_zero:
	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
	rts

load_long_immed:
	bsr.l		_imem_read_long		# fetch src operand immed data

	tst.l		%d1			# did ifetch fail?
	bne.l		funimp_iacc		# yes
	bra.b		load_long_cont

#########################################
# load a WORD into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 2 bytes into %d0	#
#	(3) fmov.w into %fp0
#
#########################################
load_word:
	movq.l		&0x2, %d0		# pass: 2 (bytes)
	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
	beq.b		load_word_immed		# yes; read from instruction stream

	bsr.l		_dmem_read_word		# fetch src operand from memory

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_w		# yes

load_word_cont:
	fmov.w		%d0, %fp0		# read into %fp0;convert to xprec
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC

	fbeq.w		load_word_zero		# src op is a ZERO
	rts
load_word_zero:
	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
	rts

load_word_immed:
	bsr.l		_imem_read_word		# fetch src operand immed data

	tst.l		%d1			# did ifetch fail?
	bne.l		funimp_iacc		# yes
	bra.b		load_word_cont

#########################################
# load a BYTE into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 1 byte into %d0	#
#	(3) fmov.b into %fp0		#
#########################################
load_byte:
	movq.l		&0x1, %d0		# pass: 1 (byte)
	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b		SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
	beq.b		load_byte_immed		# yes; read from instruction stream

	bsr.l		_dmem_read_byte		# fetch src operand from memory

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_b		# yes

load_byte_cont:
	fmov.b		%d0, %fp0		# read into %fp0;convert to xprec
	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC

	fbeq.w		load_byte_zero		# src op is a ZERO
	rts
load_byte_zero:
	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
	rts

# the immediate is read as a whole word; fmov.b above then uses only the
# low byte of d0.
load_byte_immed:
	bsr.l		_imem_read_word		# fetch src operand immed data

	tst.l		%d1			# did ifetch fail?
	bne.l		funimp_iacc		# yes
	bra.b		load_byte_cont

#########################################
# load a SGL into %fp0:			#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 4 bytes into L_SCR1	#
#	(3) fmov.s into %fp0
#########################################
# The single operand is staged in L_SCR1 because set_tag_s and the
# denorm/SNAN converters take a pointer to it.
load_sgl:
	movq.l	&0x4, %d0		# pass: 4 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg
	beq.b	load_sgl_immed

	bsr.l	_dmem_read_long		# fetch src operand from memory
	mov.l	%d0, L_SCR1(%a6)	# store src op on stack

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_l		# yes

load_sgl_cont:
	lea	L_SCR1(%a6), %a0	# pass: ptr to sgl src op
	bsr.l	set_tag_s		# determine src type tag
	mov.b	%d0, STAG(%a6)		# save src optype tag on stack

	cmpi.b	%d0, &DENORM		# is it a sgl DENORM?
	beq.w	get_sgl_denorm		# yes

	cmpi.b	%d0, &SNAN		# is it a sgl SNAN?
	beq.w	get_sgl_snan		# yes

	fmov.s	L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

load_sgl_immed:
	bsr.l	_imem_read_long		# fetch src operand immed data

	tst.l	%d1			# did ifetch fail?
	bne.l	funimp_iacc		# yes

# load_sgl_cont reads the operand from L_SCR1, not from %d0, so the
# immediate must be staged there exactly as the memory path does.
	mov.l	%d0, L_SCR1(%a6)	# store src op on stack
	bra.b	load_sgl_cont

# must convert sgl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : points to sgl denorm
get_sgl_denorm:
	clr.w	FP_SRC_EX(%a6)		# start with sign = 0, exp = 0
	bfextu	(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
	lsl.l	&0x8, %d0		# left-justify below the integer bit
	mov.l	%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
	clr.l	FP_SRC_LO(%a6)		# set ext lo(_mantissa)

	btst	&0x7, (%a0)		# is sgn bit set?
	beq.b	sgl_dnrm_norm
	bset	&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

sgl_dnrm_norm:
	lea	FP_SRC(%a6), %a0
	bsr.l	norm			# normalize number
	mov.w	&0x3f81, %d1		# xprec exp = 0x3f81
	sub.w	%d0, %d1		# exp = 0x3f81 - shft amt.
	or.w	%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b	&NORM, STAG(%a6)	# fix src type tag
	rts

# convert sgl to ext SNAN
# %a0 : points to sgl SNAN
get_sgl_snan:
	mov.w	&0x7fff, FP_SRC_EX(%a6)	# set exp of SNAN
	bfextu	(%a0){&9:&23}, %d0
	lsl.l	&0x8, %d0		# extract and insert hi(man)
	mov.l	%d0, FP_SRC_HI(%a6)
	clr.l	FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# see if sign of SNAN is set
	beq.b	no_sgl_snan_sgn
	bset	&0x7, FP_SRC_EX(%a6)
no_sgl_snan_sgn:
	rts

#########################################
# load a DBL into %fp0:			#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 8 bytes into L_SCR(1,2)#
#	(3) fmov.d into %fp0		#
#########################################
load_dbl:
	movq.l	&0x8, %d0		# pass: 8 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg
	beq.b	load_dbl_immed

	lea	L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l	&0x8, %d0		# pass: # bytes to read
	bsr.l	_dmem_read		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_d		# yes

load_dbl_cont:
	lea	L_SCR1(%a6), %a0	# pass: ptr to input dbl
	bsr.l	set_tag_d		# determine src type tag
	mov.b	%d0, STAG(%a6)		# set src optype tag

	cmpi.b	%d0, &DENORM		# is it a dbl DENORM?
	beq.w	get_dbl_denorm		# yes

	cmpi.b	%d0, &SNAN		# is it a dbl SNAN?
	beq.w	get_dbl_snan		# yes

	fmov.d	L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

load_dbl_immed:
	lea	L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l	&0x8, %d0		# pass: # bytes to read
	bsr.l	_imem_read		# fetch src operand from memory

	tst.l	%d1			# did ifetch fail?
	bne.l	funimp_iacc		# yes
	bra.b	load_dbl_cont

# must convert dbl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : loc.
# of dbl denorm
get_dbl_denorm:
	clr.w	FP_SRC_EX(%a6)		# start with sign = 0, exp = 0
	bfextu	(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l	%d0, FP_SRC_HI(%a6)
	bfextu	4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l	&0xb, %d1
	lsl.l	%d1, %d0		# left-justify the low 21 bits
	mov.l	%d0, FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# is sgn bit set?
	beq.b	dbl_dnrm_norm
	bset	&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

dbl_dnrm_norm:
	lea	FP_SRC(%a6), %a0
	bsr.l	norm			# normalize number
	mov.w	&0x3c01, %d1		# xprec exp = 0x3c01
	sub.w	%d0, %d1		# exp = 0x3c01 - shft amt.
	or.w	%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b	&NORM, STAG(%a6)	# fix src type tag
	rts

# convert dbl to ext SNAN
# %a0 : points to dbl SNAN
get_dbl_snan:
	mov.w	&0x7fff, FP_SRC_EX(%a6)	# set exp of SNAN

	bfextu	(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l	%d0, FP_SRC_HI(%a6)
	bfextu	4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l	&0xb, %d1
	lsl.l	%d1, %d0
	mov.l	%d0, FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# see if sign of SNAN is set
	beq.b	no_dbl_snan_sgn
	bset	&0x7, FP_SRC_EX(%a6)
no_dbl_snan_sgn:
	rts

#################################################
# load a Xprec into FP_SRC:			#
#	-number can't fault			#
#	(1) calc ea				#
#	(2) read 12 bytes into FP_SRC		#
#	(3) tag it; fix up an UNNORM		#
#################################################
load_ext:
	mov.l	&0xc, %d0		# pass: 12 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>

	lea	FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
	mov.l	&0xc, %d0		# pass: # of bytes to read
	bsr.l	_dmem_read		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_x		# yes

	lea	FP_SRC(%a6), %a0	# pass: ptr to src op
	bsr.l	set_tag_x		# determine src type tag

	cmpi.b	%d0, &UNNORM		# is the src op an UNNORM?
	beq.b	load_ext_unnorm		# yes

	mov.b	%d0, STAG(%a6)		# store the src optype tag
	rts

load_ext_unnorm:
	bsr.l	unnorm_fix		# fix the src UNNORM
	mov.b	%d0, STAG(%a6)		# store the src optype tag
	rts

#################################################
# load a packed into FP_SRC:			#
#	-number can't fault			#
#	(1) get_packed() fetches the operand	#
#	(2) tag the result in FP_SRC		#
#	(3) fix up an UNNORM ZERO		#
#################################################
load_packed:
	bsr.l	get_packed

	lea	FP_SRC(%a6),%a0		# pass ptr to src op
	bsr.l	set_tag_x		# determine src type tag
	cmpi.b	%d0,&UNNORM		# is the src op an UNNORM ZERO?
	beq.b	load_packed_unnorm	# yes

	mov.b	%d0,STAG(%a6)		# store the src optype tag
	rts

load_packed_unnorm:
	bsr.l	unnorm_fix		# fix the UNNORM ZERO
	mov.b	%d0,STAG(%a6)		# store the src optype tag
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fout(): move from fp register to memory or data register	#
#									#
# XREF ****************************************************************	#
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM ***********************************************************	#
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP is created and returned in	#
# fp1.									#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate		#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#									#
#########################################################################

# fout dispatches on the 3-bit destination format field extracted from
# EXC_CMDREG. tbl_fout holds table-relative offsets; the handler is
# entered with a jmp, so its rts returns to fout's caller.
	global	fout
fout:
	bfextu	EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w	(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp	(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg	&0x8
tbl_fout:
	short	fout_long	- tbl_fout
	short	fout_sgl	- tbl_fout
	short	fout_ext	- tbl_fout
	short	fout_pack	- tbl_fout
	short	fout_word	- tbl_fout
	short	fout_dbl	- tbl_fout
	short	fout_byte	- tbl_fout
	short	fout_pack	- tbl_fout

#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_byte_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_byte_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec,mode

	fmov.b	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_byte_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_byte	# write byte

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_b
	rts

# A DENORM is replaced by a single-precision value with the same sign
# and exponent field 1 (0x00800000), so the integer conversion above
# still rounds in the requested mode and sets the FPSR bits.
fout_byte_denorm:
	mov.l	SRC_EX(%a0),%d1
	andi.l	&0x80000000,%d1		# keep DENORM sign
	ori.l	&0x00800000,%d1		# make smallest sgl
	fmov.s	%d1,%fp0
	bra.b	fout_byte_norm

#################################################################
# fmove.w out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_word:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_word_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_word_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec:mode

	fmov.w	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_word_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_word	# write word

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_w		# yes

	rts

fout_word_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_w
	rts

fout_word_denorm:
	mov.l	SRC_EX(%a0),%d1
	andi.l	&0x80000000,%d1		# keep DENORM sign
	ori.l	&0x00800000,%d1		# make smallest sgl
	fmov.s	%d1,%fp0
	bra.b	fout_word_norm

#################################################################
# fmove.l out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_long:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_long_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_long_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec:mode

	fmov.l	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_long_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_long	# write long

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_l		# yes

	rts

fout_long_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_l
	rts

fout_long_denorm:
	mov.l	SRC_EX(%a0),%d1
	andi.l	&0x80000000,%d1		# keep DENORM sign
	ori.l	&0x00800000,%d1		# make smallest sgl
	fmov.s	%d1,%fp0
	bra.b	fout_long_norm

#################################################################
# fmove.x out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
# The DENORM causes an Underflow exception.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w	2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x	SRC(%a0),&0x80		# return result

	bsr.l	_calc_ea_fout		# fix stacked <ea>

	mov.l	%a0,%a1			# pass: dst addr
	lea	FP_SCR0(%a6),%a0	# pass: src addr
	mov.l	&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame.
# so, leave it in FP_SRC for now and deal with it later...
	cmpi.b	SPCOND_FLG(%a6),&mda7_flg
	beq.b	fout_ext_a7

	bsr.l	_dmem_write		# write ext prec number to memory

	tst.l	%d1			# did dstore fail?
	bne.w	fout_ext_err		# yes

	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b	FPCR_ENABLE(%a6),%d0
	andi.b	&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b	fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l	_mem_write2		# write ext prec number to memory

	tst.l	%d1			# did dstore fail?
	bne.w	fout_ext_err		# yes

	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_ext_denorm		# no
	rts

# Build the EXOP from the copy in FP_SCR0: normalize the mantissa and
# use the negated shift count (15 bits) as the exponent, keeping the
# original sign.
fout_ext_exc:
	lea	FP_SCR0(%a6),%a0
	bsr.l	norm			# normalize the mantissa
	neg.w	%d0			# new exp = -(shft amt)
	andi.w	&0x7fff,%d0
	andi.w	&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w	%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l	EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l	facc_out_x

#########################################################################
# fmove.s out ###########################################################
#########################################################################
fout_sgl:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl prec
	mov.l	%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately.
# otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w	SRC_EX(%a0),%d0		# extract exponent
	andi.w	&0x7fff,%d0		# strip sign

	cmpi.w	%d0,&SGL_HI		# will operand overflow?
	bgt.w	fout_sgl_ovfl		# yes; go handle OVFL
	beq.w	fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w	%d0,&SGL_LO		# will operand underflow?
	blt.w	fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x	SRC(%a0),&0x80		# fetch fop from stack

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmov.s	%fp0,%d0		# store does convert and round

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save FPSR

	or.w	%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

fout_sgl_exg_write:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_long	# write long

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_l		# yes

	rts

fout_sgl_exg_write_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_l
	rts

#
# here, we know that the operand would UNFL if moved out to single prec,
# so, denorm and round and then use generic store single routine to
# write the value to memory.
#
fout_sgl_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l	%a0,-(%sp)		# save src ptr; popped by chkexc/fout_sd_exc_unfl

	clr.l	%d0			# pass: S.F. = 0

	cmpi.b	STAG(%a6),&DENORM	# fetch src optype tag
	bne.b	fout_sgl_unfl_cont	# let DENORMs fall through

	lea	FP_SCR0(%a6),%a0
	bsr.l	norm			# normalize the DENORM

fout_sgl_unfl_cont:
	lea	FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calc default underflow result

	lea	FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l	dst_sgl			# convert to single prec

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_sgl_unfl_dn	# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_long	# write long

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_l		# yes

	bra.b	fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_l

fout_sgl_unfl_chkexc:
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w	fout_sd_exc_unfl	# yes
	addq.l	&0x4,%sp		# discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_sgl_ovfl:
	tst.b	3+SRC_HI(%a0)		# is result inexact?
	bne.b	fout_sgl_ovfl_inex2
	tst.l	SRC_LO(%a0)		# is result inexact?
	bne.b	fout_sgl_ovfl_inex2
	ori.w	&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b	fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w	&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l	%a0,-(%sp)		# save src ptr; popped by chkexc/fout_sd_exc_ovfl

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b	SRC_EX(%a0)		# is operand negative?
	smi	%d1			# set if so
	mov.l	L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l	ovf_res			# calc OVFL result
	fmovm.x	(%a0),&0x80		# load default overflow result
	fmov.s	%fp0,%d0		# store to single

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_long	# write long

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_l		# yes

	bra.b	fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_l

# The EXOP is needed when the exception this path raises is enabled.
# In the FPCR enable byte OVFL is 0x10, UNFL is 0x08 and INEX2 is 0x02,
# so the overflow path must test 0x12. The previous mask 0x0a tested
# UNFL instead of OVFL, leaving fp1 without an EXOP when only OVFL was
# enabled.
fout_sgl_ovfl_chkexc:
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x12,%d1		# is OVFL or INEX enabled?
	bne.w	fout_sd_exc_ovfl	# yes
	addq.l	&0x4,%sp		# discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_sgl_may_ovfl:
	mov.w	SRC_EX(%a0),%d1		# fetch current sign
	andi.w	&0x8000,%d1		# keep it,clear exp
	ori.w	&0x3fff,%d1		# insert exp = 0
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x	FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l	&0x0,%fpcr		# clear FPCR

	fabs.x	%fp0			# need absolute value
	fcmp.b	%fp0,&0x2		# did exponent increase?
	fblt.w	fout_sgl_exg		# no; go finish NORM
	bra.w	fout_sgl_ovfl		# yes; go handle overflow

################

# Shared sgl/dbl EXOP builders. Both entry points pop the source
# pointer that the unfl/ovfl paths pushed, copy the source operand into
# FP_SCR0, round it with the precision and mode saved in L_SCR3, and
# return the result in fp1.
fout_sd_exc_unfl:
	mov.l	(%sp)+,%a0		# restore a0

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b	STAG(%a6),&DENORM	# was src a DENORM?
	bne.b	fout_sd_exc_cont	# no

	lea	FP_SCR0(%a6),%a0
	bsr.l	norm
	neg.l	%d0			# new exp = -(shft amt)
	andi.w	&0x7fff,%d0
	bfins	%d0,FP_SCR0_EX(%a6){&1:&15} # replace exp, keep sign
	bra.b	fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l	(%sp)+,%a0		# restore a0

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr	&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b	2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea	FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# d1(hi) = rnd prec field, d1(lo) = rnd mode, both from 3+L_SCR3
	mov.b	3+L_SCR3(%a6),%d1
	lsr.b	&0x4,%d1
	andi.w	&0x0c,%d1
	swap	%d1
	mov.b	3+L_SCR3(%a6),%d1
	lsr.b	&0x4,%d1
	andi.w	&0x03,%d1
	clr.l	%d0			# pass: zero g,r,s
	bsr.l	_round			# round the DENORM

	tst.b	2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b	fout_sd_exc_done	# no
	bset	&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# fmove.d out ###################################################
#################################################################
fout_dbl:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl prec
	mov.l	%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately.
# otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w	SRC_EX(%a0),%d0		# extract exponent
	andi.w	&0x7fff,%d0		# strip sign

	cmpi.w	%d0,&DBL_HI		# will operand overflow?
	bgt.w	fout_dbl_ovfl		# yes; go handle OVFL
	beq.w	fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w	%d0,&DBL_LO		# will operand underflow?
	blt.w	fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x	SRC(%a0),&0x80		# fetch fop from stack

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fmov.d	%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d0		# save FPSR

	or.w	%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l	EXC_EA(%a6),%a1		# pass: dst addr
	lea	L_SCR1(%a6),%a0		# pass: src addr
	movq.l	&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l	_dmem_write		# store dbl fop to memory

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_d		# yes

	rts				# no; so we're finished

#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
#
fout_dbl_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l	%a0,-(%sp)		# save src ptr; popped below or by fout_sd_exc_unfl

	clr.l	%d0			# pass: S.F. = 0

	cmpi.b	STAG(%a6),&DENORM	# fetch src optype tag
	bne.b	fout_dbl_unfl_cont	# let DENORMs fall through

	lea	FP_SCR0(%a6),%a0
	bsr.l	norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea	FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calc default underflow result

	lea	FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l	dst_dbl			# convert to double prec
	mov.l	%d0,L_SCR1(%a6)		# hi(dbl result)
	mov.l	%d1,L_SCR2(%a6)		# lo(dbl result)

	mov.l	EXC_EA(%a6),%a1		# pass: dst addr
	lea	L_SCR1(%a6),%a0		# pass: src addr
	movq.l	&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l	_dmem_write		# store dbl fop to memory

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_d		# yes

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w	fout_sd_exc_unfl	# yes
	addq.l	&0x4,%sp		# discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
	mov.w	2+SRC_LO(%a0),%d0
	andi.w	&0x7ff,%d0		# any bits below dbl precision?
	bne.b	fout_dbl_ovfl_inex2

	ori.w	&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b	fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w	&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l	%a0,-(%sp)		# save src ptr; popped below or by fout_sd_exc_ovfl

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b	SRC_EX(%a0)		# is operand negative?
	smi	%d1			# set if so
	mov.l	L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l	ovf_res			# calc OVFL result
	fmovm.x	(%a0),&0x80		# load default overflow result
	fmov.d	%fp0,L_SCR1(%a6)	# store to double

	mov.l	EXC_EA(%a6),%a1		# pass: dst addr
	lea	L_SCR1(%a6),%a0		# pass: src addr
	movq.l	&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l	_dmem_write		# store dbl fop to memory

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_d		# yes

# The EXOP is needed when the exception this path raises is enabled.
# In the FPCR enable byte OVFL is 0x10, UNFL is 0x08 and INEX2 is 0x02,
# so the overflow path must test 0x12. The previous mask 0x0a tested
# UNFL instead of OVFL, leaving fp1 without an EXOP when only OVFL was
# enabled.
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x12,%d1		# is OVFL or INEX enabled?
	bne.w	fout_sd_exc_ovfl	# yes
	addq.l	&0x4,%sp		# discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w	SRC_EX(%a0),%d1		# fetch current sign
	andi.w	&0x8000,%d1		# keep it,clear exp
	ori.w	&0x3fff,%d1		# insert exp = 0
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x	FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l	&0x0,%fpcr		# clear FPCR

	fabs.x	%fp0			# need absolute value
	fcmp.b	%fp0,&0x2		# did exponent increase?
	fblt.w	fout_dbl_exg		# no; go finish NORM
	bra.w	fout_dbl_ovfl		# yes; go handle overflow

#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#									#
# ALGORITHM ***********************************************************	#
#									#
#  Changes extended precision to double precision.			#
#  Note: no attempt is made to round the extended value to double.	#
#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - EXT_BIAS + DBL_BIAS				#
#		  (less 1 when the integer bit is clear, i.e. a denorm)	#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
#									#
#		---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
#		---------------   ---------------    ---------------	#
#		 95	    64    63 62	      32      31     11	  0	#
#				     |			     |		#
#				     |			     |		#
#				     |			     |		#
#				     v			     v		#
#			      ---------------   ---------------		#
#  double   ->		      |s|exp| mant  |   |  mant       |		#
#			      ---------------   ---------------		#
#			      63     51   32   31	       0	#
#									#
#########################################################################

# L_SCR1 and L_SCR2 are used as scratch while the two halves are built.
dst_dbl:
	clr.l	%d0			# clear d0
	mov.w	FTEMP_EX(%a0),%d0	# get exponent
	subi.w	&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w	&DBL_BIAS,%d0		# add double precision bias
	tst.b	FTEMP_HI(%a0)		# is number a denorm?
	bmi.b	dst_get_dupper		# no
	subq.w	&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap	%d0			# d0 now in upper word
	lsl.l	&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b	FTEMP_EX(%a0)		# test sign
	bpl.b	dst_get_dman		# if positive, go process mantissa
	bset	&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l	FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu	%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l	%d1,%d0			# put these bits in ms word of double
	mov.l	%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l	FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l	&21,%d0			# load shift count
	lsl.l	%d0,%d1			# put lower 11 bits in upper bits
	mov.l	%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l	FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu	%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l	L_SCR2(%a6),%d1
	or.l	%d0,%d1			# put them in double result
	mov.l	L_SCR1(%a6),%d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dst_sgl(): create single precision value from extended prec	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = single precision result					#
#									#
# ALGORITHM ***********************************************************	#
#									#
# Changes extended precision to single precision.			#
#	sgl_sign = ext_sign						#
#	sgl_exp  = ext_exp - EXT_BIAS + SGL_BIAS			#
#		   (one less again when the ext integer bit is		#
#		    clear, i.e. the operand is a denorm)		#
#	the explicit ext integer bit (bit 63) is discarded		#
#	sgl_mant = ext mantissa bits 62..40 (23 bits); no rounding	#
#									#
#               ---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
#               ---------------   ---------------    ---------------	#
#                95         64    63 62    40 32      31          0	#
#                                    |     |				#
#                                    |     |				#
#                                    v     v				#
#                             ---------------				#
#  single   ->                |s|exp| mant  |				#
#                             ---------------				#
#                             31     22     0				#
#									#
#########################################################################

dst_sgl:
	clr.l		%d0			# start from a clean longword
	mov.w		FTEMP_EX(%a0),%d0	# d0.w = sign/exponent word
	subi.w		&EXT_BIAS,%d0		# remove the extended bias
	addi.w		&SGL_BIAS,%d0		# apply the single bias
	tst.b		FTEMP_HI(%a0)		# integer bit set (not a denorm)?
	bmi.b		dst_get_supper		# set; exponent is final
	subq.w		&0x1,%d0		# clear; denorm uses SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# exponent now in the upper word
	lsl.l		&0x7,%d0		# line it up with the sgl exp field
	tst.b		FTEMP_EX(%a0)		# source negative?
	bpl.b		dst_get_sman		# no; leave sign bit clear
	bset		&0x1f,%d0		# yes; set the sgl sign bit
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# d1 = ms mantissa
	andi.l		&0x7fffff00,%d1		# keep the 23 bits below the int bit
	lsr.l		&0x8,%d1		# right justify them
	or.l		%d1,%d0			# d0 = single precision result
	rts

##############################################################################
#
# fout_pack: store the source operand to memory in packed decimal form.
# NORM and DENORM inputs are converted by bindec() using the static or
# dynamic k-factor named in the command word; every other input type is
# written straight from FP_SRC (an SNAN also gets its snan bit set and
# SNAN/AIOP recorded). The destination address comes from _calc_ea_fout()
# and is kept on the stack until the write.
#
fout_pack:
	bsr.l		_calc_ea_fout		# a0 = destination <ea>
	mov.l		%a0,-(%sp)		# keep it for the write below

	mov.b		STAG(%a6),%d0		# d0 = source operand tag
	bne.w		fout_pack_not_norm	# anything but NORM is handled apart

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic k-factor?
	beq.b		fout_pack_s		# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# dynamic: isolate the Dn number
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# d0 = contents of that Dn

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# static: k-factor is in the opword

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# sign extend the 7-bit k-factor
	mov.l		%d0,-(%sp)		# remember it across bindec()

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0		# d0 = k-factor again

# is every digit of the packed result zero?
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write the 12-byte result to memory

	tst.l		%d1			# did the store fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write the 12-byte result to memory

	tst.l		%d1			# did the store fail?
	bne.w		fout_ext_err		# yes

	rts

fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes; convert it like a NORM
	lea		FP_SRC(%a6),%a0		# no; write FP_SRC itself
	clr.w		2+FP_SRC_EX(%a6)	# zero the word after the exponent
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	bne.b		fout_pack_write		# no; write it unchanged

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write

#########################################################################
# XDEF ****************************************************************	#
#	fetch_dreg(): fetch register according to index in d1		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of register fetched					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1 which can range from zero	#
# to fifteen, load the corresponding register file value (where		#
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
# stack. The rest should still be in their original places.		#
#									#
#########################################################################

# d1 is not modified here, so a following store_dreg_*() call can reuse it.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0 = handler offset
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_fdreg) per register index 0x0-0xf
tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

# indexes 0-1: read from the EXC_DREGS save area
fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
# indexes 2-7: d2-d7 are read directly
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
# indexes 8-9: read from the EXC_DREGS save area
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
# indexes 0xa-0xd: a2-a5 are read directly
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
# index 0xe: read from the longword a6 points at
fdrege:
	mov.l		(%a6),%d0
	rts
# index 0xf: read from EXC_A7
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_l(): store longword to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of register to store to				#
#									#
# OUTPUT
# **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the longword value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
# Note: d1 is overwritten with the dispatch offset.			#
#									#
#########################################################################

	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_sdregl) per data register 0-7
tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

# d0/d1: update the copy in the EXC_DREGS save area
sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
# d2-d7: write the register itself
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_w(): store word to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = word value to store					#
#	d1 = index of register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the word value	#
# in d0 to the
# corresponding data register. D0/D1 are on the stack			#
# while the rest are in their initial places.				#
# Only the low word of the target is written; d1 is overwritten		#
# with the dispatch offset.						#
#									#
#########################################################################

	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_sdregw) per data register 0-7
tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

# d0/d1: update the low word (offset 2) of the saved copy
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: write the low word of the register itself
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_b(): store byte to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = byte value to store					#
#	d1 = index of register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the byte value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

# Only the low byte of the target is written; d1 is overwritten with the
# dispatch offset.
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_sdregb) per data register 0-7
tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

# d0/d1: update the low byte (offset 3) of the saved copy
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
	rts
# d2-d7: write the low byte of the register itself
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	inc_areg(): increment an address register by the value in d0	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to increment by					#
#	d1 = index of address register to increment			#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a post-increment <ea>,	#
# this routine adds the increment value in d0 to the address register	#
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
# in their original places.						#
#	For a7, if the increment amount is one, then we have to		#
# increment by two.
# For any a7 update, set the mia7_flag so that if			#
# an access error exception occurs later in emulation, this address	#
# register update can be undone.					#
# Note: d1 is overwritten with the dispatch offset.			#
#									#
#########################################################################

	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_iareg) per address register 0-7
tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

# a0/a1: bump the copy in the save area
iareg0:
	add.l		%d0,EXC_DREGS+0x8(%a6)
	rts
iareg1:
	add.l		%d0,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: bump the register itself
iareg2:
	add.l		%d0,%a2
	rts
iareg3:
	add.l		%d0,%a3
	rts
iareg4:
	add.l		%d0,%a4
	rts
iareg5:
	add.l		%d0,%a5
	rts
# a6: bump the longword a6 points at
iareg6:
	add.l		%d0,(%a6)
	rts
# a7: record the update in SPCOND_FLG, then bump EXC_A7
iareg7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# byte-sized increment?
	beq.b		iareg7b			# yes; a7 moves by two instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dec_areg(): decrement an address register by the value in d0	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to decrement by					#
#	d1 = index of address register to decrement			#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a pre-decrement <ea>,	#
# this routine subtracts the decrement value in d0 from the address	#
# register specified by d1. A0/A1/A6/A7 reside on the stack.		#
# The rest reside							#
# in their original places.						#
#	For a7, if the decrement amount is one, then we have to		#
# decrement by two. For any a7 update, set the mda7_flag so that if	#
# an access error exception occurs later in emulation, this address	#
# register update can be undone.					#
# Note: d1 is overwritten with the dispatch offset.			#
#									#
#########################################################################

	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch on it

# one 16-bit offset (relative to tbl_dareg) per address register 0-7
tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

# a0/a1: adjust the copy in the save area
dareg0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)
	rts
dareg1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)
	rts
# a2-a5: adjust the register itself
dareg2:
	sub.l		%d0,%a2
	rts
dareg3:
	sub.l		%d0,%a3
	rts
dareg4:
	sub.l		%d0,%a4
	rts
dareg5:
	sub.l		%d0,%a5
	rts
# a6: adjust the longword a6 points at
dareg6:
	sub.l		%d0,(%a6)
	rts
# a7: record the update in SPCOND_FLG, then adjust EXC_A7
dareg7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# byte-sized decrement?
	beq.b		dareg7b			# yes; a7 moves by two instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)
	rts

##############################################################################

#########################################################################
# XDEF ****************************************************************	#
#	load_fpn1(): load FP register value into FP_SRC(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index of FP register to load				#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = value loaded from FP register file			#
#	a0 = address of FP_SRC(a6)					#
#									#
# ALGORITHM ***********************************************************	#
#	Using the index in d0, load FP_SRC(a6) with a number from the	#
# FP register file. fp0/fp1 are copied from the EXC_FP0/EXC_FP1 save	#
# slots; fp2-fp7 are stored from the registers with fmovm.		#
# d0 is overwritten with the dispatch offset.				#
#									#
#########################################################################

	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0 = handler offset
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# dispatch on it

# one 16-bit offset (relative to tbl_load_fpn1) per FP register 0-7
tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

# fp0: copy the three longwords of the saved image
load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
# fp1: copy the three longwords of the saved image
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
# fp2-fp7: the fmovm mask selects the single live register
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************	#
#	load_fpn2(): load FP register value into FP_DST(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index of FP register to load				#
#									#
# OUTPUT **************************************************************	#
#	FP_DST(a6) = value loaded from FP register file			#
#	a0 = address of FP_DST(a6)					#
#									#
# ALGORITHM ***********************************************************	#
#	Using the index in d0, load FP_DST(a6) with a number from the	#
# FP register file. fp0/fp1 are copied from the EXC_FP0/EXC_FP1 save	#
# slots; fp2-fp7 are stored from the registers with fmovm.		#
# d0 is overwritten with the dispatch offset.				#
#									#
#########################################################################

	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = handler offset
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# dispatch on it

# one 16-bit offset (relative to tbl_load_fpn2) per FP register 0-7
tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

# fp0: copy the three longwords of the saved image
load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
# fp1: copy the three longwords of the saved image
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
# fp2-fp7: the fmovm mask selects the single live register
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************	#
#	store_fpreg(): store an fp value to the fpreg designated d0.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	fp0 = extended precision value to store				#
#	d0  = index of floating-point register				#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Store the value in fp0 to the FP register designated by the	#
# value in d0. The FP number can be DENORM or SNAN so we have to be	#
# careful that we don't take an exception here.				#
#									#
#########################################################################

# Every path moves the value with fmovm.x. Targets fp0/fp1 are written to
# the EXC_FP0/EXC_FP1 save slots; targets fp2-fp7 are loaded through a
# temporary copy of fp0 pushed on the stack. d0 is overwritten with the
# dispatch offset.
	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = handler offset
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# dispatch on it

# one 16-bit offset (relative to tbl_store_fpreg) per FP register 0-7
tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> saved fp0 slot
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> saved fp1 slot
	rts
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x20		# pop it into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x10		# pop it into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x08		# pop it into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x04		# pop it into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x02		# pop it into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x01		# pop it into fp7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_denorm(): denormalize an intermediate result			#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to the operand to be denormalized			#
#		(in the internal extended format)			#
#									#
#	d0 = rounding precision						#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to the denormalized result				#
#		(in the internal extended format)			#
#									#
#	d0 = guard,round,sticky						#
#									#
# ALGORITHM
# ***********************************************************	#
#	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise	#
# the exponent of the operand to the threshold value. While shifting	#
# the mantissa bits right, maintain the value of the guard, round, and	#
# sticky bits.								#
# other notes:								#
#	(1) _denorm() is called by the underflow routines		#
#	(2) _denorm() does NOT affect the status register		#
#									#
#########################################################################

#
# table of exponent threshold values for each precision
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# set the sticky bit and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# ms mantissa = 0
	clr.l		FTEMP_LO(%a0)		# ls mantissa = 0
	rts

#									#
# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
#									#
# INPUT:								#
#	%a0	   : points to the operand to be denormalized		#
#	%d0{31:29} : initial guard,round,sticky				#
#	%d1{15:0}  : denormalization threshold				#
# OUTPUT:								#
#	%a0	   : points to the denormalized operand			#
#	%d0{31:29} : final guard,round,sticky				#
# SCRATCH:								#
#	%d1, L_SCR1(%a6) and L_SCR2(%a6) are overwritten.		#
#									#

# *** Local Equates *** #
set	GRS,		L_SCR2			# g,r,s temp storage
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0
	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No denormalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts

#
# case (0<d1<32)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\	   \		      \			 \
#	 \	    \		       \		  \
#	  \	     \			\		   \
#	   \	      \			 \		    \
#	    \	       \		  \		     \
#	     \		\		   \		      \
#	      \		 \		    \		       \
#	       \	  \		     \			\
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
#	---------------------------------------------------------
#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2		# yes; fold the incoming g,r,s into
	or.b		%d2, 3+FTEMP_LO2(%a6)	# the low byte of the LO copy

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts

#
# case (32<=d1<64)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\	   \		      \
#	 \	    \		       \
#	  \	     \			-------------------
#	   \	      --------------------		   \
#	    -------------------		  \		    \
#			       \	   \		     \
#				\	    \		      \
#				 \	     \		       \
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0| NEW_LO     |grs		|
#	---------------------------------------------------------
#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 31
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts

#
# case (d1>=64)
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64)
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    -------------------------------		    \
#					   \		     \
#					    \		      \
#					     \		       \
#					      <-------(32)------>
#	---------------------------------------------------------
#	|0...............0|0................0|grs		|
#	---------------------------------------------------------
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65)
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    --------------------------------		    \
#					    \		     \
#					     \		      \
#					      \		       \
#					       <-------(31)----->
#	---------------------------------------------------------
#	|0...............0|0................0|0rs		|
#	---------------------------------------------------------
#
# On entry %d1 still holds the shift amount (65), so it must be reloaded
# with hi(mantissa) before the "other bits" mask is applied. Without the
# copy the and below can never yield zero, and the sticky bit would be
# set even when nothing but the R bit was shifted off.
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# %d0 already holds the new guard/round bits and
# the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_round(): round result according to precision/mode		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0	  = ptr to input operand in internal extended format	#
#	d1(hi)    = contains rounding precision:			#
#			ext = $0000xxxx					#
#			sgl = $0004xxxx					#
#			dbl = $0008xxxx					#
#	d1(lo)	  = contains rounding mode:				#
#			RN  = $xxxx0000					#
#			RZ  = $xxxx0001					#
#			RM  = $xxxx0002					#
#			RP  = $xxxx0003					#
#	d0{31:29} = contains the g,r,s bits (extended)			#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to rounded result					#
#									#
# ALGORITHM ***********************************************************	#
#	On return the value pointed to by a0 is correctly rounded,	#
#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
#	The result is not typed - the tag field is invalid.		#
# The #
# result is still in the internal extended format. #
# #
# The INEX bit of USER_FPSR will be set if the rounded result was #
# inexact (i.e. if any of the g-r-s bits were set). #
# #
#########################################################################

	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
# The table holds 16-bit offsets relative to tbl_mode, indexed by d1.w
# (the rounding mode); %a1 is used as scratch for the fetched offset.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode

#################################################################
# ROUND PLUS INFINITY #
# #
# If sign of fp number = 0 (positive), then add 1 to l. #
#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# if negative then truncate

# d0 is forced non-zero so the add_* routines never clear the l-bit
# (their "rs = 0" tie test only applies to round-to-nearest).
	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
# ROUND MINUS INFINITY #
# #
# If sign of fp number = 1 (negative), then add 1 to l. #
#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# if positive then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
# ROUND NEAREST #
# #
# If (g=1), then add 1 to l and if (r=s=0), then clear l #
# Note that this will round to even in case of a tie. #
#################################################################
rnd_near:
# after the shift, d0 holds only r,s (and zeroes), which is what the
# add_* routines test to detect a tie.
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g=0; truncate. else (g=1) round up

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

# *** LOCAL EQUATES ***
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
# ADD SINGLE #
#########################
# Add 1 at the sgl l-bit of hi(mantissa). On carry out, rotate the carry
# back in from the top and bump the exponent. If d0 == 0 (tie) the l-bit
# is cleared. Falls into sgl_done, which is also truncate's sgl exit.
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################
# ADD EXTENDED #
#########################
# Add 1 at the lsb of the 64-bit mantissa, propagating the carry into
# hi(mantissa). A carry out of hi is rotated back through all four
# mantissa words and the exponent is incremented.
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
add_ext_done:
	rts

#########################
# ADD DOUBLE #
#########################
# Same scheme as add_ext but the increment is applied at the dbl l-bit
# of lo(mantissa). Falls into dbl_done, which is also truncate's dbl exit.
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts

###########################
# Truncate all other bits #
###########################
# Reached with d1 still in {prec,mode} order, hence the swap. Reuses the
# masking tails of add_sgl/add_dbl; extended precision needs no masking.
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext


#
# ext_grs(): extract guard, round and sticky bits according to
# rounding precision.
#
# INPUT
# d0 = extended precision g,r,s (in d0{31:29})
# d1 = {PREC,ROUND}
# OUTPUT
# d0{31:29} = guard, round, sticky
#
# The ext_grs extract the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only. All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
# prior to usage, and needs to restore d1 to original.
# this
# routine is tightly tied to the round routine and not meant to
# uphold standard subroutine calling practices.
#

ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already hold g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
# 96 64 40 32 0
# -----------------------------------------------------
# | EXP |XXXXXXX| |xx | |grs|
# -----------------------------------------------------
# <--(24)--->nn\ /
# ee ---------------------
# ww |
# v
# gr new sticky
#
# New g,r are the two bits at offset 24 of hi(mantissa). New sticky is the
# OR of the low 6 bits of hi(mantissa), all of lo(mantissa) and the
# incoming extended g,r,s in d0.
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit

#
# dbl:
# 96 64 32 11 0
# -----------------------------------------------------
# | EXP |XXXXXXX| | |xx |grs|
# -----------------------------------------------------
# nn\ /
# ee -------
# ww |
# v
# gr new sticky
#
# New g,r are the two bits at offset 21 of lo(mantissa). New sticky is the
# OR of the low 9 bits of lo(mantissa) and the incoming extended g,r,s.
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test word original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

# the movm masks differ (&0x3000 vs &0xc) because the predecrement form
# uses the reversed register-mask bit order; both name {d2/d3}.
	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the #
# input operand should not be normalized already. #
# #
# XDEF **************************************************************** #
# norm() #
# #
# XREF **************************************************************** #
# none #
# #
# INPUT *************************************************************** #
# a0 = pointer fp extended precision operand to normalize #
# #
# OUTPUT ************************************************************** #
# d0 = number of bit positions the mantissa was shifted #
# a0 = the input operand's mantissa is normalized; the exponent #
# is unchanged.
#
# #
#########################################################################
# Register usage: %d2/%d3 are saved and restored; %d1 is used as scratch
# and is NOT restored. %d0 returns the shift count.
# NOTE(review): in norm_hi a shift count of zero (mantissa already
# normalized) would make the bfextu width operand zero; per the 68k
# bit-field encoding that presumably selects a 32-bit field, which would
# OR all of lo(man) into hi(man) - hence the "should not be normalized
# already" precondition. Confirm against the processor manual.
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# hi(man) has a set bit: shift the 64-bit mantissa left by %d2, moving the
# top %d2 bits of lo(man) into the vacated low bits of hi(man).
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

# hi(man) is zero: the normalized lo(man) becomes the new hi(man) and the
# returned shift count is 32 larger.
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
# - returns corresponding optype tag #
# #
# XDEF **************************************************************** #
# unnorm_fix() #
# #
# XREF **************************************************************** #
# norm() - normalize the mantissa #
# #
# INPUT *************************************************************** #
# a0 = pointer to unnormalized extended precision number #
# #
# OUTPUT ************************************************************** #
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
# a0 = input operand has been converted to a norm, denorm, or #
# zero; both
# the exponent and mantissa are changed. #
# #
#########################################################################

# Register usage: %d0 and %d1 are both used as scratch; %d0 returns the
# tag. The NORM path calls norm(), which performs the mantissa shift.
	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. Therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
# (%d1 = exponent = number of bit positions the mantissa is shifted left)
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

# the 32-bit field starting %d1 bits into hi(man) runs on into lo(man),
# so this one extract yields the shifted hi(man).
	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_x(): return the optype of the input ext fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand. #
# If it's an unnormalized zero, alter the operand and force it #
# to be a normal zero.
#
# #
#########################################################################

# Decision order: max exponent -> INF/NAN; else j-bit set -> NORM; else
# exponent zero -> ZERO/DENORM; else UNNORM (or ZERO if mantissa is zero).
# Only %d0 is modified, plus the operand's exponent in the
# "unnormalized zero" case.
	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# is the j-bit (msb of mantissa) set?
	beq.b		not_norm_x		# no
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# exp = 0: any mantissa bit set?
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
# exponent is all ones: INF if the mantissa (ignoring its msb) is zero,
# otherwise a NAN.
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# bit set -> QNAN, clear -> SNAN
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_d(): return the optype of the input dbl fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = points to double precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
#
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand. #
# #
#########################################################################

# %d0 = exponent field of the first longword, %d1 = copy used for the
# fraction tests. Both registers are modified; the operand is only read.
	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0
	mov.l		%d0, %d1

	andi.l		&0x7ff00000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_d	# exponent = 0

	cmpi.l		%d0, &0x7ff00000	# exponent all ones?
	beq.b		inf_or_nan_d		# yes

is_norm_d:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# upper fraction bits
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# second longword of the operand
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# upper fraction bits
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# second longword of the operand
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# top fraction bit set -> QNAN
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# set_tag_s(): return the optype of the input sgl fp number #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to single precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand.
#
# #
#########################################################################

# %d0 = exponent field of the operand, %d1 = copy used for the fraction
# tests. Both registers are modified; the operand is only read.
	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0
	mov.l		%d0, %d1

	andi.l		&0x7f800000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_s	# exponent = 0

	cmpi.l		%d0, &0x7f800000	# exponent all ones?
	beq.b		inf_or_nan_s		# yes

is_norm_s:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# fraction bits
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# fraction bits
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# top fraction bit set -> QNAN
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
# unf_res(): routine to produce default underflow result of a #
# scaled extended precision number; this is used by #
# fadd/fdiv/fmul/etc. emulation routines. #
# unf_res4(): same as above but for fsglmul/fsgldiv which use #
# single round prec and extended prec mode.
#
# #
# XREF **************************************************************** #
# _denorm() - denormalize according to scale factor #
# _round() - round denormalized number according to rnd prec #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision operand #
# d0 = scale factor #
# d1 = rounding precision/mode #
# #
# OUTPUT ************************************************************** #
# a0 = pointer to default underflow result in extended precision #
# d0.b = result FPSR_cc which caller may or may not want to save #
# #
# ALGORITHM *********************************************************** #
# Convert the input operand to "internal format" which means the #
# exponent is extended to 16 bits and the sign is stored in the unused #
# portion of the extended precision operand. Denormalize the number #
# according to the scale factor passed in d0. Then, round the #
# denormalized result. #
# Set the FPSR_exc bits as appropriate but return the cc bits in #
# d0 in case the caller doesn't want to save them (as is the case for #
# fmove out). #
# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
# precision and the rounding precision to single. #
# #
#########################################################################
# Stack layout while _denorm/_round are called:
#	(%sp)    = saved operand pointer (a0)
#	0x4(%sp) = saved d1 (rnd prec,mode); its low word is at 0x6(%sp)
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# apply the scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

# rebuild d1 for _round: precision in the high word, mode in the low word.
	mov.l		(%sp),%a0
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts

# unf_res() for fsglmul() and fsgldiv().
# Identical flow to unf_res except that _denorm is passed extended
# precision (d0 = 0) and _round is passed single precision (s_mode).
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# apply the scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts

#########################################################################
# XDEF **************************************************************** #
# ovf_res(): routine to produce the default overflow result of #
# an overflowing number. #
# ovf_res2(): same as above but the rnd mode/prec are passed #
# differently. #
# #
# XREF **************************************************************** #
# none #
# #
# INPUT *************************************************************** #
# d1.b = '-1' => (-); '0' => (+) #
# ovf_res(): #
# d0 = rnd mode/prec #
# ovf_res2(): #
# hi(d0) = rnd prec #
# lo(d0) = rnd mode #
# #
# OUTPUT ************************************************************** #
# a0 = points to extended precision result #
# d0.b = condition code bits #
# #
# ALGORITHM *********************************************************** #
# The default overflow result can be determined by the sign of #
# the result and the rounding mode/prec in effect. These bits are #
# concatenated together to create an index into the default result #
# table. A pointer to the correct result is returned in a0. The #
# resulting condition codes are returned in d0 in case the caller #
# doesn't want FPSR_cc altered (as is the case for fmove out).
#
# #
#########################################################################

# Both entry points build the same index: the result sign in bit 4 with the
# rnd prec/mode bits OR-ed in below it. %d0 keeps the index for the
# 1-byte ccode table; %d1 holds index*2, which the *8 scale in
# ovf_res_load turns into a 16-byte stride for the result table.
	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as in index into the
# two tables below to fetch the default result and the result ccodes.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# condition codes, one byte per index. Rows of four follow the same order
# as the result table below; the all-zero fourth row lines up with the
# all-zero filler entries there.
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

# default results, 16 bytes per index; groups of four are ordered
# RN,RZ,RM,RP as labelled on each entry.
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

# filler group: keeps the negative entries at index 16 and up
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP

#########################################################################
# XDEF **************************************************************** #
# get_packed(): fetch a packed operand from memory and then #
# convert it to a floating-point binary number.
#
# #
# XREF **************************************************************** #
# _dcalc_ea() - calculate the correct <ea> #
# _dmem_read() - fetch the packed operand from memory #
# facc_in_x() - the fetch failed so jump to special exit code #
# decbin() - convert packed to binary extended precision #
# #
# INPUT *************************************************************** #
# None #
# #
# OUTPUT ************************************************************** #
# If no failure on _dmem_read(): #
# FP_SRC(a6) = packed operand now as a binary FP number #
# #
# ALGORITHM *********************************************************** #
# Get the correct <ea> which is the value on the exception stack #
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
# Then, fetch the operand from memory. If the fetch fails, exit #
# through facc_in_x(). #
# If the packed operand is a ZERO,NAN, or INF, convert it to #
# its binary representation here. Else, call decbin() which will #
# convert the packed value to an extended precision binary value. #
# #
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

# NOTE(review): the source address for the read is presumably left in a
# register by _dcalc_ea; that routine is not visible here - confirm.
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
# In that case FP_SRC is returned exactly as it was read.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
# normal packed operand: decbin() leaves the converted value in fp0, which
# is then stored over the packed image in FP_SRC.
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register #
# a0 to extended-precision value in fp0. #
# #
# INPUT *************************************************************** #
# a0 = pointer to normalized packed bcd value #
# #
# OUTPUT ************************************************************** #
# fp0 = exact fp representation of the packed bcd value. #
# #
# ALGORITHM *********************************************************** #
# Expected is a normal bcd (i.e.
#	non-exceptional; all inf, zero,					#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive		#
#	adds and muls in FP0. Set the sign according to SM.		#
#	The mantissa digits will be converted with the decimal point	#
#	assumed following the least-significant digit.			#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the		#
#	bcd string.  If SE is positive, count the leading zeros;	#
#	if negative, count the trailing zeros.  Set the adjusted	#
#	exponent equal to the exponent from A1 and the zero count	#
#	added if SE = 1 and subtracted if SE = 0.  Scale the		#
#	mantissa the equivalent of forcing in the bcd value:		#
#									#
#	SE = 0	a non-zero digit in the integer position		#
#	SE = 1	a non-zero digit in Mant0, lsd of the fraction		#
#									#
#	this will ensure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of		#
#	10^(2^n) values.  To reduce the error in forming factors	#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table	#
#	in the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor.  This is done by multiplying the		#
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
#	exponent sign is positive, and dividing FP0 by FP1 if		#
#	it is negative.							#
#									#
#	Clean up and return. Check if the final mul or div was inexact.	#
#	If so, set INEX1 in USER_FPSR.					#
#									#
#	d2-d5 and fp1 are saved on entry and restored on exit; d0, d1,	#
#	a0 and a1 are used as scratch.  FPCR and FPSR are written with	#
#	zero before returning.						#
#									#
#########################################################################

#
#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
#	to nearest, minus, and plus, respectively.  The tables include
#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
#	is required until the power is greater than 27, however, all
#	tables include the first 5 for ease of indexing.
#
# RTABLE is indexed by {FPCR[6:5],SM,SE} and yields the rounding mode
# bits to use while building the power-of-ten factor (see pwrten).
#
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7			# dbf count: 8 digits per mantissa lword
	set		FSTRT,0			# bit offset of first mantissa digit

	set		ESTRT,4			# bit offset of first exponent digit
	set		EDIGITS,2		# dbf count: 3 exponent digits

	global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 now points at the working copy

	movm.l		&0x3c00,-(%sp)		# save d2-d5
# For the -(An) form the fmovm register mask is bit-reversed: bit 0
# selects fp0 and bit 1 selects fp1 (compare "fmovm.x &0x7,-(%sp)" for
# fp0-fp2 in bindec).  The mask must therefore be 0x2 so that the
# "fmovm.x (%sp)+,&0x40" in the epilogue really restores the caller's
# fp1; the former mask of 0x1 pushed fp0 instead.
	fmovm.x		&0x2,-(%sp)		# save fp1
#
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	( )  a0: pointer to working bcd value
#	(*)  FP_SCR0: working copy of original bcd value
#	(*)  (sp): adjusted exponent (absolute value), pushed at e_save
#
calc_e:
	mov.l		&EDIGITS,%d2		# dbf count for the 3 exponent digits
	mov.l		&ESTRT,%d3		# counter to pick up digits
	mov.l		(%a0),%d4		# get first word of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 + d0
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
	btst		&30,%d4			# get SE
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save exp on stack
#
#
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#  2. Correct for mantissa sign.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_m:
#	(*)  d0: temp digit storage
#	(*)  d1: lword counter
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: words 2 and 3 of bcd
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( )  FP_SCR0: working copy of original bcd value
#	( )  (sp): adjusted exponent saved by calc_e
#
calc_m:
	mov.l		&1,%d1			# lword counter, init to 1
	fmov.s		&0x00000000,%fp0	# accumulator
#
#
#  Since the packed number has a long word between the first & second parts,
#  get the integer digit then skip down & get the rest of the
#  mantissa.  We will unroll the loop once.
#
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
	fadd.b		%d0,%fp0		# add digit to sum in fp0
#
#
#  Get the rest of the mantissa.
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l		&FSTRT,%d3		# counter to pick up digits
	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
md2b:
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
#
#
#  If all the digits (8) in that long word have been converted (d2=0),
#  then inc d1 (=2) to point to the next long word and reset d3 to 0
#  to initialize the digit offset, and set d2 to 7 for the digit count;
#  else continue with this long word.
#
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,md2b		# check for last digit in this lw
nextlw:
	addq.l		&1,%d1			# inc lw pointer in mantissa
	cmp.l		%d1,&2			# test for last lw
	ble.b		loadlw			# if not, get last one
#
#  Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
	btst		&31,(%a0)		# test sign of the mantissa
	beq.b		ap_st_z			# if clear, go to append/strip zeros
	fneg.x		%fp0			# if set, negate fp0
#
# Append/strip zeros:
#
#  For adjusted exponents which have an absolute value greater than 27*,
#  this routine calculates the amount needed to normalize the mantissa
#  for the adjusted exponent.  That number is subtracted from the exp
#  if the exp was positive, and added if it was negative.  The purpose
#  of this is to reduce the value of the exponent and the possibility
#  of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
#	   and set SE.
#	6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp.
#   5. Check if the exp has crossed zero in #3 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
#   any adjustment due to append/strip zeros will drive the resultant
#   exponent towards zero.  Since all pwrten constants with a power
#   of 27 or less are exact, there is no need to use this routine to
#   attempt to lessen the resultant exponent.
#
# Register usage:
#
#  ap_st_z:
#	(*)  d0: temp digit storage
#	(*)  d1: zero count
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	(*)  d5: lword counter
#	( )  a0: pointer to working bcd value
#	( )  FP_SCR0: working copy of original bcd value
#	( )  (sp): adjusted exponent saved by calc_e
#
#
# First check the absolute value of the exponent to see if this
# routine is necessary.  If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
#
ap_st_z:
	mov.l		(%sp),%d1		# load expA for range test
	cmp.l		%d1,&27			# test is with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, scan its digits
	addq.l		&8,%d1			# lw 2 is zero: inc count by 8
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init offset reg
	mov.l		&7,%d2			# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still pos, go to pwrten
	neg.l		%d1			# now it's neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_p_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
	bra.b		pwrten			# go calc pwrten
#
# This section handles a negative adjusted exponent.
#
ap_st_n:
	clr.l		%d1			# clr counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# inc counter by 8
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last digit
	mov.l		&7,%d2			# init digit counter
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_n_gd		# get next digit
ap_n_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bgt.b		ap_n_fm			# if still pos, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the appending of
# zeros to the mantissa.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_n_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
#
#
# Calculate power-of-ten factor from adjusted and shifted exponent.
#
# Register usage:
#
#  pwrten:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
#	(*)  d3: FPCR work copy
#	( )  d4: first word of bcd
#	(*)  a1: RTABLE pointer
#  calc_p:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d3: PWRTxx table index
#	( )  a0: pointer to working copy of bcd
#	(*)  a1: PWRTxx pointer
#	(*) fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
#
#	ANY	  ANY	RN	RN
#
#	 +	   +	RP	RP
#	 -	   +	RP	RM
#	 +	   -	RP	RM
#	 -	   -	RP	RP
#
#	 +	   +	RM	RM
#	 -	   +	RM	RP
#	 +	   -	RM	RP
#	 -	   -	RM	RM
#
#	 +	   +	RZ	RM
#	 -	   +	RZ	RM
#	 +	   -	RZ	RP
#	 -	   -	RZ	RP
#
#
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# write correct PTENxx table
	bcc.b		not_rp			# to a1
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# keep checking
	bcc.b		not_rm
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next rtable entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
#
#
#  Check the sign of the adjusted exp and make the value in fp0 the
#  same sign. If the exp was pos then multiply fp1*fp0;
#  else divide fp0/fp1.
#
# Register Usage:
#  norm:
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( ) fp1: scaling factor - 10**(abs(exp))
#
pnorm:
	btst		&30,(%a0)		# test the sign of the exponent
	beq.b		mul			# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
#
#
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# clear 1 lw param
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr
	fmov.l		&0x0,%fpsr
	rts

#########################################################################
# bindec(): Converts an input in extended precision format to bcd format#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to the input extended precision value in memory.	#
#	     the input may be either normalized, unnormalized, or	#
#	     denormalized.						#
#	d0 = contains the k-factor sign-extended to 32-bits.		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = bcd format result on the stack.			#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
#		The k-factor is saved for use in d7. Clear the		#
#		BINDEC_FLG for separating normalized/denormalized	#
#		input.
If input is unnormalized or denormalized, #23478# normalize it. #23479# #23480# A2. Set X = abs(input). #23481# #23482# A3. Compute ILOG. #23483# ILOG is the log base 10 of the input value. It is #23484# approximated by adding e + 0.f when the original #23485# value is viewed as 2^^e * 1.f in extended precision. #23486# This value is stored in d6. #23487# #23488# A4. Clr INEX bit. #23489# The operation in A3 above may have set INEX2. #23490# #23491# A5. Set ICTR = 0; #23492# ICTR is a flag used in A13. It must be set before the #23493# loop entry A6. #23494# #23495# A6. Calculate LEN. #23496# LEN is the number of digits to be displayed. The #23497# k-factor can dictate either the total number of digits, #23498# if it is a positive number, or the number of digits #23499# after the decimal point which are to be included as #23500# significant. See the 68882 manual for examples. #23501# If LEN is computed to be greater than 17, set OPERR in #23502# USER_FPSR. LEN is stored in d4. #23503# #23504# A7. Calculate SCALE. #23505# SCALE is equal to 10^ISCALE, where ISCALE is the number #23506# of decimal places needed to insure LEN integer digits #23507# in the output before conversion to bcd. LAMBDA is the #23508# sign of ISCALE, used in A9. Fp1 contains #23509# 10^^(abs(ISCALE)) using a rounding mode which is a #23510# function of the original rounding mode and the signs #23511# of ISCALE and X. A table is given in the code. #23512# #23513# A8. Clr INEX; Force RZ. #23514# The operation in A3 above may have set INEX2. #23515# RZ mode is forced for the scaling operation to insure #23516# only one rounding error. The grs bits are collected in #23517# the INEX flag for use in A10. #23518# #23519# A9. Scale X -> Y. #23520# The mantissa is scaled to the desired number of #23521# significant digits. The excess digits are collected #23522# in INEX2. #23523# #23524# A10. Or in INEX. #23525# If INEX is set, round error occurred. 
This is #23526# compensated for by 'or-ing' in the INEX2 flag to #23527# the lsb of Y. #23528# #23529# A11. Restore original FPCR; set size ext. #23530# Perform FINT operation in the user's rounding mode. #23531# Keep the size to extended. #23532# #23533# A12. Calculate YINT = FINT(Y) according to user's rounding #23534# mode. The FPSP routine sintd0 is used. The output #23535# is in fp0. #23536# #23537# A13. Check for LEN digits. #23538# If the int operation results in more than LEN digits, #23539# or less than LEN -1 digits, adjust ILOG and repeat from #23540# A6. This test occurs only on the first pass. If the #23541# result is exactly 10^LEN, decrement ILOG and divide #23542# the mantissa by 10. #23543# #23544# A14. Convert the mantissa to bcd. #23545# The binstr routine is used to convert the LEN digit #23546# mantissa to bcd in memory. The input to binstr is #23547# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #23548# such that the decimal point is to the left of bit 63. #23549# The bcd digits are stored in the correct position in #23550# the final string area in memory. #23551# #23552# A15. Convert the exponent to bcd. #23553# As in A14 above, the exp is converted to bcd and the #23554# digits are stored in the final string. #23555# Test the length of the final exponent string. If the #23556# length is 4, set operr. #23557# #23558# A16. Write sign bits to final string. 
#23559# #23560#########################################################################2356123562set BINDEC_FLG, EXC_TEMP # DENORM flag2356323564# Constants in extended precision23565PLOG2:23566long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x0000000023567PLOG2UP1:23568long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x000000002356923570# Constants in single precision23571FONE:23572long 0x3F800000,0x00000000,0x00000000,0x0000000023573FTWO:23574long 0x40000000,0x00000000,0x00000000,0x0000000023575FTEN:23576long 0x41200000,0x00000000,0x00000000,0x0000000023577F4933:23578long 0x459A2800,0x00000000,0x00000000,0x000000002357923580RBDTBL:23581byte 0,0,0,023582byte 3,3,2,223583byte 3,2,2,323584byte 2,3,3,22358523586# Implementation Notes:23587#23588# The registers are used as follows:23589#23590# d0: scratch; LEN input to binstr23591# d1: scratch23592# d2: upper 32-bits of mantissa for binstr23593# d3: scratch;lower 32-bits of mantissa for binstr23594# d4: LEN23595# d5: LAMBDA/ICTR23596# d6: ILOG23597# d7: k-factor23598# a0: ptr for original operand/final result23599# a1: scratch pointer23600# a2: pointer to FP_X; abs(original value) in ext23601# fp0: scratch23602# fp1: scratch23603# fp2: scratch23604# F_SCR1:23605# F_SCR2:23606# L_SCR1:23607# L_SCR2:2360823609global bindec23610bindec:23611movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}23612fmovm.x &0x7,-(%sp) # {%fp0-%fp2}2361323614# A1. Set RM and size ext. Set SIGMA = sign input;23615# The k-factor is saved for use in d7. Clear BINDEC_FLG for23616# separating normalized/denormalized input. If the input23617# is a denormalized number, set the BINDEC_FLG memory word23618# to signal denorm. 
If the input is unnormalized, normalize23619# the input and test for denormalized result.23620#23621fmov.l &rm_mode*0x10,%fpcr # set RM and ext23622mov.l (%a0),L_SCR2(%a6) # save exponent for sign check23623mov.l %d0,%d7 # move k-factor to d72362423625clr.b BINDEC_FLG(%a6) # clr norm/denorm flag23626cmpi.b STAG(%a6),&DENORM # is input a DENORM?23627bne.w A2_str # no; input is a NORM2362823629#23630# Normalize the denorm23631#23632un_de_norm:23633mov.w (%a0),%d023634and.w &0x7fff,%d0 # strip sign of normalized exp23635mov.l 4(%a0),%d123636mov.l 8(%a0),%d223637norm_loop:23638sub.w &1,%d023639lsl.l &1,%d223640roxl.l &1,%d123641tst.l %d123642bge.b norm_loop23643#23644# Test if the normalized input is denormalized23645#23646tst.w %d023647bgt.b pos_exp # if greater than zero, it is a norm23648st BINDEC_FLG(%a6) # set flag for denorm23649pos_exp:23650and.w &0x7fff,%d0 # strip sign of normalized exp23651mov.w %d0,(%a0)23652mov.l %d1,4(%a0)23653mov.l %d2,8(%a0)2365423655# A2. Set X = abs(input).23656#23657A2_str:23658mov.l (%a0),FP_SCR1(%a6) # move input to work space23659mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space23660mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space23661and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)2366223663# A3. Compute ILOG.23664# ILOG is the log base 10 of the input value. It is approx-23665# imated by adding e + 0.f when the original value is viewed23666# as 2^^e * 1.f in extended precision. 
This value is stored23667# in d6.23668#23669# Register usage:23670# Input/Output23671# d0: k-factor/exponent23672# d2: x/x23673# d3: x/x23674# d4: x/x23675# d5: x/x23676# d6: x/ILOG23677# d7: k-factor/Unchanged23678# a0: ptr for original operand/final result23679# a1: x/x23680# a2: x/x23681# fp0: x/float(ILOG)23682# fp1: x/x23683# fp2: x/x23684# F_SCR1:x/x23685# F_SCR2:Abs(X)/Abs(X) with $3fff exponent23686# L_SCR1:x/x23687# L_SCR2:first word of X packed/Unchanged2368823689tst.b BINDEC_FLG(%a6) # check for denorm23690beq.b A3_cont # if clr, continue with norm23691mov.l &-4933,%d6 # force ILOG = -493323692bra.b A4_str23693A3_cont:23694mov.w FP_SCR1(%a6),%d0 # move exp to d023695mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff23696fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f23697sub.w &0x3fff,%d0 # strip off bias23698fadd.w %d0,%fp0 # add in exp23699fsub.s FONE(%pc),%fp0 # subtract off 1.023700fbge.w pos_res # if pos, branch23701fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP123702fmov.l %fp0,%d6 # put ILOG in d6 as a lword23703bra.b A4_str # go move out ILOG23704pos_res:23705fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG223706fmov.l %fp0,%d6 # put ILOG in d6 as a lword237072370823709# A4. Clr INEX bit.23710# The operation in A3 above may have set INEX2.2371123712A4_str:23713fmov.l &0,%fpsr # zero all of fpsr - nothing needed237142371523716# A5. Set ICTR = 0;23717# ICTR is a flag used in A13. It must be set before the23718# loop entry A6. The lower word of d5 is used for ICTR.2371923720clr.w %d5 # clear ICTR2372123722# A6. Calculate LEN.23723# LEN is the number of digits to be displayed. The k-factor23724# can dictate either the total number of digits, if it is23725# a positive number, or the number of digits after the23726# original decimal point which are to be included as23727# significant. See the 68882 manual for examples.23728# If LEN is computed to be greater than 17, set OPERR in23729# USER_FPSR. 
LEN is stored in d4.23730#23731# Register usage:23732# Input/Output23733# d0: exponent/Unchanged23734# d2: x/x/scratch23735# d3: x/x23736# d4: exc picture/LEN23737# d5: ICTR/Unchanged23738# d6: ILOG/Unchanged23739# d7: k-factor/Unchanged23740# a0: ptr for original operand/final result23741# a1: x/x23742# a2: x/x23743# fp0: float(ILOG)/Unchanged23744# fp1: x/x23745# fp2: x/x23746# F_SCR1:x/x23747# F_SCR2:Abs(X) with $3fff exponent/Unchanged23748# L_SCR1:x/x23749# L_SCR2:first word of X packed/Unchanged2375023751A6_str:23752tst.l %d7 # branch on sign of k23753ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k23754mov.l %d7,%d4 # if k > 0, LEN = k23755bra.b len_ck # skip to LEN check23756k_neg:23757mov.l %d6,%d4 # first load ILOG to d423758sub.l %d7,%d4 # subtract off k23759addq.l &1,%d4 # add in the 123760len_ck:23761tst.l %d4 # LEN check: branch on sign of LEN23762ble.b LEN_ng # if neg, set LEN = 123763cmp.l %d4,&17 # test if LEN > 1723764ble.b A7_str # if not, forget it23765mov.l &17,%d4 # set max LEN = 1723766tst.l %d7 # if negative, never set OPERR23767ble.b A7_str # if positive, continue23768or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR23769bra.b A7_str # finished here23770LEN_ng:23771mov.l &1,%d4 # min LEN is 1237722377323774# A7. Calculate SCALE.23775# SCALE is equal to 10^ISCALE, where ISCALE is the number23776# of decimal places needed to insure LEN integer digits23777# in the output before conversion to bcd. LAMBDA is the sign23778# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using23779# the rounding mode as given in the following table (see23780# Coonen, p. 
7.23 as ref.; however, the SCALE variable is23781# of opposite sign in bindec.sa from Coonen).23782#23783# Initial USE23784# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]23785# ----------------------------------------------23786# RN 00 0 0 00/0 RN23787# RN 00 0 1 00/0 RN23788# RN 00 1 0 00/0 RN23789# RN 00 1 1 00/0 RN23790# RZ 01 0 0 11/3 RP23791# RZ 01 0 1 11/3 RP23792# RZ 01 1 0 10/2 RM23793# RZ 01 1 1 10/2 RM23794# RM 10 0 0 11/3 RP23795# RM 10 0 1 10/2 RM23796# RM 10 1 0 10/2 RM23797# RM 10 1 1 11/3 RP23798# RP 11 0 0 10/2 RM23799# RP 11 0 1 11/3 RP23800# RP 11 1 0 11/3 RP23801# RP 11 1 1 10/2 RM23802#23803# Register usage:23804# Input/Output23805# d0: exponent/scratch - final is 023806# d2: x/0 or 24 for A923807# d3: x/scratch - offset ptr into PTENRM array23808# d4: LEN/Unchanged23809# d5: 0/ICTR:LAMBDA23810# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))23811# d7: k-factor/Unchanged23812# a0: ptr for original operand/final result23813# a1: x/ptr to PTENRM array23814# a2: x/x23815# fp0: float(ILOG)/Unchanged23816# fp1: x/10^ISCALE23817# fp2: x/x23818# F_SCR1:x/x23819# F_SCR2:Abs(X) with $3fff exponent/Unchanged23820# L_SCR1:x/x23821# L_SCR2:first word of X packed/Unchanged2382223823A7_str:23824tst.l %d7 # test sign of k23825bgt.b k_pos # if pos and > 0, skip this23826cmp.l %d7,%d6 # test k - ILOG23827blt.b k_pos # if ILOG >= k, skip this23828mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k23829k_pos:23830mov.l %d6,%d0 # calc ILOG + 1 - LEN in d023831addq.l &1,%d0 # add the 123832sub.l %d4,%d0 # sub off LEN23833swap %d5 # use upper word of d5 for LAMBDA23834clr.w %d5 # set it zero initially23835clr.w %d2 # set up d2 for very small case23836tst.l %d0 # test sign of ISCALE23837bge.b iscale # if pos, skip next inst23838addq.w &1,%d5 # if neg, set LAMBDA true23839cmp.l %d0,&0xffffecd4 # test iscale <= -490823840bgt.b no_inf # if false, skip rest23841add.l &24,%d0 # add in 24 to iscale23842mov.l &24,%d2 # put 24 in d2 for A923843no_inf:23844neg.l %d0 # and take abs of 
ISCALE23845iscale:23846fmov.s FONE(%pc),%fp1 # init fp1 to 123847bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits23848lsl.w &1,%d1 # put them in bits 2:123849add.w %d5,%d1 # add in LAMBDA23850lsl.w &1,%d1 # put them in bits 3:123851tst.l L_SCR2(%a6) # test sign of original x23852bge.b x_pos # if pos, don't set bit 023853addq.l &1,%d1 # if neg, set bit 023854x_pos:23855lea.l RBDTBL(%pc),%a2 # load rbdtbl base23856mov.b (%a2,%d1),%d3 # load d3 with new rmode23857lsl.l &4,%d3 # put bits in proper position23858fmov.l %d3,%fpcr # load bits into fpu23859lsr.l &4,%d3 # put bits in proper position23860tst.b %d3 # decode new rmode for pten table23861bne.b not_rn # if zero, it is RN23862lea.l PTENRN(%pc),%a1 # load a1 with RN table base23863bra.b rmode # exit decode23864not_rn:23865lsr.b &1,%d3 # get lsb in carry23866bcc.b not_rp2 # if carry clear, it is RM23867lea.l PTENRP(%pc),%a1 # load a1 with RP table base23868bra.b rmode # exit decode23869not_rp2:23870lea.l PTENRM(%pc),%a1 # load a1 with RM table base23871rmode:23872clr.l %d3 # clr table index23873e_loop2:23874lsr.l &1,%d0 # shift next bit into carry23875bcc.b e_next2 # if zero, skip the mul23876fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)23877e_next2:23878add.l &12,%d3 # inc d3 to next pwrten table entry23879tst.l %d0 # test if ISCALE is zero23880bne.b e_loop2 # if not, loop2388123882# A8. Clr INEX; Force RZ.23883# The operation in A3 above may have set INEX2.23884# RZ mode is forced for the scaling operation to insure23885# only one rounding error. The grs bits are collected in23886# the INEX flag for use in A10.23887#23888# Register usage:23889# Input/Output2389023891fmov.l &0,%fpsr # clr INEX23892fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode2389323894# A9. Scale X -> Y.23895# The mantissa is scaled to the desired number of significant23896# digits. The excess digits are collected in INEX2. If mul,23897# Check d2 for excess 10 exponential value. 
If not zero,23898# the iscale value would have caused the pwrten calculation23899# to overflow. Only a negative iscale can cause this, so23900# multiply by 10^(d2), which is now only allowed to be 24,23901# with a multiply by 10^8 and 10^16, which is exact since23902# 10^24 is exact. If the input was denormalized, we must23903# create a busy stack frame with the mul command and the23904# two operands, and allow the fpu to complete the multiply.23905#23906# Register usage:23907# Input/Output23908# d0: FPCR with RZ mode/Unchanged23909# d2: 0 or 24/unchanged23910# d3: x/x23911# d4: LEN/Unchanged23912# d5: ICTR:LAMBDA23913# d6: ILOG/Unchanged23914# d7: k-factor/Unchanged23915# a0: ptr for original operand/final result23916# a1: ptr to PTENRM array/Unchanged23917# a2: x/x23918# fp0: float(ILOG)/X adjusted for SCALE (Y)23919# fp1: 10^ISCALE/Unchanged23920# fp2: x/x23921# F_SCR1:x/x23922# F_SCR2:Abs(X) with $3fff exponent/Unchanged23923# L_SCR1:x/x23924# L_SCR2:first word of X packed/Unchanged2392523926A9_str:23927fmov.x (%a0),%fp0 # load X from memory23928fabs.x %fp0 # use abs(X)23929tst.w %d5 # LAMBDA is in lower word of d523930bne.b sc_mul # if neg (LAMBDA = 1), scale by mul23931fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp023932bra.w A10_st # branch to A102393323934sc_mul:23935tst.b BINDEC_FLG(%a6) # check for denorm23936beq.w A9_norm # if norm, continue with mul2393723938# for DENORM, we must calculate:23939# fp0 = input_op * 10^ISCALE * 10^2423940# since the input operand is a DENORM, we can't multiply it directly.23941# so, we do the multiplication of the exponents and mantissas separately.23942# in this way, we avoid underflow on intermediate stages of the23943# multiplication and guarantee a result without exception.23944fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack2394523946mov.w (%sp),%d3 # grab exponent23947andi.w &0x7fff,%d3 # clear sign23948ori.w &0x8000,(%a0) # make DENORM exp negative23949add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp23950subi.w 
&0x3fff,%d3 # subtract BIAS23951add.w 36(%a1),%d323952subi.w &0x3fff,%d3 # subtract BIAS23953add.w 48(%a1),%d323954subi.w &0x3fff,%d3 # subtract BIAS2395523956bmi.w sc_mul_err # is result is DENORM, punt!!!2395723958andi.w &0x8000,(%sp) # keep sign23959or.w %d3,(%sp) # insert new exponent23960andi.w &0x7fff,(%a0) # clear sign bit on DENORM again23961mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk23962mov.l 0x4(%a0),-(%sp)23963mov.l &0x3fff0000,-(%sp) # force exp to zero23964fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp023965fmul.x (%sp)+,%fp02396623967# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^823968# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^1623969mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa23970mov.l 36+4(%a1),-(%sp)23971mov.l &0x3fff0000,-(%sp) # force exp to zero23972mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa23973mov.l 48+4(%a1),-(%sp)23974mov.l &0x3fff0000,-(%sp)# force exp to zero23975fmul.x (%sp)+,%fp0 # multiply fp0 by 10^823976fmul.x (%sp)+,%fp0 # multiply fp0 by 10^1623977bra.b A10_st2397823979sc_mul_err:23980bra.b sc_mul_err2398123982A9_norm:23983tst.w %d2 # test for small exp case23984beq.b A9_con # if zero, continue as normal23985fmul.x 36(%a1),%fp0 # multiply fp0 by 10^823986fmul.x 48(%a1),%fp0 # multiply fp0 by 10^1623987A9_con:23988fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp02398923990# A10. Or in INEX.23991# If INEX is set, round error occurred. 
This is compensated23992# for by 'or-ing' in the INEX2 flag to the lsb of Y.23993#23994# Register usage:23995# Input/Output23996# d0: FPCR with RZ mode/FPSR with INEX2 isolated23997# d2: x/x23998# d3: x/x23999# d4: LEN/Unchanged24000# d5: ICTR:LAMBDA24001# d6: ILOG/Unchanged24002# d7: k-factor/Unchanged24003# a0: ptr for original operand/final result24004# a1: ptr to PTENxx array/Unchanged24005# a2: x/ptr to FP_SCR1(a6)24006# fp0: Y/Y with lsb adjusted24007# fp1: 10^ISCALE/Unchanged24008# fp2: x/x2400924010A10_st:24011fmov.l %fpsr,%d0 # get FPSR24012fmov.x %fp0,FP_SCR1(%a6) # move Y to memory24013lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR124014btst &9,%d0 # check if INEX2 set24015beq.b A11_st # if clear, skip rest24016or.l &1,8(%a2) # or in 1 to lsb of mantissa24017fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu240182401924020# A11. Restore original FPCR; set size ext.24021# Perform FINT operation in the user's rounding mode. Keep24022# the size to extended. The sintdo entry point in the sint24023# routine expects the FPCR value to be in USER_FPCR for24024# mode and precision. The original FPCR is saved in L_SCR1.2402524026A11_st:24027mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later24028and.l &0x00000030,USER_FPCR(%a6) # set size to ext,24029# ;block exceptions240302403124032# A12. Calculate YINT = FINT(Y) according to user's rounding mode.24033# The FPSP routine sintd0 is used. 
The output is in fp0.24034#24035# Register usage:24036# Input/Output24037# d0: FPSR with AINEX cleared/FPCR with size set to ext24038# d2: x/x/scratch24039# d3: x/x24040# d4: LEN/Unchanged24041# d5: ICTR:LAMBDA/Unchanged24042# d6: ILOG/Unchanged24043# d7: k-factor/Unchanged24044# a0: ptr for original operand/src ptr for sintdo24045# a1: ptr to PTENxx array/Unchanged24046# a2: ptr to FP_SCR1(a6)/Unchanged24047# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored24048# fp0: Y/YINT24049# fp1: 10^ISCALE/Unchanged24050# fp2: x/x24051# F_SCR1:x/x24052# F_SCR2:Y adjusted for inex/Y with original exponent24053# L_SCR1:x/original USER_FPCR24054# L_SCR2:first word of X packed/Unchanged2405524056A12_st:24057movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}24058mov.l L_SCR1(%a6),-(%sp)24059mov.l L_SCR2(%a6),-(%sp)2406024061lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)24062fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)24063tst.l L_SCR2(%a6) # test sign of original operand24064bge.b do_fint12 # if pos, use Y24065or.l &0x80000000,(%a0) # if neg, use -Y24066do_fint12:24067mov.l USER_FPSR(%a6),-(%sp)24068# bsr sintdo # sint routine returns int in fp02406924070fmov.l USER_FPCR(%a6),%fpcr24071fmov.l &0x0,%fpsr # clear the AEXC bits!!!24072## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode24073## andi.l &0x00000030,%d024074## fmov.l %d0,%fpcr24075fint.x FP_SCR1(%a6),%fp0 # do fint()24076fmov.l %fpsr,%d024077or.w %d0,FPSR_EXCEPT(%a6)24078## fmov.l &0x0,%fpcr24079## fmov.l %fpsr,%d0 # don't keep ccodes24080## or.w %d0,FPSR_EXCEPT(%a6)2408124082mov.b (%sp),USER_FPSR(%a6)24083add.l &4,%sp2408424085mov.l (%sp)+,L_SCR2(%a6)24086mov.l (%sp)+,L_SCR1(%a6)24087movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}2408824089mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent24090mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR2409124092# A13. 
Check for LEN digits.24093# If the int operation results in more than LEN digits,24094# or less than LEN -1 digits, adjust ILOG and repeat from24095# A6. This test occurs only on the first pass. If the24096# result is exactly 10^LEN, decrement ILOG and divide24097# the mantissa by 10. The calculation of 10^LEN cannot24098# be inexact, since all powers of ten up to 10^27 are exact24099# in extended precision, so the use of a previous power-of-ten24100# table will introduce no error.24101#24102#24103# Register usage:24104# Input/Output24105# d0: FPCR with size set to ext/scratch final = 024106# d2: x/x24107# d3: x/scratch final = x24108# d4: LEN/LEN adjusted24109# d5: ICTR:LAMBDA/LAMBDA:ICTR24110# d6: ILOG/ILOG adjusted24111# d7: k-factor/Unchanged24112# a0: pointer into memory for packed bcd string formation24113# a1: ptr to PTENxx array/Unchanged24114# a2: ptr to FP_SCR1(a6)/Unchanged24115# fp0: int portion of Y/abs(YINT) adjusted24116# fp1: 10^ISCALE/Unchanged24117# fp2: x/10^LEN24118# F_SCR1:x/x24119# F_SCR2:Y with original exponent/Unchanged24120# L_SCR1:original USER_FPCR/Unchanged24121# L_SCR2:first word of X packed/Unchanged2412224123A13_st:24124swap %d5 # put ICTR in lower word of d524125tst.w %d5 # check if ICTR = 024126bne not_zr # if non-zero, go to second test24127#24128# Compute 10^(LEN-1)24129#24130fmov.s FONE(%pc),%fp2 # init fp2 to 1.024131mov.l %d4,%d0 # put LEN in d024132subq.l &1,%d0 # d0 = LEN -124133clr.l %d3 # clr table index24134l_loop:24135lsr.l &1,%d0 # shift next bit into carry24136bcc.b l_next # if zero, skip the mul24137fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)24138l_next:24139add.l &12,%d3 # inc d3 to next pwrten table entry24140tst.l %d0 # test if LEN is zero24141bne.b l_loop # if not, loop24142#24143# 10^LEN-1 is computed for this test and A14. 
If the input was24144# denormalized, check only the case in which YINT > 10^LEN.24145#24146tst.b BINDEC_FLG(%a6) # check if input was norm24147beq.b A13_con # if norm, continue with checking24148fabs.x %fp0 # take abs of YINT24149bra test_224150#24151# Compare abs(YINT) to 10^(LEN-1) and 10^LEN24152#24153A13_con:24154fabs.x %fp0 # take abs of YINT24155fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)24156fbge.w test_2 # if greater, do next test24157subq.l &1,%d6 # subtract 1 from ILOG24158mov.w &1,%d5 # set ICTR24159fmov.l &rm_mode*0x10,%fpcr # set rmode to RM24160fmul.s FTEN(%pc),%fp2 # compute 10^LEN24161bra.w A6_str # return to A6 and recompute YINT24162test_2:24163fmul.s FTEN(%pc),%fp2 # compute 10^LEN24164fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN24165fblt.w A14_st # if less, all is ok, go to A1424166fbgt.w fix_ex # if greater, fix and redo24167fdiv.s FTEN(%pc),%fp0 # if equal, divide by 1024168addq.l &1,%d6 # and inc ILOG24169bra.b A14_st # and continue elsewhere24170fix_ex:24171addq.l &1,%d6 # increment ILOG by 124172mov.w &1,%d5 # set ICTR24173fmov.l &rm_mode*0x10,%fpcr # set rmode to RM24174bra.w A6_str # return to A6 and recompute YINT24175#24176# Since ICTR <> 0, we have already been through one adjustment,24177# and shouldn't have another; this is to check if abs(YINT) = 10^LEN24178# 10^LEN is again computed using whatever table is in a1 since the24179# value calculated cannot be inexact.24180#24181not_zr:24182fmov.s FONE(%pc),%fp2 # init fp2 to 1.024183mov.l %d4,%d0 # put LEN in d024184clr.l %d3 # clr table index24185z_loop:24186lsr.l &1,%d0 # shift next bit into carry24187bcc.b z_next # if zero, skip the mul24188fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)24189z_next:24190add.l &12,%d3 # inc d3 to next pwrten table entry24191tst.l %d0 # test if LEN is zero24192bne.b z_loop # if not, loop24193fabs.x %fp0 # get abs(YINT)24194fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN24195fbneq.w A14_st # if not, skip this24196fdiv.s FTEN(%pc),%fp0 # 
divide abs(YINT) by 1024197addq.l &1,%d6 # and inc ILOG by 124198addq.l &1,%d4 # and inc LEN24199fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN2420024201# A14. Convert the mantissa to bcd.24202# The binstr routine is used to convert the LEN digit24203# mantissa to bcd in memory. The input to binstr is24204# to be a fraction; i.e. (mantissa)/10^LEN and adjusted24205# such that the decimal point is to the left of bit 63.24206# The bcd digits are stored in the correct position in24207# the final string area in memory.24208#24209#24210# Register usage:24211# Input/Output24212# d0: x/LEN call to binstr - final is 024213# d1: x/024214# d2: x/ms 32-bits of mant of abs(YINT)24215# d3: x/ls 32-bits of mant of abs(YINT)24216# d4: LEN/Unchanged24217# d5: ICTR:LAMBDA/LAMBDA:ICTR24218# d6: ILOG24219# d7: k-factor/Unchanged24220# a0: pointer into memory for packed bcd string formation24221# /ptr to first mantissa byte in result string24222# a1: ptr to PTENxx array/Unchanged24223# a2: ptr to FP_SCR1(a6)/Unchanged24224# fp0: int portion of Y/abs(YINT) adjusted24225# fp1: 10^ISCALE/Unchanged24226# fp2: 10^LEN/Unchanged24227# F_SCR1:x/Work area for final result24228# F_SCR2:Y with original exponent/Unchanged24229# L_SCR1:original USER_FPCR/Unchanged24230# L_SCR2:first word of X packed/Unchanged2423124232A14_st:24233fmov.l &rz_mode*0x10,%fpcr # force rz for conversion24234fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN24235lea.l FP_SCR0(%a6),%a024236fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory24237mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d224238mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d324239clr.l 4(%a0) # zero word 2 of FP_RES24240clr.l 8(%a0) # zero word 3 of FP_RES24241mov.l (%a0),%d0 # move exponent to d024242swap %d0 # put exponent in lower word24243beq.b no_sft # if zero, don't shift24244sub.l &0x3ffd,%d0 # sub bias less 2 to make fract24245tst.l %d0 # check if > 124246bgt.b no_sft # if so, don't shift24247neg.l %d0 # make exp positive24248m_loop:24249lsr.l 
&1,%d2 # shift d2:d3 right, add 0s24250roxr.l &1,%d3 # the number of places24251dbf.w %d0,m_loop # given in d024252no_sft:24253tst.l %d2 # check for mantissa of zero24254bne.b no_zr # if not, go on24255tst.l %d3 # continue zero check24256beq.b zer_m # if zero, go directly to binstr24257no_zr:24258clr.l %d1 # put zero in d1 for addx24259add.l &0x00000080,%d3 # inc at bit 724260addx.l %d1,%d2 # continue inc24261and.l &0xffffff80,%d3 # strip off lsb not used by 88224262zer_m:24263mov.l %d4,%d0 # put LEN in d0 for binstr call24264addq.l &3,%a0 # a0 points to M16 byte in result24265bsr binstr # call binstr to convert mant242662426724268# A15. Convert the exponent to bcd.24269# As in A14 above, the exp is converted to bcd and the24270# digits are stored in the final string.24271#24272# Digits are stored in L_SCR1(a6) on return from BINDEC as:24273#24274# 32 16 15 024275# -----------------------------------------24276# | 0 | e3 | e2 | e1 | e4 | X | X | X |24277# -----------------------------------------24278#24279# And are moved into their proper places in FP_SCR0. If digit e424280# is non-zero, OPERR is signaled. 
In all cases, all 4 digits are24281# written as specified in the 881/882 manual for packed decimal.24282#24283# Register usage:24284# Input/Output24285# d0: x/LEN call to binstr - final is 024286# d1: x/scratch (0);shift count for final exponent packing24287# d2: x/ms 32-bits of exp fraction/scratch24288# d3: x/ls 32-bits of exp fraction24289# d4: LEN/Unchanged24290# d5: ICTR:LAMBDA/LAMBDA:ICTR24291# d6: ILOG24292# d7: k-factor/Unchanged24293# a0: ptr to result string/ptr to L_SCR1(a6)24294# a1: ptr to PTENxx array/Unchanged24295# a2: ptr to FP_SCR1(a6)/Unchanged24296# fp0: abs(YINT) adjusted/float(ILOG)24297# fp1: 10^ISCALE/Unchanged24298# fp2: 10^LEN/Unchanged24299# F_SCR1:Work area for final result/BCD result24300# F_SCR2:Y with original exponent/ILOG/10^424301# L_SCR1:original USER_FPCR/Exponent digits on return from binstr24302# L_SCR2:first word of X packed/Unchanged2430324304A15_st:24305tst.b BINDEC_FLG(%a6) # check for denorm24306beq.b not_denorm24307ftest.x %fp0 # test for zero24308fbeq.w den_zero # if zero, use k-factor or 493324309fmov.l %d6,%fp0 # float ILOG24310fabs.x %fp0 # get abs of ILOG24311bra.b convrt24312den_zero:24313tst.l %d7 # check sign of the k-factor24314blt.b use_ilog # if negative, use ILOG24315fmov.s F4933(%pc),%fp0 # force exponent to 493324316bra.b convrt # do it24317use_ilog:24318fmov.l %d6,%fp0 # float ILOG24319fabs.x %fp0 # get abs of ILOG24320bra.b convrt24321not_denorm:24322ftest.x %fp0 # test for zero24323fbneq.w not_zero # if zero, force exponent24324fmov.s FONE(%pc),%fp0 # force exponent to 124325bra.b convrt # do it24326not_zero:24327fmov.l %d6,%fp0 # float ILOG24328fabs.x %fp0 # get abs of ILOG24329convrt:24330fdiv.x 24(%a1),%fp0 # compute ILOG/10^424331fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory24332mov.l 4(%a2),%d2 # move word 2 to d224333mov.l 8(%a2),%d3 # move word 3 to d324334mov.w (%a2),%d0 # move exp to d024335beq.b x_loop_fin # if zero, skip the shift24336sub.w &0x3ffd,%d0 # subtract off bias24337neg.w %d0 # make 
exp positive24338x_loop:24339lsr.l &1,%d2 # shift d2:d3 right24340roxr.l &1,%d3 # the number of places24341dbf.w %d0,x_loop # given in d024342x_loop_fin:24343clr.l %d1 # put zero in d1 for addx24344add.l &0x00000080,%d3 # inc at bit 624345addx.l %d1,%d2 # continue inc24346and.l &0xffffff80,%d3 # strip off lsb not used by 88224347mov.l &4,%d0 # put 4 in d0 for binstr call24348lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits24349bsr binstr # call binstr to convert exp24350mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d024351mov.l &12,%d1 # use d1 for shift count24352lsr.l %d1,%d0 # shift d0 right by 1224353bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR024354lsr.l %d1,%d0 # shift d0 right by 1224355bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR024356tst.b %d0 # check if e4 is zero24357beq.b A16_st # if zero, skip rest24358or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR243592436024361# A16. Write sign bits to final string.24362# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).24363#24364# Register usage:24365# Input/Output24366# d0: x/scratch - final is x24367# d2: x/x24368# d3: x/x24369# d4: LEN/Unchanged24370# d5: ICTR:LAMBDA/LAMBDA:ICTR24371# d6: ILOG/ILOG adjusted24372# d7: k-factor/Unchanged24373# a0: ptr to L_SCR1(a6)/Unchanged24374# a1: ptr to PTENxx array/Unchanged24375# a2: ptr to FP_SCR1(a6)/Unchanged24376# fp0: float(ILOG)/Unchanged24377# fp1: 10^ISCALE/Unchanged24378# fp2: 10^LEN/Unchanged24379# F_SCR1:BCD result with correct signs24380# F_SCR2:ILOG/10^424381# L_SCR1:Exponent digits on return from binstr24382# L_SCR2:first word of X packed/Unchanged2438324384A16_st:24385clr.l %d0 # clr d0 for collection of signs24386and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR024387tst.l L_SCR2(%a6) # check sign of original mantissa24388bge.b mant_p # if pos, don't set SM24389mov.l &2,%d0 # move 2 in to d0 for SM24390mant_p:24391tst.l %d6 # check sign of ILOG24392bge.b wr_sgn # if pos, don't set SE24393addq.l 
&1,%d0 # set bit 0 in d0 for SE24394wr_sgn:24395bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR02439624397# Clean up and restore all registers used.2439824399fmov.l &0,%fpsr # clear possible inex2/ainex bits24400fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}24401movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}24402rts2440324404global PTENRN24405PTENRN:24406long 0x40020000,0xA0000000,0x00000000 # 10 ^ 124407long 0x40050000,0xC8000000,0x00000000 # 10 ^ 224408long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 424409long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 824410long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 1624411long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 3224412long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 6424413long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 12824414long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 25624415long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 51224416long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 102424417long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 204824418long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 40962441924420global PTENRP24421PTENRP:24422long 0x40020000,0xA0000000,0x00000000 # 10 ^ 124423long 0x40050000,0xC8000000,0x00000000 # 10 ^ 224424long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 424425long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 824426long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 1624427long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 3224428long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 6424429long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 12824430long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 25624431long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 51224432long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 102424433long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 204824434long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 40962443524436global PTENRM24437PTENRM:24438long 0x40020000,0xA0000000,0x00000000 # 10 ^ 124439long 0x40050000,0xC8000000,0x00000000 # 10 ^ 224440long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 424441long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 824442long 
0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 1624443long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 3224444long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 6424445long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 12824446long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 25624447long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 51224448long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 102424449long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 204824450long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 40962445124452#########################################################################24453# binstr(): Converts a 64-bit binary integer to bcd. #24454# #24455# INPUT *************************************************************** #24456# d2:d3 = 64-bit binary integer #24457# d0 = desired length (LEN) #24458# a0 = pointer to start in memory for bcd characters #24459# (This pointer must point to byte 4 of the first #24460# lword of the packed decimal memory string.) #24461# #24462# OUTPUT ************************************************************** #24463# a0 = pointer to LEN bcd digits representing the 64-bit integer. #24464# #24465# ALGORITHM *********************************************************** #24466# The 64-bit binary is assumed to have a decimal point before #24467# bit 63. The fraction is multiplied by 10 using a mul by 2 #24468# shift and a mul by 8 shift. The bits shifted out of the #24469# msb form a decimal digit. This process is iterated until #24470# LEN digits are formed. #24471# #24472# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #24473# digit formed will be assumed the least significant. This is #24474# to force the first byte formed to have a 0 in the upper 4 bits. #24475# #24476# A2. Beginning of the loop: #24477# Copy the fraction in d2:d3 to d4:d5. #24478# #24479# A3. Multiply the fraction in d2:d3 by 8 using bit-field #24480# extracts and shifts. The three msbs from d2 will go into d1. #24481# #24482# A4. Multiply the fraction in d4:d5 by 2 using shifts. 
#      The msb will be collected by the carry.				#
#									#
#  A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
#      into d2:d3. D1 will contain the bcd digit formed.		#
#									#
#  A6. Test the low word of d7. If zero, the digit formed is the ms	#
#      digit of the next byte; park it in the upper word of d7. If	#
#      non-zero, it is the ls digit; combine it with the parked	#
#      digit and write the resulting byte to memory through a0.	#
#									#
#  A7. Decrement d0 (LEN counter) and repeat the loop until all	#
#      LEN digits have been formed.					#
#									#
#########################################################################

#	Implementation Notes:
#
#	The registers are used as follows:
#
#		d0: LEN counter
#		d1: temp used to form the digit
#		d2: upper 32-bits of fraction for mul by 8
#		d3: lower 32-bits of fraction for mul by 8
#		d4: upper 32-bits of fraction for mul by 2
#		d5: lower 32-bits of fraction for mul by 2
#		d6: temp for bit-field extracts
#		d7: byte digit formation word;digit count {0,1}
#		    (upper word = parked ms digit, lower word = flag)
#		a0: pointer into memory for packed bcd string formation
#
#	d0-d7 are saved on entry and restored on exit. a0 is not saved:
#	it is advanced by one for every byte stored.
#

	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}

#
# A1: Init d7
#
# Starting with the flag set makes the very first digit an "ls" digit,
# so the first byte stored has 0 in its upper nibble.
#
	mov.l		&1,%d7		# init d7 for second digit
	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
	mov.l		%d2,%d4		# copy the fraction before muls
	mov.l		%d3,%d5		# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
	asl.l		&3,%d2		# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
	asl.l		&3,%d3		# shift d3 left by 3 places
	or.l		%d6,%d2		# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
# d6 is swapped so that its low word is zero: addx.w then adds only
# the extend bit into d1.
#
	asl.l		&1,%d5		# mul d5 by 2
	roxl.l		&1,%d4		# mul d4 by 2
	swap		%d6		# put 0 in d6 lower word
	addx.w		%d6,%d1		# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
#
	add.l		%d5,%d3		# add lower 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2		# add with extend upper 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1		# add in extend from add to d1
	swap		%d6		# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w		%d7		# if zero, store digit & to loop
	beq.b		first_d		# if non-zero, form byte & write
sec_d:
	swap		%d7		# bring first digit to word d7b
	asl.w		&4,%d7		# first digit in upper 4 bits d7b
	add.w		%d1,%d7		# add in ls digit to d7b
	mov.b		%d7,(%a0)+	# store d7b byte in memory
	swap		%d7		# bring flag word (d7a) back down
	clr.w		%d7		# set d7a to signal no digits done
	dbf.w		%d0,loop	# do loop some more!
	bra.b		end_bstr	# finished, so exit
first_d:
	swap		%d7		# put digit word in d7b
	mov.w		%d1,%d7		# put new digit in d7b
	swap		%d7		# bring flag word (d7a) back down
	addq.w		&1,%d7		# set d7a to signal first digit done
	dbf.w		%d0,loop	# do loop some more!
# LEN exhausted with a digit still parked: flush it as the upper nibble
# of a final byte (lower nibble is zero).
	swap		%d7		# put last digit in string
	lsl.w		&4,%d7		# move it to upper 4 bits
	mov.b		%d7,(%a0)+	# store it in memory string
#
# Clean up and return. The digits are in memory; d0-d7 are restored.
#
end_bstr:
	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
	rts

#########################################################################
# XDEF ****************************************************************	#
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF
# ****************************************************************	#
#	_real_access() - exit through access error handler		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Flow jumps here when an FP data fetch call gets an error	#
# result. This means the operating system wants an access error frame	#
# made out of the current exception stack frame.			#
#	So, we first call restore() which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.			#
#									#
#########################################################################

# Every entry stub below follows the same pattern:
#   - d0 = size in bytes of the failed access; restore() uses it to undo
#     a postincrement/predecrement update of An.
#   - a stub-specific word is parked in EXC_VOFF(%a6); facc_finish later
#     moves that word into the upper half of the FSLW longword.

facc_in_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
	bra.w		facc_finish

facc_in_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

################################################################

facc_out_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_x:
	mov.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
							# falls through to facc_finish

# here's where we actually create the access error frame from the
# current exception stack frame.
#
# After the unlk, the frame is grown by one longword: the top longword
# (SR, hi(PC)) is pushed again, then lo(PC)/voff and the EA are each
# moved down one longword, which frees 0xc(%sp) for the FSLW. The word
# parked in the old voff slot by the entry stub (now at 0x6(%sp)) becomes
# the upper word of the FSLW, and the voff slot is then overwritten with
# 0x4008.
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value.
# but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# restore():
#   in:   d0 = number of bytes the access would have moved An by
#   uses: d1 as scratch; d0 is negated on the predecrement path;
#         a0 is overwritten on the user-mode a7 path
#   The addressing mode and register number are taken from the low byte
#   of EXC_OPWORD: bits 5:3 = mode (0x18 postinc, 0x20 predec),
#   bits 2:0 = An.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# any other mode: nothing to undo

# dispatch on the register number through a table of 16-bit offsets
# relative to tbl_rest_inc.
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

# a0/a1 are patched in their stacked copies (EXC_DREGS), a2-a5 are
# patched directly in the registers, a6 is patched at (%a6).
ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0
	bra.b		rest_inc