Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c
22240 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28
{
29
return "LOONGARCH" SLJIT_CPUINFO;
30
}
31
32
/* Every LoongArch instruction is one 32-bit word. */
typedef sljit_u32 sljit_ins;

/* Temporary (scratch) registers used internally by the compiler. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Virtual index 0 maps to the hardware zero register (reg_map[0] == 0). */
#define TMP_ZERO 0

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
#define RETURN_ADDR_REG TMP_REG2
#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)

/* Temporary floating-point registers. */
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* Maps sljit virtual register indices to hardware GPR numbers. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
};

/* Maps sljit virtual float register indices to hardware FPR numbers. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
};
55
/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */

/*
LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):

| Format name | Composition |
| 2R | Opcode + Rj + Rd |
| 3R | Opcode + Rk + Rj + Rd |
| 4R | Opcode + Ra + Rk + Rj + Rd |
| 2RI8 | Opcode + I8 + Rj + Rd |
| 2RI12 | Opcode + I12 + Rj + Rd |
| 2RI14 | Opcode + I14 + Rj + Rd |
| 2RI16 | Opcode + I16 + Rj + Rd |
| 1RI21 | Opcode + I21L + Rj + I21H |
| I26 | Opcode + I26L + I26H |

Rd is the destination register operand, while Rj, Rk and Ra ("a" stands for "additional") are the source register operands.
I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
lower parts in the instruction word, denoted by the "L" and "H" suffixes. */

/* Register field placement: Rd at bits [4:0], Rj at [9:5], Rk at [14:10], Ra at [19:15]. */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
#define RA(ra) ((sljit_ins)reg_map[ra] << 15)

/* Same field layout for floating-point register operands (freg_map). */
#define FD(fd) ((sljit_ins)reg_map[fd])
#define FRD(fd) ((sljit_ins)freg_map[fd])
#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)

/* Immediate field encoders: mask the value to the field width and shift it into place.
   Most immediate fields start at bit 10. */
#define IMM_V(imm) ((sljit_ins)(imm) << 10)
#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
/* FIX: the 14-bit mask must be 0x3fff (was mistyped as 0xfff3), otherwise the
   immediate field of 2RI14-format instructions (e.g. LL/SC) is corrupted. */
#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
/* I21/I26 are split: the low 16 bits go to bits [25:10], the high part to the low bits. */
#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))
96
97
/* Opcode placement helpers: shift the major opcode value into the position
   required by each instruction format (the shift amount equals the total
   width of the operand fields below the opcode). */
#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
107
108
/* Base-ISA opcode values, shifted into position by the OPC_* helpers above.
   Values follow the LoongArch instruction encoding tables — verify any change
   against the LoongArch Reference Manual. */

/* Arithmetic operation instructions */
#define ADD_W OPC_3R(0x20)
#define ADD_D OPC_3R(0x21)
#define SUB_W OPC_3R(0x22)
#define SUB_D OPC_3R(0x23)
#define ADDI_W OPC_2RI12(0xa)
#define ADDI_D OPC_2RI12(0xb)
#define ANDI OPC_2RI12(0xd)
#define ORI OPC_2RI12(0xe)
#define XORI OPC_2RI12(0xf)
#define ADDU16I_D OPC_2RI16(0x4)
#define LU12I_W OPC_1RI20(0xa)
#define LU32I_D OPC_1RI20(0xb)
#define LU52I_D OPC_2RI12(0xc)
#define SLT OPC_3R(0x24)
#define SLTU OPC_3R(0x25)
#define SLTI OPC_2RI12(0x8)
#define SLTUI OPC_2RI12(0x9)
#define PCADDI OPC_1RI20(0xc)
#define PCALAU12I OPC_1RI20(0xd)
#define PCADDU12I OPC_1RI20(0xe)
#define PCADDU18I OPC_1RI20(0xf)
#define NOR OPC_3R(0x28)
#define AND OPC_3R(0x29)
#define OR OPC_3R(0x2a)
#define XOR OPC_3R(0x2b)
#define ORN OPC_3R(0x2c)
#define ANDN OPC_3R(0x2d)
#define MUL_W OPC_3R(0x38)
#define MULH_W OPC_3R(0x39)
#define MULH_WU OPC_3R(0x3a)
#define MUL_D OPC_3R(0x3b)
#define MULH_D OPC_3R(0x3c)
#define MULH_DU OPC_3R(0x3d)
#define MULW_D_W OPC_3R(0x3e)
#define MULW_D_WU OPC_3R(0x3f)
#define DIV_W OPC_3R(0x40)
#define MOD_W OPC_3R(0x41)
#define DIV_WU OPC_3R(0x42)
#define MOD_WU OPC_3R(0x43)
#define DIV_D OPC_3R(0x44)
#define MOD_D OPC_3R(0x45)
#define DIV_DU OPC_3R(0x46)
#define MOD_DU OPC_3R(0x47)

/* Bit-shift instructions */
#define SLL_W OPC_3R(0x2e)
#define SRL_W OPC_3R(0x2f)
#define SRA_W OPC_3R(0x30)
#define SLL_D OPC_3R(0x31)
#define SRL_D OPC_3R(0x32)
#define SRA_D OPC_3R(0x33)
#define ROTR_W OPC_3R(0x36)
#define ROTR_D OPC_3R(0x37)
/* The 64-bit immediate-shift forms use a 6-bit shift amount, so their
   opcode sits one bit higher (shifted by 16 rather than 15). */
#define SLLI_W OPC_3R(0x81)
#define SLLI_D ((sljit_ins)(0x41) << 16)
#define SRLI_W OPC_3R(0x89)
#define SRLI_D ((sljit_ins)(0x45) << 16)
#define SRAI_W OPC_3R(0x91)
#define SRAI_D ((sljit_ins)(0x49) << 16)
#define ROTRI_W OPC_3R(0x99)
#define ROTRI_D ((sljit_ins)(0x4d) << 16)

/* Bit-manipulation instructions */
#define CLO_W OPC_2R(0x4)
#define CLZ_W OPC_2R(0x5)
#define CTO_W OPC_2R(0x6)
#define CTZ_W OPC_2R(0x7)
#define CLO_D OPC_2R(0x8)
#define CLZ_D OPC_2R(0x9)
#define CTO_D OPC_2R(0xa)
#define CTZ_D OPC_2R(0xb)
#define REVB_2H OPC_2R(0xc)
#define REVB_4H OPC_2R(0xd)
#define REVB_2W OPC_2R(0xe)
#define REVB_D OPC_2R(0xf)
#define REVH_2W OPC_2R(0x10)
#define REVH_D OPC_2R(0x11)
#define BITREV_4B OPC_2R(0x12)
#define BITREV_8B OPC_2R(0x13)
#define BITREV_W OPC_2R(0x14)
#define BITREV_D OPC_2R(0x15)
#define EXT_W_H OPC_2R(0x16)
#define EXT_W_B OPC_2R(0x17)
/* Bit-string insert/pick encode msb/lsb fields; base opcodes only here. */
#define BSTRINS_W (0x1 << 22 | 1 << 21)
#define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
#define BSTRINS_D (0x2 << 22)
#define BSTRPICK_D (0x3 << 22)

/* Branch instructions */
#define BEQZ OPC_1RI21(0x10)
#define BNEZ OPC_1RI21(0x11)
#define JIRL OPC_2RI16(0x13)
#define B OPC_I26(0x14)
#define BL OPC_I26(0x15)
#define BEQ OPC_2RI16(0x16)
#define BNE OPC_2RI16(0x17)
#define BLT OPC_2RI16(0x18)
#define BGE OPC_2RI16(0x19)
#define BLTU OPC_2RI16(0x1a)
#define BGEU OPC_2RI16(0x1b)

/* Memory access instructions */
#define LD_B OPC_2RI12(0xa0)
#define LD_H OPC_2RI12(0xa1)
#define LD_W OPC_2RI12(0xa2)
#define LD_D OPC_2RI12(0xa3)

#define ST_B OPC_2RI12(0xa4)
#define ST_H OPC_2RI12(0xa5)
#define ST_W OPC_2RI12(0xa6)
#define ST_D OPC_2RI12(0xa7)

#define LD_BU OPC_2RI12(0xa8)
#define LD_HU OPC_2RI12(0xa9)
#define LD_WU OPC_2RI12(0xaa)

/* Register-indexed (reg + reg) load/store forms. */
#define LDX_B OPC_3R(0x7000)
#define LDX_H OPC_3R(0x7008)
#define LDX_W OPC_3R(0x7010)
#define LDX_D OPC_3R(0x7018)

#define STX_B OPC_3R(0x7020)
#define STX_H OPC_3R(0x7028)
#define STX_W OPC_3R(0x7030)
#define STX_D OPC_3R(0x7038)

#define LDX_BU OPC_3R(0x7040)
#define LDX_HU OPC_3R(0x7048)
#define LDX_WU OPC_3R(0x7050)

#define PRELD OPC_2RI12(0xab)

/* Atomic memory access instructions (load-linked / store-conditional) */
#define LL_W OPC_2RI14(0x20)
#define SC_W OPC_2RI14(0x21)
#define LL_D OPC_2RI14(0x22)
#define SC_D OPC_2RI14(0x23)

/* LoongArch V1.10 Instructions */
#define AMCAS_B OPC_3R(0x70B0)
#define AMCAS_H OPC_3R(0x70B1)
#define AMCAS_W OPC_3R(0x70B2)
#define AMCAS_D OPC_3R(0x70B3)

/* Memory barrier instructions */
#define DBAR OPC_3R(0x70e4)

/* Other instructions */
#define BREAK OPC_3R(0x54)
#define DBGCALL OPC_3R(0x55)
/* NOP is ANDI with every operand field zero, i.e. andi $zero, $zero, 0. */
#define NOP ANDI
#define SYSCALL OPC_3R(0x56)
261
262
/* Basic Floating-Point Instructions */
/* Floating-Point Arithmetic Operation Instructions */
#define FADD_S OPC_3R(0x201)
#define FADD_D OPC_3R(0x202)
#define FSUB_S OPC_3R(0x205)
#define FSUB_D OPC_3R(0x206)
#define FMUL_S OPC_3R(0x209)
#define FMUL_D OPC_3R(0x20a)
#define FDIV_S OPC_3R(0x20d)
#define FDIV_D OPC_3R(0x20e)
#define FCMP_COND_S OPC_4R(0xc1)
#define FCMP_COND_D OPC_4R(0xc2)
#define FCOPYSIGN_S OPC_3R(0x225)
#define FCOPYSIGN_D OPC_3R(0x226)
#define FSEL OPC_4R(0xd0)
#define FABS_S OPC_2R(0x4501)
#define FABS_D OPC_2R(0x4502)
#define FNEG_S OPC_2R(0x4505)
#define FNEG_D OPC_2R(0x4506)
/* FMOV_S/FMOV_D were previously also defined here, duplicating the
   identical definitions in the Move Instructions section below; the
   redundant pair has been removed (single definition kept below). */

/* Floating-Point Conversion Instructions */
#define FCVT_S_D OPC_2R(0x4646)
#define FCVT_D_S OPC_2R(0x4649)
#define FTINTRZ_W_S OPC_2R(0x46a1)
#define FTINTRZ_W_D OPC_2R(0x46a2)
#define FTINTRZ_L_S OPC_2R(0x46a9)
#define FTINTRZ_L_D OPC_2R(0x46aa)
#define FFINT_S_W OPC_2R(0x4744)
#define FFINT_S_L OPC_2R(0x4746)
#define FFINT_D_W OPC_2R(0x4748)
#define FFINT_D_L OPC_2R(0x474a)

/* Floating-Point Move Instructions */
#define FMOV_S OPC_2R(0x4525)
#define FMOV_D OPC_2R(0x4526)
#define MOVGR2FR_W OPC_2R(0x4529)
#define MOVGR2FR_D OPC_2R(0x452a)
#define MOVGR2FRH_W OPC_2R(0x452b)
#define MOVFR2GR_S OPC_2R(0x452d)
#define MOVFR2GR_D OPC_2R(0x452e)
#define MOVFRH2GR_S OPC_2R(0x452f)
#define MOVGR2FCSR OPC_2R(0x4530)
#define MOVFCSR2GR OPC_2R(0x4532)
#define MOVFR2CF OPC_2R(0x4534)
#define MOVCF2FR OPC_2R(0x4535)
#define MOVGR2CF OPC_2R(0x4536)
#define MOVCF2GR OPC_2R(0x4537)

/* Floating-Point Branch Instructions */
/* NOTE(review): BCEQZ and BCNEZ expand to the same base value; the bit that
   distinguishes them in hardware appears to be supplied where they are
   emitted — confirm against the branch emitters before changing. */
#define BCEQZ OPC_I26(0x12)
#define BCNEZ OPC_I26(0x12)

/* Floating-Point Common Memory Access Instructions */
#define FLD_S OPC_2RI12(0xac)
#define FLD_D OPC_2RI12(0xae)
#define FST_S OPC_2RI12(0xad)
#define FST_D OPC_2RI12(0xaf)

/* Register-indexed floating-point load/store forms. */
#define FLDX_S OPC_3R(0x7060)
#define FLDX_D OPC_3R(0x7068)
#define FSTX_S OPC_3R(0x7070)
#define FSTX_D OPC_3R(0x7078)
326
327
/* Vector Instructions (LSX/LASX extensions; availability is checked through
   HWCAP, see get_cpu_features below). */

/* Vector Arithmetic Instructions */
#define VOR_V OPC_3R(0xe24d)
#define VXOR_V OPC_3R(0xe24e)
#define VAND_V OPC_3R(0xe24c)
#define VMSKLTZ OPC_2R(0x1ca710)

/* Vector Memory Access Instructions */
#define VLD OPC_2RI12(0xb0)
#define VST OPC_2RI12(0xb1)
#define XVLD OPC_2RI12(0xb2)
#define XVST OPC_2RI12(0xb3)
#define VSTELM OPC_2RI8(0xc40)

/* Vector Float Conversion Instructions */
#define VFCVTL_D_S OPC_2R(0x1ca77c)

/* Vector Bit Manipulate Instructions */
#define VSLLWIL OPC_2R(0x1cc200)

/* Vector Move And Shuffle Instructions */
#define VLDREPL OPC_2R(0xc0000)
#define VINSGR2VR OPC_2R(0x1cbac0)
#define VPICKVE2GR_U OPC_2R(0x1cbce0)
#define VREPLGR2VR OPC_2R(0x1ca7c0)
#define VREPLVE OPC_3R(0xe244)
#define VREPLVEI OPC_2R(0x1cbde0)
#define VSHUF_B OPC_4R(0xd5)
#define XVPERMI OPC_2RI8(0x1dfa)
357
358
/* Signed range of a 12-bit immediate. */
#define I12_MAX (0x7ff)
#define I12_MIN (-0x800)
/* Branch/jump displacement ranges in bytes: the encoded immediates are
   instruction-word offsets, hence the << 2. */
#define BRANCH16_MAX (0x7fff << 2)
#define BRANCH16_MIN (-(0x8000 << 2))
#define BRANCH21_MAX (0xfffff << 2)
#define BRANCH21_MIN (-(0x100000 << 2))
#define JUMP_MAX (0x1ffffff << 2)
#define JUMP_MIN (-(0x2000000 << 2))
#define JIRL_MAX (0x7fff << 2)
#define JIRL_MIN (-(0x8000 << 2))

#define S32_MAX (0x7fffffffl)
#define S32_MIN (-0x80000000l)
#define S52_MAX (0x7ffffffffffffl)

/* Selects the 32-bit (_W) or 64-bit (_D) variant of an opcode macro
   based on the SLJIT_32 bit of the operation type. */
#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))

/* LoongArch CPUCFG register for feature detection */
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_CFG2_LAMCAS (1 << 28)

/* Cached CPUCFG word 2; filled lazily by get_cpu_features(). */
static sljit_u32 cfg2_feature_list = 0;

/* According to Software Development and Build Convention for LoongArch Architectures,
   the status of LSX and LASX extension must be checked through HWCAP */
#include <sys/auxv.h>

#define LOONGARCH_HWCAP_LSX (1 << 4)
#define LOONGARCH_HWCAP_LASX (1 << 5)

/* Cached AT_HWCAP value; filled lazily by get_cpu_features(). */
static sljit_u32 hwcap_feature_list = 0;

/* Feature type selector for get_cpu_features(). */
#define GET_CFG2 0
#define GET_HWCAP 1

/* Nonzero when the CPU supports the V1.10 AMCAS compare-and-swap family. */
#define LOONGARCH_SUPPORT_AMCAS (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2))
395
396
/* Returns the cached CPU feature word selected by feature_type:
   GET_CFG2 -> CPUCFG word 2 (read via the cpucfg instruction),
   GET_HWCAP -> the kernel's AT_HWCAP auxiliary vector value.
   Both words are queried lazily on first use and cached in file-scope
   variables (a cached value of 0 causes a re-query, which is harmless). */
static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
{
	if (cfg2_feature_list == 0)
		__asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
	if (hwcap_feature_list == 0)
		hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);

	return feature_type ? hwcap_feature_list : cfg2_feature_list;
}
405
406
/* Appends a single 32-bit instruction word to the compiler's code buffer
   and bumps the instruction count. Returns SLJIT_SUCCESS, or the compiler
   error via FAIL_IF when the buffer cannot be grown. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *slot;

	slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!slot);

	slot[0] = ins;
	compiler->size += 1;
	return SLJIT_SUCCESS;
}
414
415
/* Selects the shortest encoding for a jump whose target is known at this
   point of code generation, tags the jump with the matching PATCH_* flag,
   and compacts the reserved instruction slots. code_ptr points at the first
   reserved word for this jump; the returned pointer is the last word the
   jump will occupy (the caller continues emitting after it). */
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	jump->addr = jump_addr;
	/* Rewritable jumps always use the longest (absolute) form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		goto exit;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward label: its size is still an (upper bound) word index. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	if (jump->flags & IS_COND) {
		/* If the target fits in the conditional branch itself, invert the
		   condition and fold the jump into the preceding branch. */
		diff += SSIZE_OF(ins);

		if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
			code_ptr--;
			/* Toggle the condition bit of the branch opcode. */
			code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000;
			jump->flags |= PATCH_B;
			jump->addr = (sljit_uw)code_ptr;
			return code_ptr;
		}

		diff -= SSIZE_OF(ins);
	}

	/* 26-bit pc-relative B/BL form: one instruction. */
	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
		if (jump->flags & IS_COND) {
			/* Skip-over distance for the guarding conditional branch. */
			code_ptr[-1] |= (sljit_ins)IMM_I16(2);
		}

		jump->flags |= PATCH_J;
		return code_ptr;
	}

	/* 32-bit pc-relative: PCADDU12I + JIRL (two instructions). */
	if (diff >= S32_MIN && diff <= S32_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] |= (sljit_ins)IMM_I16(3);

		jump->flags |= PATCH_REL32;
		code_ptr[1] = code_ptr[0];
		return code_ptr + 1;
	}

	/* Absolute 32-bit target: two instructions. */
	if (target_addr <= (sljit_uw)S32_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] |= (sljit_ins)IMM_I16(3);

		jump->flags |= PATCH_ABS32;
		code_ptr[1] = code_ptr[0];
		return code_ptr + 1;
	}

	/* Absolute 52-bit target: three instructions. */
	if (target_addr <= S52_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] |= (sljit_ins)IMM_I16(4);

		jump->flags |= PATCH_ABS52;
		code_ptr[2] = code_ptr[0];
		return code_ptr + 2;
	}

exit:
	/* Full 64-bit absolute form: four instructions. */
	if (jump->flags & IS_COND)
		code_ptr[-1] |= (sljit_ins)IMM_I16(5);
	code_ptr[3] = code_ptr[0];
	return code_ptr + 3;
}
495
496
/* For a "load address into register" (mov_addr) pseudo-instruction, picks
   the shortest load sequence that can reach the target, tags the jump with
   the matching PATCH_* flag, and returns the number of EXTRA instruction
   words needed beyond the first (1, 2 or 3). */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	/* The size estimate from reduce_code_size must fit in the flag field. */
	SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else {
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward label: translate its word index to a buffer address. */
		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	}

	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	/* PC-relative 32-bit reach: PCADDU12I + ADDI_D (2 words total). */
	if (diff >= S32_MIN && diff <= S32_MAX) {
		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
		jump->flags |= PATCH_REL32;
		return 1;
	}

	/* Absolute 32-bit: LU12I_W + ORI (2 words total). */
	if (addr <= S32_MAX) {
		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
		jump->flags |= PATCH_ABS32;
		return 1;
	}

	/* Absolute 52-bit: adds LU32I_D (3 words total). */
	if (addr <= S52_MAX) {
		SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
		jump->flags |= PATCH_ABS52;
		return 2;
	}

	/* Full 64-bit: adds LU52I_D (4 words total). */
	SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
	return 3;
}
536
537
/* Patches the reserved instruction slots of a jump (or mov_addr) with the
   final sequence that materializes the target address, according to the
   PATCH_* flag chosen earlier. For plain jumps the register is TMP_REG1 and
   the trailing instruction is the JIRL recorded at emit time; for mov_addr
   the destination register number was stashed in the first slot. */
static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_uw flags = jump->flags;
	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
	sljit_ins *ins = (sljit_ins*)jump->addr;
	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
	SLJIT_UNUSED_ARG(executable_offset);

	if (flags & PATCH_REL32) {
		/* PC-relative: PCADDU12I (high 20 bits) + low-12 fixup. */
		addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);

		SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);

		/* The low 12 bits are applied sign-extended; pre-bias the high
		   part when bit 11 is set. */
		if ((addr & 0x800) != 0)
			addr += 0x1000;

		ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);

		if (!(flags & JUMP_MOV_ADDR)) {
			/* Fold the low bits into the JIRL offset field. */
			SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
			ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
		} else
			ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
		return;
	}

	/* Absolute forms: LU12I_W loads bits [31:12]; longer forms append
	   LU32I_D (bits [51:32]) and LU52I_D (bits [63:52]). */
	if (flags & PATCH_ABS32) {
		SLJIT_ASSERT(addr <= S32_MAX);
		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
	} else if (flags & PATCH_ABS52) {
		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
		ins += 1;
	} else {
		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
		ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
		ins += 2;
	}

	/* Final word: either the jump's JIRL (low bits in its offset) or an
	   ORI completing the low 12 bits of the address. */
	if (!(flags & JUMP_MOV_ADDR)) {
		SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
		ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
	} else
		ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
}
583
584
/* Handles an aligned label: rounds code_ptr down to the alignment boundary
   stored in ext_label->data (a mask of low bits to clear). */
static SLJIT_INLINE sljit_ins *process_extended_label(sljit_ins *code_ptr, struct sljit_extended_label *ext_label)
{
	sljit_uw aligned_addr;

	SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);

	aligned_addr = (sljit_uw)code_ptr & ~(ext_label->data);
	return (sljit_ins*)aligned_addr;
}
589
590
/* First pass over labels/jumps/consts: estimates the real size of every
   jump and mov_addr, records the estimate in the jump flags (above
   JUMP_SIZE_SHIFT), and shifts all later addresses down by the total
   number of reserved-but-unneeded instruction words. Sizes here are in
   instruction-word units, not bytes. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		/* Labels and consts just move down by the savings so far. */
		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Conservative default; shrunk when the target is reachable
			   by a shorter form. */
			total_size = JUMP_MAX_SIZE;

			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
				if (jump->flags & JUMP_ADDR) {
					if (jump->u.target <= S32_MAX)
						total_size = 2;
					else if (jump->u.target <= S52_MAX)
						total_size = 3;
				} else {
					/* Unit size: instruction. */
					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
					if (jump->u.label->size > jump->addr) {
						SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
						diff -= (sljit_sw)size_reduce;
					}

					/* +1: a folded conditional branch sits one word earlier. */
					if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
						total_size = 0;
					else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
						total_size = 1;
					else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
						total_size = 2;
				}
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
			jump->flags |= total_size << JUMP_SIZE_SHIFT;
		} else {
			total_size = 3;

			if (!(jump->flags & JUMP_ADDR)) {
				/* Real size minus 1. Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
					total_size = 1;
			} else if (jump->u.target < S32_MAX)
				total_size = 1;
			else if (jump->u.target <= S52_MAX)
				total_size = 2;

			size_reduce += 3 - total_size;
			jump->flags |= total_size << JUMP_SIZE_SHIFT;
		}

		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
686
687
/* Final code generation: copies the buffered instruction words into
   executable memory, resolving labels, jumps and constants along the way,
   then patches every jump with its final displacement or address sequence.
   Returns a pointer to the executable code (with executable_offset applied)
   or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_sw executable_offset;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler, options));

	/* Shrink jump size estimates before allocating the final buffer. */
	reduce_code_size(compiler);

	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	/* Copy pass: stream instruction words out of the fragment chain while
	   resolving the label/jump/const whose address equals word_count. */
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			if (next_min_addr == word_count) {
				SLJIT_ASSERT(!label || label->size >= word_count);
				SLJIT_ASSERT(!jump || jump->addr >= word_count);
				SLJIT_ASSERT(!const_ || const_->addr >= word_count);

				/* These structures are ordered by their address. */
				if (next_min_addr == next_label_size) {
					if (label->u.index >= SLJIT_LABEL_ALIGNED) {
						code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
						/* Re-copy the current word at the aligned spot. */
						*code_ptr = buf_ptr[-1];
					}

					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (!(jump->flags & JUMP_MOV_ADDR)) {
						/* Skip the reserved words of this jump. */
						word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
						SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
					} else {
						word_count += jump->flags >> JUMP_SIZE_SHIFT;
						addr = (sljit_uw)code_ptr;
						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
						jump->addr = addr;
					}
					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}
			code_ptr++;
			word_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may sit exactly at the end of the code. */
	if (label && label->size == word_count) {
		if (label->u.index >= SLJIT_LABEL_ALIGNED)
			code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);

		label->u.addr = (sljit_uw)code_ptr;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Patch pass: fill in branch offsets / address-load sequences now that
	   every label address is final. */
	jump = compiler->jumps;
	while (jump) {
		do {
			if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
				load_addr_to_reg(jump, executable_offset);
				break;
			}

			addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
			buf_ptr = (sljit_ins *)jump->addr;
			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);

			if (jump->flags & PATCH_B) {
				/* 16-bit conditional branch displacement. */
				SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
				buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
				break;
			}

			/* 26-bit unconditional branch or call. */
			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
			if (jump->flags & IS_CALL)
				buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
			else
				buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);

	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
825
826
/* Reports whether a given sljit feature is available on this target.
   FPU availability can be overridden at build time; LSX/LASX presence is
   read from HWCAP via get_cpu_features(). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type)
	{
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif

	case SLJIT_HAS_LASX:
		return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));

	case SLJIT_HAS_SIMD:
		return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));

	/* Always present in the base LoongArch-64 ISA. */
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_CTZ:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
	case SLJIT_HAS_MEMORY_BARRIER:
		return 1;

	default:
		return 0;
	}
}
859
860
/* No compare type needs special handling on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	const sljit_s32 result = 0;

	SLJIT_UNUSED_ARG(type);
	return result;
}
866
867
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
/* Separates integer and floating point registers */
#define GPR_REG 0x0f
#define DOUBLE_DATA 0x10
#define SINGLE_DATA 0x12

#define MEM_MASK 0x1f

/* Internal operation flags used by the emitters below. */
#define ARG_TEST 0x00020
#define ALT_KEEP_CACHE 0x00040
#define CUMULATIVE_OP 0x00080
#define IMM_OP 0x00100
#define MOVE_OP 0x00200
#define SRC2_IMM 0x00400

#define UNUSED_DEST 0x00800
#define REG_DEST 0x01000
#define REG1_SOURCE 0x02000
#define REG2_SOURCE 0x04000
#define SLOW_SRC1 0x08000
#define SLOW_SRC2 0x10000
#define SLOW_DEST 0x20000
#define MEM_USE_TMP2 0x40000

/* Stack slots are machine words (64-bit). */
#define STACK_STORE ST_D
#define STACK_LOAD LD_D
903
904
/* Emits the shortest sequence that loads the 64-bit constant imm into
   dst_r: ADDI_D for 12-bit values, then LU12I_W (+ ORI) for 32-bit,
   adding LU32I_D for 52-bit and LU52I_D for full 64-bit values. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
{
	/* Fits the sign-extended 12-bit immediate of addi.d from $zero. */
	if (imm <= I12_MAX && imm >= I12_MIN)
		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));

	if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
		/* Bits [31:12] via lu12i.w; low 12 bits via ori when nonzero. */
		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
		if (IMM_I12(imm) != 0)
			return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));
		return SLJIT_SUCCESS;
	} else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {
		/* 52-bit value: additionally set bits [51:32] via lu32i.d. */
		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
		if (IMM_I12(imm) != 0)
			FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
		return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));
	}
	/* Full 64-bit value: finish with lu52i.d for bits [63:52]. */
	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
	if (IMM_I12(imm) != 0)
		FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));
	return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
}
926
927
/* Largest stack offset reachable by a single 12-bit displacement (2048). */
#define STACK_MAX_DISTANCE (-I12_MIN)

static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
930
931
/* Emits the function prologue: allocates the (16 byte aligned) frame,
   stores the return address, the saved and scratch registers that must
   survive, and moves register arguments into their saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_s32 i, tmp, offset;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;
	/* Add the space needed for the spilled integer and float registers. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

	/* Keep the stack pointer 16 byte aligned. */
	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
	compiler->local_size = local_size;

	if (local_size <= STACK_MAX_DISTANCE) {
		/* Frequent case. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
		offset = local_size - SSIZE_OF(sw);
		local_size = 0;
	} else {
		/* NOTE(review): STACK_MAX_DISTANCE is 2048, which IMM_I12 truncates
		   to its low 12 bits (0x800) and the hardware sign-extends to -2048,
		   so this decrements SP — intentional but worth confirming. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
		local_size -= STACK_MAX_DISTANCE;

		/* The remaining adjustment is applied after the spills below. */
		if (local_size > STACK_MAX_DISTANCE)
			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
	}

	/* Return address is stored at the highest slot of the spill area. */
	FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));

	/* Store the saved registers (minus the ones kept across the call). */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Store the callee saved scratch registers. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Same for the floating point registers. */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Apply the remaining local area adjustment, if any. */
	if (local_size > STACK_MAX_DISTANCE)
		FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
	else if (local_size > 0)
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* Copy the word sized register arguments into saved registers so
	   they survive calls made by the generated code. */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = SLJIT_R0;

	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#undef STACK_MAX_DISTANCE
1021
/* Records the frame layout without emitting any instruction; the size
   computation must mirror the one performed by sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fp_scratches;
	sljit_s32 fp_saveds;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fp_scratches = compiler->fscratches;
	fp_saveds = compiler->fsaveds;

	/* Account for the integer and floating point register spill areas. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fp_scratches, fp_saveds, f64);

	/* Keep the stack pointer 16 byte aligned. */
	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
	return SLJIT_SUCCESS;
}
1043
1044
/* 16 less than the full 12 bit range, so the remaining (16 byte aligned)
   local_size below still fits the final addi.d immediate — TODO confirm. */
#define STACK_MAX_DISTANCE (-I12_MIN - 16)

/* Emits the epilogue: reloads the return address (unless is_return_to),
   the saved registers, and releases the stack frame. Must mirror the
   layout produced by sljit_emit_enter. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 i, tmp, offset;
	sljit_s32 local_size = compiler->local_size;

	/* First move SP close enough that the spill slots are reachable
	   with 12 bit displacements. */
	if (local_size > STACK_MAX_DISTANCE) {
		local_size -= STACK_MAX_DISTANCE;

		if (local_size > STACK_MAX_DISTANCE) {
			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
			FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
		} else
			FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));

		local_size = STACK_MAX_DISTANCE;
	}

	SLJIT_ASSERT(local_size > 0);

	/* Return address lives in the highest slot. */
	offset = local_size - SSIZE_OF(sw);
	if (!is_return_to)
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));

	/* Reload saved registers (minus the kept ones), then saved scratches. */
	tmp = SLJIT_S0 - compiler->saveds;
	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Same for the floating point registers. */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Release the remaining part of the frame. */
	return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
}

#undef STACK_MAX_DISTANCE
1096
/* Restores the saved registers, releases the frame, and jumps back
   through the return address register (jirl zero, ra, 0). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));
	FAIL_IF(emit_stack_frame_release(compiler, 0));

	return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
}
1104
1105
/* Releases the current frame and jumps to src (tail-call style return).
   The jump target must be secured in TMP_REG1 before the frame release,
   because memory operands and saved registers become invalid afterwards. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* Load the target while the frame (and any address registers)
		   is still intact. */
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* The target is in a register about to be restored: copy it out. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1127
1128
/* --------------------------------------------------------------------- */
1129
/* Operators */
1130
/* --------------------------------------------------------------------- */
1131
1132
/* Base + 12 bit immediate load/store opcodes, indexed by flags & MEM_MASK.
   Per-entry comments read: (u)nsigned/(s)igned, size (w/b/h/i), (s)tore/(l)oad;
   the last four entries are the double/single precision FP forms. */
static const sljit_ins data_transfer_insts[16 + 4] = {
/* u w s */ ST_D /* st.d */,
/* u w l */ LD_D /* ld.d */,
/* u b s */ ST_B /* st.b */,
/* u b l */ LD_BU /* ld.bu */,
/* u h s */ ST_H /* st.h */,
/* u h l */ LD_HU /* ld.hu */,
/* u i s */ ST_W /* st.w */,
/* u i l */ LD_WU /* ld.wu */,

/* s w s */ ST_D /* st.d */,
/* s w l */ LD_D /* ld.d */,
/* s b s */ ST_B /* st.b */,
/* s b l */ LD_B /* ld.b */,
/* s h s */ ST_H /* st.h */,
/* s h l */ LD_H /* ld.h */,
/* s i s */ ST_W /* st.w */,
/* s i l */ LD_W /* ld.w */,

/* d s */ FST_D /* fst.d */,
/* d l */ FLD_D /* fld.d */,
/* s s */ FST_S /* fst.s */,
/* s l */ FLD_S /* fld.s */,
};
1156
1157
/* Register-indexed (base + index) variants of data_transfer_insts,
   with the same flags & MEM_MASK layout. */
static const sljit_ins data_transfer_insts_x[16 + 4] = {
/* u w s */ STX_D /* stx.d */,
/* u w l */ LDX_D /* ldx.d */,
/* u b s */ STX_B /* stx.b */,
/* u b l */ LDX_BU /* ldx.bu */,
/* u h s */ STX_H /* stx.h */,
/* u h l */ LDX_HU /* ldx.hu */,
/* u i s */ STX_W /* stx.w */,
/* u i l */ LDX_WU /* ldx.wu */,

/* s w s */ STX_D /* stx.d */,
/* s w l */ LDX_D /* ldx.d */,
/* s b s */ STX_B /* stx.b */,
/* s b l */ LDX_B /* ldx.b */,
/* s h s */ STX_H /* stx.h */,
/* s h l */ LDX_H /* ldx.h */,
/* s i s */ STX_W /* stx.w */,
/* s i l */ LDX_W /* ldx.w */,

/* d s */ FSTX_D /* fstx.d */,
/* d l */ FLDX_D /* fldx.d */,
/* s s */ FSTX_S /* fstx.s */,
/* s l */ FLDX_S /* fldx.s */,
};
1181
1182
/* Emits a single load/store instruction. When an index register is
   present, the indexed (ldx/stx) opcode table is used with a zero
   offset, otherwise the base + 12 bit immediate (ld/st) form. */
static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 base = arg & REG_MASK;
	/* GPR or FP destination/source field, depending on the data type. */
	sljit_ins reg_bits = ((flags & MEM_MASK) <= GPR_REG) ? RD(reg) : FRD(reg);
	sljit_ins ins;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (!(arg & OFFS_REG_MASK)) {
		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);
		ins = data_transfer_insts[flags & MEM_MASK] | reg_bits | RJ(base) | IMM_I12(argw);
	} else {
		SLJIT_ASSERT(!argw);
		ins = data_transfer_insts_x[flags & MEM_MASK] | reg_bits | RJ(base) | RK(OFFS_REG(arg));
	}

	return push_inst(compiler, ins);
}
1205
1206
/* Can perform an operation using at most 1 instruction. */
1207
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1208
{
1209
SLJIT_ASSERT(arg & SLJIT_MEM);
1210
1211
/* argw == 0 (ldx/stx rd, rj, rk) can be used.
1212
* argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1213
if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1214
/* Works for both absolute and relative addresses. */
1215
if (SLJIT_UNLIKELY(flags & ARG_TEST))
1216
return 1;
1217
1218
FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1219
return -1;
1220
}
1221
return 0;
1222
}
1223
1224
/* Rounds argw to the nearest 4K boundary so that the dropped low 12 bits
   fit a signed 12 bit displacement (rounds up when bit 0x800 is set). */
#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
1225
1226
/* See getput_arg below.
1227
Note: can_cache is called only for binary operators. */
1228
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1229
{
1230
SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1231
1232
if (arg & OFFS_REG_MASK)
1233
return 0;
1234
1235
if (arg == next_arg) {
1236
if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1237
|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1238
return 1;
1239
return 0;
1240
}
1241
1242
return 0;
1243
}
1244
1245
/* Emit the necessary instructions. See can_cache above.
   TMP_REG3 caches a (partial) address between consecutive memory
   accesses; compiler->cache_arg / cache_argw describe its contents. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 base = arg & REG_MASK;
	sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
	sljit_sw offset;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	/* Only a following memory access can benefit from caching. */
	if (!(next_arg & SLJIT_MEM)) {
		next_arg = 0;
		next_argw = 0;
	}

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;

		/* NOTE(review): the shift amount is expected to be non-zero here,
		   otherwise getput_arg_fast would already have emitted the access. */
		if (SLJIT_UNLIKELY(argw))
			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
	}

	/* TMP_REG3 already holds base + cache_argw for the same operand. */
	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);

	/* TMP_REG3 holds a plain constant close enough to argw. */
	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
		offset = argw - compiler->cache_argw;
	} else {
		sljit_sw argw_hi = TO_ARGW_HI(argw);
		compiler->cache_arg = SLJIT_MEM;

		if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
			/* Cache the exact constant: the next access can reuse it
			   even though it crosses a 4K boundary. */
			FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
			compiler->cache_argw = argw;
			offset = 0;
		} else {
			/* Cache the rounded upper part; the low 12 bits become the
			   displacement (sign-extended by the hardware, which matches
			   the round-up in TO_ARGW_HI — see push_mem_inst's assert). */
			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
			compiler->cache_argw = argw_hi;
			offset = argw & 0xfff;
			argw = argw_hi;
		}
	}

	if (!base)
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);

	if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
		/* Fold the base register into the cache for the next access. */
		compiler->cache_arg = arg;
		FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
	}

	if (!offset)
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);

	/* Combine base and cached part in a scratch without disturbing the cache. */
	FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
}
1302
1303
/* Emits a load/store for an arbitrary memory operand without using the
   TMP_REG3 address cache (unlike getput_arg). */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 base = arg & REG_MASK;
	sljit_s32 tmp_r = TMP_REG1;

	/* Single instruction fast path. */
	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;

	/* A GPR load may use its own destination as address scratch. */
	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
		tmp_r = reg;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Scaled index: pre-shift the index register when needed. */
		argw &= 0x3;
		if (SLJIT_UNLIKELY(argw))
			FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
	}

	/* Materialize the full offset, then use an indexed (or absolute) access. */
	FAIL_IF(load_immediate(compiler, tmp_r, argw));
	if (base != 0)
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
}
1328
1329
/* Like emit_op_mem, but may cache the address of arg1 for a following
   access to (arg2, arg2w) via getput_arg. */
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
{
	/* Fast path: one instruction was enough (or an error was recorded). */
	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
		return compiler->error;

	/* Slow path with cross-operand address caching. */
	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}
1335
1336
/* Shift amount immediate: 32 bit ops use v directly, 64 bit ops the
   wider (32 + v) field of the corresponding *_D instruction. */
#define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? (v) : (32 + (v))))

/* andi/ori/xori are zero-extended */
/* Emits a logical op; since the immediate forms zero-extend, a signed
   12 bit immediate operand is first materialized with addi.d and the
   register form (op_reg) is used instead. Expects op/flags/dst/src1/src2
   in scope, like a local function. */
#define EMIT_LOGICAL(op_imm, op_reg) \
	if (flags & SRC2_IMM) { \
		if (op & SLJIT_SET_Z) {\
			FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
		} \
		if (!(flags & UNUSED_DEST)) { \
			if (dst == src1) { \
				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
			} else { \
				FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \
			} \
		} \
	} else { \
		if (op & SLJIT_SET_Z) \
			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
		if (!(flags & UNUSED_DEST)) \
			FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \
	} \
	while (0)

/* Selects the immediate and register shift opcodes for the shared
   emission code at the end of emit_single_op. */
#define EMIT_SHIFT(imm, reg) \
	op_imm = (imm); \
	op_reg = (reg)
1366
/* Emits the machine code of a single operation whose operands are
   already in registers (src2 may be an immediate when SRC2_IMM is set).
   Flag results are materialized in the EQUAL_FLAG / OTHER_FLAG registers. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
	sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
	sljit_ins op_imm, op_reg;
	sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Register move via add[i] dst, src2, 0. */
		if (dst != src2)
			return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Zero extend from 8 bits. */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Sign extend from 8 bits. */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Zero extend from 16 bits (bstrpick bits [15:0]). */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Sign extend from 16 bits. */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U32:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Zero extend from 32 bits (bstrpick.d bits [31:0]). */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_MOV_S32:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Sign extend from 32 bits via slli.w dst, src2, 0. */
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
			return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
		SLJIT_ASSERT(dst == src2);
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));

	case SLJIT_CTZ:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));

	case SLJIT_REV:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Byte reverse of the full (32 or 64 bit) word. */
		return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));

	case SLJIT_REV_S16:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Byte reverse the low halfword, then sign extend it. */
		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
		return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));

	case SLJIT_REV_U16:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
		/* Byte reverse the low halfword, then zero extend it. */
		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
		return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));

	case SLJIT_REV_S32:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
		/* Byte reverse the low word, then sign extend it. */
		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
		return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));

	case SLJIT_REV_U32:
		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
		/* Byte reverse the low word, then zero extend it. */
		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
		return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));

	case SLJIT_ADD:
		/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;

		if (flags & SRC2_IMM) {
			if (is_overflow) {
				/* EQUAL_FLAG := src1 (src2 >= 0) or ~src1 (src2 < 0),
				   i.e. src1 ^ src2_sign replicated. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
				else {
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
				}
			} else if (op & SLJIT_SET_Z)
				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));

			/* Only the zero flag is needed. */
			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
		} else {
			if (is_overflow)
				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
			else if (op & SLJIT_SET_Z)
				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));

			if (is_overflow || carry_src_r != 0) {
				/* Keep a pre-addition operand for the carry comparison
				   below, even when dst aliases both sources. */
				if (src1 != dst)
					carry_src_r = (sljit_s32)src1;
				else if (src2 != dst)
					carry_src_r = (sljit_s32)src2;
				else {
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
					carry_src_r = OTHER_FLAG;
				}
			}

			/* Only the zero flag is needed. */
			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
		}

		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
		if (is_overflow || carry_src_r != 0) {
			if (flags & SRC2_IMM)
				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
			else
				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
		}

		if (!is_overflow)
			return SLJIT_SUCCESS;

		/* OTHER_FLAG := sign(dst ^ EQUAL_FLAG) ^ carry. */
		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
		if (op & SLJIT_SET_Z)
			/* Copies dst into EQUAL_FLAG (IMM_I12(0) leaves the RK field
			   as the zero register, so this is a register-form move). */
			FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));

	case SLJIT_ADDC:
		/* Add with carry: OTHER_FLAG holds the incoming carry bit. */
		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;

		if (flags & SRC2_IMM) {
			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
		} else {
			if (carry_src_r != 0) {
				if (src1 != dst)
					carry_src_r = (sljit_s32)src1;
				else if (src2 != dst)
					carry_src_r = (sljit_s32)src2;
				else {
					FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
					carry_src_r = EQUAL_FLAG;
				}
			}

			FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
		}

		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
		if (carry_src_r != 0) {
			if (flags & SRC2_IMM)
				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
			else
				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
		}

		/* Add the incoming carry. */
		FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));

		if (carry_src_r == 0)
			return SLJIT_SUCCESS;

		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
		FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
		/* Set carry flag. */
		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));

	case SLJIT_SUB:
		/* -src2 would overflow for I12_MIN, so fall back to a register. */
		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		is_handled = 0;

		/* Comparison-only flag types map directly onto slt/sltu. */
		if (flags & SRC2_IMM) {
			if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
				is_handled = 1;
			} else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
				FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
				is_handled = 1;
			}
		}

		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
			is_handled = 1;

			if (flags & SRC2_IMM) {
				/* The remaining comparisons need a register operand. */
				reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
				FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
				src2 = reg;
				flags &= ~SRC2_IMM;
			}

			switch (GET_FLAG_TYPE(op)) {
			case SLJIT_LESS:
				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
				break;
			case SLJIT_GREATER:
				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
				break;
			case SLJIT_SIG_LESS:
				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
				break;
			case SLJIT_SIG_GREATER:
				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
				break;
			}
		}

		if (is_handled) {
			/* Emit the subtraction itself (as addi with -src2 when immediate). */
			if (flags & SRC2_IMM) {
				if (op & SLJIT_SET_Z)
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
				if (!(flags & UNUSED_DEST))
					return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
			} else {
				if (op & SLJIT_SET_Z)
					FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
				if (!(flags & UNUSED_DEST))
					return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
			}
			return SLJIT_SUCCESS;
		}

		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;

		if (flags & SRC2_IMM) {
			if (is_overflow) {
				/* Same src1 ^ src2_sign trick as in SLJIT_ADD. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
				else {
					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
				}
			} else if (op & SLJIT_SET_Z)
				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));

			/* Borrow: src1 < src2 (unsigned). */
			if (is_overflow || is_carry)
				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));

			/* Only the zero flag is needed. */
			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
		} else {
			if (is_overflow)
				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
			else if (op & SLJIT_SET_Z)
				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));

			if (is_overflow || is_carry)
				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));

			/* Only the zero flag is needed. */
			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
		}

		if (!is_overflow)
			return SLJIT_SUCCESS;

		/* OTHER_FLAG := sign(dst ^ EQUAL_FLAG) ^ borrow. */
		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
		if (op & SLJIT_SET_Z)
			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));

	case SLJIT_SUBC:
		/* Subtract with borrow: OTHER_FLAG holds the incoming borrow bit. */
		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;

		if (flags & SRC2_IMM) {
			/* Borrow from the main subtraction. */
			if (is_carry)
				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));

			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
		} else {
			if (is_carry)
				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));

			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
		}

		/* Borrow from subtracting the incoming borrow. */
		if (is_carry)
			FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));

		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));

		if (!is_carry)
			return SLJIT_SUCCESS;

		/* Combined borrow of both subtractions. */
		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & SRC2_IMM));

		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
			return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));

		if (op & SLJIT_32) {
			/* Overflow iff the 64 bit product differs from the sign
			   extended 32 bit product. */
			FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
			FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
			return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
		}

		/* Overflow iff the high 64 bits differ from the sign of the low 64. */
		FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
		FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
		FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63))));
		return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));

	case SLJIT_AND:
		EMIT_LOGICAL(ANDI, AND);
		return SLJIT_SUCCESS;

	case SLJIT_OR:
		EMIT_LOGICAL(ORI, OR);
		return SLJIT_SUCCESS;

	case SLJIT_XOR:
		EMIT_LOGICAL(XORI, XOR);
		return SLJIT_SUCCESS;

	/* Shifts fall through to the shared op_imm/op_reg emission below. */
	case SLJIT_SHL:
	case SLJIT_MSHL:
		if (op & SLJIT_32) {
			EMIT_SHIFT(SLLI_W, SLL_W);
		} else {
			EMIT_SHIFT(SLLI_D, SLL_D);
		}
		break;

	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		if (op & SLJIT_32) {
			EMIT_SHIFT(SRLI_W, SRL_W);
		} else {
			EMIT_SHIFT(SRLI_D, SRL_D);
		}
		break;

	case SLJIT_ASHR:
	case SLJIT_MASHR:
		if (op & SLJIT_32) {
			EMIT_SHIFT(SRAI_W, SRA_W);
		} else {
			EMIT_SHIFT(SRAI_D, SRA_D);
		}
		break;

	case SLJIT_ROTL:
	case SLJIT_ROTR:
		/* Only rotate-right exists; rotate-left by n is rotate-right
		   by (word_size - n) or by the negated register amount. */
		if (flags & SRC2_IMM) {
			SLJIT_ASSERT(src2 != 0);

			if (GET_OPCODE(op) == SLJIT_ROTL)
				src2 = word_size - src2;
			return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
		}

		if (src2 == TMP_ZERO) {
			if (dst != src1)
				return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
			return SLJIT_SUCCESS;
		}

		if (GET_OPCODE(op) == SLJIT_ROTL) {
			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
			src2 = OTHER_FLAG;
		}
		return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));

	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

	/* Shared emission for the shift opcodes selected via EMIT_SHIFT. */
	if (flags & SRC2_IMM) {
		if (op & SLJIT_SET_Z)
			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));

		if (flags & UNUSED_DEST)
			return SLJIT_SUCCESS;
		return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
	}

	if (op & SLJIT_SET_Z)
		FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));

	if (flags & UNUSED_DEST)
		return SLJIT_SUCCESS;
	return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
}

#undef IMM_EXTEND
1786
/* Generic operand resolver: decodes the dst/src1/src2 addressing modes
   (register, immediate, memory), loads memory operands into the temporary
   registers, then hands the actual computation to emit_single_op and
   finally stores the result back to memory when needed. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r = TMP_REG2;
	sljit_s32 src1_r;
	sljit_sw src2_r = 0;
	/* For true two-operand ops with a register src1, src2 may be loaded
	   into TMP_REG1 without clobbering anything; otherwise use TMP_REG2. */
	sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;

	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	if (dst == 0) {
		/* No destination: the operation is performed only for its flags. */
		SLJIT_ASSERT(HAS_FLAGS(op));
		flags |= UNUSED_DEST;
		dst = TMP_REG2;
	} else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		if (flags & MOVE_OP)
			src2_tmp_reg = dst_r;
	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	if (flags & IMM_OP) {
		/* Try to encode src2 (or, for commutative ops, src1) as a signed
		   12-bit immediate; zero is excluded since TMP_ZERO handles it. */
		if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
			flags |= SRC2_IMM;
			src2_r = src2w;
		} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
			flags |= SRC2_IMM;
			src2_r = src1w;

			/* And swap arguments. */
			src1 = src2;
			src1w = src2w;
			src2 = SLJIT_IMM;
			/* src2w = src2_r unneeded. */
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	} else if (src1 == SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
			src1_r = TMP_REG1;
		}
		else
			src1_r = TMP_ZERO;
	} else {
		/* Memory operand: fast path when the address fits one instruction,
		   otherwise defer to the SLOW path below. */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
			dst_r = (sljit_s32)src2_r;
	} else if (src2 == SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
				src2_r = src2_tmp_reg;
			} else {
				src2_r = TMP_ZERO;
				if (flags & MOVE_OP) {
					/* Moving zero: either store zero directly or degrade
					   to a plain register move. */
					if (dst & SLJIT_MEM)
						dst_r = 0;
					else
						op = SLJIT_MOV;
				}
			}
		}
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;

		src2_r = src2_tmp_reg;
	}

	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		/* Order the two slow loads so the address cache helps the store too. */
		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}
1909
1910
/* Zero-operand operations: breakpoint, nop, memory barrier, and the
   fixed-register multiply/divide pairs operating on R0/R1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		return push_inst(compiler, BREAK);
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
		/* Save R1 first: MULH overwrites R1 before MUL consumes the
		   original value. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
	case SLJIT_LMUL_SW:
		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
	case SLJIT_DIVMOD_UW:
		/* Keep the original dividend in TMP_REG1 for the MOD step,
		   since DIV overwrites R0. */
		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
		return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
	case SLJIT_DIVMOD_SW:
		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
		FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
		return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
	case SLJIT_DIV_UW:
		return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
	case SLJIT_DIV_SW:
		return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
	case SLJIT_MEMORY_BARRIER:
		return push_inst(compiler, DBAR);
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	SLJIT_UNREACHABLE();
	return SLJIT_ERR_UNSUPPORTED;
}
1950
1951
/* Unary operations: all MOV variants (with width/sign adjustment of
   immediates) plus CLZ/CTZ/REV, all funneled through emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (op & SLJIT_32)
		flags = INT_DATA | SIGNED_DATA;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);

	case SLJIT_MOV_U32:
		/* Immediates are truncated to the operand width up front. */
		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);

	case SLJIT_MOV_S32:
	/* Logical operators have no W variant, so sign extended input is necessary for them. */
	case SLJIT_MOV32:
		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);

	case SLJIT_MOV_U8:
		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_CLZ:
	case SLJIT_CTZ:
	case SLJIT_REV:
		return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);

	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
2007
2008
/* Binary operations: normalizes 32-bit immediates, masks shift amounts,
   sets the flag-generation state, then delegates to emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (op & SLJIT_32) {
		/* 32-bit mode: sign-extend immediates to the canonical form. */
		flags |= INT_DATA | SIGNED_DATA;
		if (src1 == SLJIT_IMM)
			src1w = (sljit_s32)src1w;
		if (src2 == SLJIT_IMM)
			src2w = (sljit_s32)src2w;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		compiler->status_flags_state = 0;
		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
		/* Immediate shift amounts are reduced modulo the operand width. */
		if (src2 == SLJIT_IMM) {
			if (op & SLJIT_32)
				src2w &= 0x1f;
			else
				src2w &= 0x3f;
		}

		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
2071
2072
/* Flag-only variant of sljit_emit_op2: dst == 0 makes emit_op discard
   the result and compute only the status flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
}
2082
2083
/* Destructive two-operand op on a register destination.
   SLJIT_MULADD: dst_reg += src1 * src2; the product is computed into
   TMP_REG2 first, then accumulated with a 64-bit add.
   Unhandled opcodes are silently accepted as no-ops. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));

	switch (GET_OPCODE(op)) {
	case SLJIT_MULADD:
		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
		return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
	}

	return SLJIT_SUCCESS;
}
2100
2101
/* Funnel shift: shifts src1_reg by src3 bits and fills the vacated bits
   from src2_reg, i.e. dst = (src1 << n) | (src2 >> (bits - n)) for the
   left direction (mirrored for right shifts).
   Fix: the register-copy of src3 used an unchecked push_inst(); a failed
   (out-of-memory) emission was silently ignored. It is now wrapped in
   FAIL_IF like every other emission in this file. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;
	sljit_ins ins1, ins2, ins3;
	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);

	/* Shifting a register into itself is simply a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= bit_length - 1;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Constant amount: two immediate shifts plus an OR. */
		if (is_left) {
			ins1 = INST(SLLI, op) | IMM_I12(src3w);
			src3w = bit_length - src3w;
			ins2 = INST(SRLI, op) | IMM_I12(src3w);
		} else {
			ins1 = INST(SRLI, op) | IMM_I12(src3w);
			src3w = bit_length - src3w;
			ins2 = INST(SLLI, op) | IMM_I12(src3w);
		}

		FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
		return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
		src3 = TMP_REG2;
	} else if (dst_reg == src3) {
		/* Copy the shift amount aside before dst_reg is overwritten.
		   (Bug fix: this emission's error status was previously ignored.) */
		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0)));
		src3 = TMP_REG2;
	}

	if (is_left) {
		ins1 = INST(SLL, op);
		ins2 = INST(SRLI, op);
		ins3 = INST(SRL, op);
	} else {
		ins1 = INST(SRL, op);
		ins2 = INST(SLLI, op);
		ins3 = INST(SLL, op);
	}

	FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* The amount may be zero: pre-shift src2 by one and use
		   (bit_length - 1) ^ src3 as the complementary amount so the
		   second shift never needs the full width. */
		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
		src2_reg = TMP_REG1;
	} else
		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));

	FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
	return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
}
2176
2177
/* Emits dst = src1 OP (src2 << shift_arg). Collapses to a plain
   sljit_emit_op2 when src2 is an immediate (the shift is folded into
   the constant) or when the masked shift amount is zero. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	sljit_sw shift_arg)
{
	sljit_s32 dst_r, tmp_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Reduce the amount modulo the machine word size. */
	shift_arg &= (sljit_sw)((sizeof(sljit_sw) * 8) - 1);

	if (src2 == SLJIT_IMM) {
		/* Fold the shift into the constant operand. */
		src2w = src2w << shift_arg;
		shift_arg = 0;
	}

	if (shift_arg == 0) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
		src2 = TMP_REG2;
	}

	if (src1 == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1 = TMP_REG1;
	} else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1 = TMP_REG1;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* Pick a scratch register that does not alias the loaded src1. */
	tmp_r = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
	FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(src2) | IMM_I12(shift_arg)));
	FAIL_IF(push_inst(compiler, ADD_D | RD(dst_r) | RJ(src1) | RK(tmp_r)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, WORD_DATA, dst_r, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2225
2226
/* Source-only operations: fast return (indirect jump through the return
   address) and the cache prefetch hints.
   Fix: in the scaled-index prefetch path, when the scale bits were zero
   the SLLI was skipped but the following ADD still read TMP_REG1, which
   at that point held stale data, so the prefetched address was garbage.
   The unscaled index register is now added directly in that case. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 base = src & REG_MASK;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* Load the saved return address into RETURN_ADDR_REG, then jump. */
		if (FAST_IS_REG(src)) {
			if (src != RETURN_ADDR_REG)
				FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
		} else
			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));

		return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* Compute the effective address into TMP_REG1 unless a single
		   base + 12-bit offset form can feed PRELD directly. */
		if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
			srcw &= 0x3;
			if (SLJIT_UNLIKELY(srcw))
				FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
			/* Bug fix: with a zero scale TMP_REG1 is uninitialized;
			   add the index register itself instead. */
			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(srcw ? TMP_REG1 : OFFS_REG(src))));
		} else {
			if (base && srcw <= I12_MAX && srcw >= I12_MIN)
				return push_inst(compiler, PRELD | RJ(base) | IMM_I12(srcw));

			FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
			if (base != 0)
				FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
		}
		/* The hint field (rd position) is 0: prefetch for load. */
		return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
	}
	return SLJIT_SUCCESS;
}
2267
2268
/* Destination-only operations: capture the fast-call return address
   (SLJIT_FAST_ENTER) or reload the function's own return address from
   the stack frame (SLJIT_GET_RETURN_ADDRESS). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		if (FAST_IS_REG(dst)) {
			if (dst == RETURN_ADDR_REG)
				return SLJIT_SUCCESS;
			return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
		}

		/* Memory destination: the shared store below uses TMP_REG2,
		   which must alias the return address register. */
		SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		/* The return address was spilled at the top of the local area. */
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);

	return SLJIT_SUCCESS;
}
2298
2299
/* Maps an abstract sljit register to its hardware encoding: general
   purpose registers use reg_map, float/SIMD registers use freg_map,
   and any other register class yields -1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	switch (type) {
	case SLJIT_GP_REGISTER:
		return reg_map[reg];
	case SLJIT_FLOAT_REGISTER:
	case SLJIT_SIMD_REG_128:
	case SLJIT_SIMD_REG_256:
		return freg_map[reg];
	default:
		return -1;
	}
}
2311
2312
/* Emits one caller-supplied, pre-encoded 32-bit instruction verbatim.
   The size argument is validated by the checker but otherwise unused,
   as every LoongArch instruction is exactly 4 bytes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
2321
2322
/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

/* Condition code field of the FCMP.cond.S/D instructions (bits 15..19).
   Each flag bit of the condition selects which relations produce true. */
#define SET_COND(cond) (sljit_ins)(cond << 15)

#define COND_CUN SET_COND(0x8) /* UN */
#define COND_CEQ SET_COND(0x4) /* EQ */
#define COND_CUEQ SET_COND(0xc) /* UN EQ */
#define COND_CLT SET_COND(0x2) /* LT */
#define COND_CULT SET_COND(0xa) /* UN LT */
#define COND_CLE SET_COND(0x6) /* LT EQ */
#define COND_CULE SET_COND(0xe) /* UN LT EQ */
#define COND_CNE SET_COND(0x10) /* GT LT */
#define COND_CUNE SET_COND(0x18) /* UN GT LT */
#define COND_COR SET_COND(0x14) /* GT LT EQ */

/* Selects the single (_S) or double (_D) form of a float instruction
   based on the SLJIT_32 bit of the operation. */
#define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)
/* Condition-flag register fields of float compare/move instructions. */
#define FCD(cd) (sljit_ins)(cd & 0x7)
#define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)
#define FCA(ca) (sljit_ins)((ca & 0x7) << 15)
/* Condition-flag register used to hold float comparison results. */
#define F_OTHER_FLAG 1

#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
2345
2346
/* Convert float to integer, exact, rounding toward zero (FTINTRZ). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins inst;
	sljit_u32 word_data = 0;	/* 1 when the integer result is 64 bits wide */
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	switch (GET_OPCODE(op))
	{
	case SLJIT_CONV_SW_FROM_F64:
		word_data = 1;
		inst = FINST(FTINTRZ_L, op);
		break;
	case SLJIT_CONV_S32_FROM_F64:
		inst = FINST(FTINTRZ_W, op);
		break;
	default:
		inst = BREAK;
		SLJIT_UNREACHABLE();
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

	/* Convert in an FPU temporary, then move the bits to a GP register.
	   NOTE(review): FINST selects by the SLJIT_32 bit, but word_data is
	   0/1 here — confirm the intended _S/_D selection for MOVFR2GR. */
	FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
	FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2381
2382
/* Shared helper: convert a signed integer (word or 32-bit) to float.
   The integer is moved into the FPU register file, then FFINT converts
   it in place. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins inst;
	sljit_u32 word_data = 0;	/* 1 when the integer source is 64 bits wide */
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	switch (GET_OPCODE(op))
	{
	case SLJIT_CONV_F64_FROM_SW:
		word_data = 1;
		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
		break;
	case SLJIT_CONV_F64_FROM_S32:
		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
		break;
	default:
		inst = BREAK;
		SLJIT_UNREACHABLE();
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
	} else if (src == SLJIT_IMM) {
		/* Truncate 32-bit immediates before materializing them. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;

		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2421
2422
/* Signed integer to float conversion: handled entirely by the generic
   word conversion helper above. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
}
2428
2429
/* Unsigned integer to float conversion. LoongArch FFINT interprets the
   source as signed, so for values with the top bit set the code halves
   the value (keeping the lowest bit OR-ed in to preserve rounding),
   converts, and doubles the result with an FADD. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins inst;
	sljit_u32 word_data = 0;	/* 1 when the integer source is 64 bits wide */
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	switch (GET_OPCODE(op))
	{
	case SLJIT_CONV_F64_FROM_UW:
		word_data = 1;
		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
		break;
	case SLJIT_CONV_F64_FROM_U32:
		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
		break;
	default:
		inst = BREAK;
		SLJIT_UNREACHABLE();
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
	} else if (src == SLJIT_IMM) {
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
			srcw = (sljit_u32)srcw;

		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	/* 32-bit case: zero-extend the source (SRLI_W by 0). */
	if (!word_data)
		FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));

	/* If the value is negative as a signed integer, skip to the
	   halve-and-double sequence four instructions ahead. */
	FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));

	/* Fast path: value fits the signed range, convert directly. */
	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
	FAIL_IF(push_inst(compiler, B | IMM_I26(7)));

	/* Slow path: (src >> 1) | (src & 1), convert, then double.
	   NOTE(review): INST(MOVGR2FR, !word_data) selects by the SLJIT_32
	   bit but receives 0/1 — verify against INST's definition. */
	FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
	FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
	FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
	FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
	FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2482
2483
/* Float comparison: maps the requested sljit flag type onto a LoongArch
   FCMP condition, evaluates it into float condition flag F_OTHER_FLAG,
   and copies the boolean into the GP register OTHER_FLAG. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
		src2 = TMP_FREG2;
	}

	/* Clear OTHER_FLAG before the conditional copy below. */
	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));

	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		/* a > b is implemented as b < a by swapping the operands. */
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
		break;
	case SLJIT_UNORDERED_OR_GREATER:
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
		break;
	case SLJIT_UNORDERED_OR_LESS:
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_UNORDERED_OR_EQUAL:
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
		break;
	default: /* SLJIT_UNORDERED */
		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
	}
	return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
}
2526
2527
/* Unary float operations: move, negate, absolute value and the
   single<->double conversions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* F64<-F32: flip SLJIT_32 so the memory load below uses the
	   source (f32) width; flipped back after the conversion. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
			else
				/* Memory destination: store straight from the source. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2576
2577
/* Binary float operations (add/sub/mul/div): resolves memory operands
   into TMP_FREG1/TMP_FREG2, emits the arithmetic, then stores the
   result if the destination is memory. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= SLOW_SRC1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= SLOW_SRC2;
	}

	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		/* Order the two slow loads so the address cache helps the store too. */
		if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & SLOW_SRC1)
		src1 = TMP_FREG1;
	if (flags & SLOW_SRC2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
		break;
	}

	if (dst_r != dst)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
2649
2650
/* Register-destination float op (copysign). Memory operands are loaded
   first; src1's load target is chosen so it never clobbers src2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
		src2 = TMP_FREG1;
	}

	if (src1 & SLJIT_MEM) {
		/* Load into dst_freg when that is safe, otherwise into the temp. */
		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
		src1 = reg;
	}

	return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
}
2675
2676
/* Loads a 32-bit float constant into freg by materializing its bit
   pattern in a GP register and moving it across register files. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	union {
		sljit_f32 f;
		sljit_s32 bits;
	} conv;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	conv.f = value;

	/* +0.0f has an all-zero pattern: take it from the zero register. */
	if (conv.bits != 0)
		FAIL_IF(load_immediate(compiler, TMP_REG1, conv.bits));

	return push_inst(compiler, MOVGR2FR_W | RJ(conv.bits == 0 ? TMP_ZERO : TMP_REG1) | FRD(freg));
}
2695
2696
/* Loads a 64-bit float constant into freg by materializing its bit
   pattern in a GP register and moving it across register files. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	union {
		sljit_f64 f;
		sljit_sw bits;
	} conv;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	conv.f = value;

	/* +0.0 has an all-zero pattern: take it from the zero register. */
	if (conv.bits != 0)
		FAIL_IF(load_immediate(compiler, TMP_REG1, conv.bits));

	return push_inst(compiler, MOVGR2FR_D | RJ(conv.bits == 0 ? TMP_ZERO : TMP_REG1) | FRD(freg));
}
2715
2716
/* Moves a raw bit pattern between a GP register and a float register
   (no numeric conversion), in the direction and width selected by op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_ins inst;
	sljit_s32 is_32 = op & SLJIT_32;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
		/* GP -> FPU direction. */
		inst = (sljit_ins)(is_32 ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
	} else {
		/* FPU -> GP direction. */
		inst = (sljit_ins)(is_32 ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
	}
	return push_inst(compiler, inst);
}
2730
2731
/* --------------------------------------------------------------------- */
2732
/* Conditional instructions */
2733
/* --------------------------------------------------------------------- */
2734
2735
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2736
{
2737
struct sljit_label *label;
2738
2739
CHECK_ERROR_PTR();
2740
CHECK_PTR(check_sljit_emit_label(compiler));
2741
2742
if (compiler->last_label && compiler->last_label->size == compiler->size)
2743
return compiler->last_label;
2744
2745
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2746
PTR_FAIL_IF(!label);
2747
set_label(label, compiler);
2748
return label;
2749
}
2750
2751
/* Emits a label aligned to (1 << alignment) bytes, optionally followed by a
   chain of read-only data buffers. Padding is done with NOP instructions;
   the final alignment is resolved at code generation time via the mask
   stored in the extended label. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
	sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
{
	sljit_uw mask, i;
	struct sljit_label *label;
	struct sljit_label *next_label;
	struct sljit_extended_label *ext_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));

	sljit_reset_read_only_buffers(buffers);

	if (alignment <= SLJIT_LABEL_ALIGN_4) {
		/* Instructions are 4 byte aligned anyway; an ordinary label suffices. */
		SLJIT_SKIP_CHECKS(compiler);
		label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!label);
	} else {
		/* The used space is filled with NOPs. */
		mask = ((sljit_uw)1 << alignment) - sizeof(sljit_ins);

		/* Worst-case padding: mask bytes, i.e. (mask >> 2) instructions. */
		for (i = (mask >> 2); i != 0; i--)
			PTR_FAIL_IF(push_inst(compiler, NOP));

		ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
		PTR_FAIL_IF(!ext_label);
		set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
		label = &ext_label->label;
	}

	if (buffers == NULL)
		return label;

	next_label = label;

	/* Reserve space for each buffer (rounded up to whole instructions)
	   and attach a label marking its start. */
	while (1) {
		buffers->u.label = next_label;

		for (i = (buffers->size + 3) >> 2; i > 0; i--)
			PTR_FAIL_IF(push_inst(compiler, NOP));

		buffers = buffers->next;

		if (buffers == NULL)
			break;

		SLJIT_SKIP_CHECKS(compiler);
		next_label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!next_label);
	}

	/* Returns the label of the first (aligned) buffer/position. */
	return label;
}
2804
2805
/* Maps an sljit condition type to the LoongArch branch that is taken when
   the condition is NOT fulfilled: the emitted branch skips over the
   long-form (register indirect) jump that follows it. Integer conditions
   test EQUAL_FLAG/OTHER_FLAG; float conditions leave a 0/1 value in
   OTHER_FLAG. Returns 0 for non-conditional jump types. */
static sljit_ins get_jump_instruction(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
		/* EQUAL_FLAG == 0 means the operands were equal. */
		return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
	case SLJIT_NOT_EQUAL:
		return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
	case SLJIT_LESS:
	case SLJIT_GREATER:
	case SLJIT_SIG_LESS:
	case SLJIT_SIG_GREATER:
	case SLJIT_OVERFLOW:
	case SLJIT_CARRY:
	case SLJIT_ATOMIC_STORED:
		/* OTHER_FLAG != 0 means the condition holds. */
		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
	case SLJIT_GREATER_EQUAL:
	case SLJIT_LESS_EQUAL:
	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_NOT_OVERFLOW:
	case SLJIT_NOT_CARRY:
	case SLJIT_ATOMIC_NOT_STORED:
		/* Inverted forms of the group above: condition holds when
		   OTHER_FLAG == 0. */
		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED_OR_EQUAL:
	case SLJIT_UNORDERED:
		/* Float compares set OTHER_FLAG to non-zero when true. */
		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
	case SLJIT_ORDERED_NOT_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED:
		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
	default:
		/* Not conditional branch. */
		return 0;
	}
}
2855
2856
/* Emits a (possibly conditional) jump whose target is patched later.
   The layout is: [skip-branch when conditional] + JIRL through TMP_REG1,
   preceded by up-to-JUMP_MAX_SIZE reserved slots for loading the target
   address into TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins inst;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Inverted branch which skips the indirect jump below. */
	inst = get_jump_instruction(type);

	if (inst != 0) {
		PTR_FAIL_IF(push_inst(compiler, inst));
		jump->flags |= IS_COND;
	}

	jump->addr = compiler->size;
	inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_CALL;
		/* Store the return address for calls. */
		inst |= RD(RETURN_ADDR_REG);
	}

	PTR_FAIL_IF(push_inst(compiler, inst));

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return jump;
}
2890
2891
/* Emits a call; arg_types is unused on this target since all arguments are
   already placed according to the ABI. With SLJIT_CALL_RETURN the frame is
   released first and the call degenerates to a tail jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (!(type & SLJIT_CALL_RETURN)) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_jump(compiler, type);
	}

	/* Tail call: tear down the stack frame and emit a plain jump. */
	PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
}
2906
2907
/* Emits a fused compare-and-branch: loads/materializes both operands into
   registers, then emits the INVERTED conditional branch (which skips the
   patched indirect jump that follows it). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	struct sljit_jump *jump;
	sljit_s32 flags;
	sljit_ins inst;
	/* Avoid clobbering TMP_REG1 when it may be needed for src1. */
	sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;

	if (src1 & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
		src2 = src2_tmp_reg;
	}

	/* Immediate zero can use the hardwired zero register directly. */
	if (src1 == SLJIT_IMM) {
		if (src1w != 0) {
			PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
			src1 = TMP_REG1;
		}
		else
			src1 = TMP_ZERO;
	}

	if (src2 == SLJIT_IMM) {
		if (src2w != 0) {
			PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
			src2 = src2_tmp_reg;
		}
		else
			src2 = TMP_ZERO;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
	type &= 0xff;

	/* Each case emits the negated condition; >/<= are handled by
	   swapping the operands of the >=/< branch. */
	switch (type) {
	case SLJIT_EQUAL:
		inst = BNE | RJ(src1) | RD(src2);
		break;
	case SLJIT_NOT_EQUAL:
		inst = BEQ | RJ(src1) | RD(src2);
		break;
	case SLJIT_LESS:
		inst = BGEU | RJ(src1) | RD(src2);
		break;
	case SLJIT_GREATER_EQUAL:
		inst = BLTU | RJ(src1) | RD(src2);
		break;
	case SLJIT_GREATER:
		inst = BGEU | RJ(src2) | RD(src1);
		break;
	case SLJIT_LESS_EQUAL:
		inst = BLTU | RJ(src2) | RD(src1);
		break;
	case SLJIT_SIG_LESS:
		inst = BGE | RJ(src1) | RD(src2);
		break;
	case SLJIT_SIG_GREATER_EQUAL:
		inst = BLT | RJ(src1) | RD(src2);
		break;
	case SLJIT_SIG_GREATER:
		inst = BGE | RJ(src2) | RD(src1);
		break;
	case SLJIT_SIG_LESS_EQUAL:
		inst = BLT | RJ(src2) | RD(src1);
		break;
	default:
		inst = BREAK;
		SLJIT_UNREACHABLE();
	}

	PTR_FAIL_IF(push_inst(compiler, inst));

	jump->addr = compiler->size;
	PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;

	return jump;
}
3005
3006
/* Emits an indirect jump or fast call to a register, memory operand, or an
   absolute address (SLJIT_IMM). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));

	if (src != SLJIT_IMM) {
		if (src & SLJIT_MEM) {
			ADJUST_LOCAL_OFFSET(src, srcw);
			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
			src = TMP_REG1;
		}
		/* Fast calls record the return address; plain jumps discard it. */
		return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
	jump->u.target = (sljit_uw)srcw;

	jump->addr = compiler->size;
	FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;

	return SLJIT_SUCCESS;
}
3036
3037
/* Emits an indirect call; arg_types is unused on this target. With
   SLJIT_CALL_RETURN the frame is released and the call becomes a tail jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
		src = TMP_REG1;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* The frame release below restores the saved registers, so a
		   saved-register target must be moved to a scratch first. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
			src = TMP_REG1;
		}

		FAIL_IF(emit_stack_frame_release(compiler, 0));
		type = SLJIT_JUMP;
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3064
3065
/* Materializes the current condition flag ('type') as a 0/1 value and either
   moves it to 'dst' (op < SLJIT_ADD) or combines it with 'dst' using the
   arithmetic/logical op encoded in 'op'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 src_r, dst_r, invert;
	sljit_s32 saved_op = op;
	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* Binary ops need the current dst value as the other operand. */
	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));

	if (type < SLJIT_F_EQUAL) {
		src_r = OTHER_FLAG;
		/* Odd type values are the negated conditions. */
		invert = type & 0x1;

		switch (type) {
		case SLJIT_EQUAL:
		case SLJIT_NOT_EQUAL:
			/* SLTUI rd, flag, 1 computes (flag == 0). */
			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
			src_r = dst_r;
			break;
		case SLJIT_ATOMIC_STORED:
		case SLJIT_ATOMIC_NOT_STORED:
			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
			src_r = dst_r;
			/* The zero-test above already negates the flag. */
			invert ^= 0x1;
			break;
		case SLJIT_OVERFLOW:
		case SLJIT_NOT_OVERFLOW:
			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
				/* Add/sub already left a 0/1 overflow value in OTHER_FLAG. */
				src_r = OTHER_FLAG;
				break;
			}
			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
			src_r = dst_r;
			invert ^= 0x1;
			break;
		}
	} else {
		invert = 0;
		src_r = OTHER_FLAG;

		/* These float conditions are stored negated in OTHER_FLAG. */
		switch (type) {
		case SLJIT_ORDERED_NOT_EQUAL:
		case SLJIT_ORDERED_LESS_EQUAL:
		case SLJIT_ORDERED_GREATER_EQUAL:
		case SLJIT_F_NOT_EQUAL:
		case SLJIT_UNORDERED_OR_NOT_EQUAL:
		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		case SLJIT_UNORDERED_OR_LESS_EQUAL:
		case SLJIT_F_LESS_EQUAL:
		case SLJIT_F_GREATER_EQUAL:
		case SLJIT_ORDERED:
			invert = 1;
			break;
		}
	}

	if (invert) {
		/* Flip the low bit to obtain the requested polarity. */
		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
		src_r = dst_r;
	}

	if (op < SLJIT_ADD) {
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);

		if (src_r != dst_r)
			return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
		return SLJIT_SUCCESS;
	}

	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;

	if (dst & SLJIT_MEM)
		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
}
3153
3154
/* Conditional move: dst_reg = condition ? src1 : src2_reg. Implemented as
   "dst = src2; branch-if-false over (dst = src1)", where the skip branch is
   reserved first and back-filled once the size of the src1 move is known. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins *ptr;
	sljit_uw size;
	sljit_s32 is_compare = (type & SLJIT_COMPARE_SELECT);
	sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (src1 == SLJIT_IMM && type & SLJIT_32)
		src1w = (sljit_s32)src1w;

	type &= ~(SLJIT_32 | SLJIT_COMPARE_SELECT);

	if (dst_reg != src2_reg) {
		if (dst_reg == src1) {
			/* Swap the operands and invert the condition instead. */
			src1 = src2_reg;
			src1w = 0;
			if (!is_compare)
				type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				SLJIT_ASSERT(!(type & SLJIT_COMPARE_SELECT));
				/* Preserve dst_reg before it is overwritten, and rewrite
				   the src1 address to use the saved copy. */
				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));

				if ((src1 & REG_MASK) == dst_reg)
					src1 = (src1 & ~REG_MASK) | TMP_REG1;

				if (OFFS_REG(src1) == dst_reg)
					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
			}

			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
		}
	}

	if (is_compare) {
		/* Keep a copy of the comparison operand in TMP_REG3 since
		   dst_reg may be clobbered before the branch. */
		if (src1 & SLJIT_MEM) {
			FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG3, src1, src1w));
		} else if (src1 == SLJIT_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG3, src1w));
		} else
			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG3) | RJ(src1) | IMM_I12(0)));
	}

	size = compiler->size;

	/* Reserve one slot for the skip branch; filled in below. */
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
	} else if (src1 == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, dst_reg, src1w));
	} else
		FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));

	if (is_compare) {
		/* Inverted compare-and-branch between TMP_REG3 and dst_reg. */
		switch (type) {
		case SLJIT_LESS:
		case SLJIT_LESS_EQUAL:
			*ptr = BGEU;
			break;
		case SLJIT_GREATER:
		case SLJIT_GREATER_EQUAL:
			*ptr = BLTU;
			break;
		case SLJIT_SIG_LESS:
		case SLJIT_SIG_LESS_EQUAL:
			*ptr = BGE;
			break;
		default:
			*ptr = BLT;
			break;
		}

		*ptr |= RJ(TMP_REG3) | RD(dst_reg);
	} else {
		*ptr = get_jump_instruction(type);
	}

	/* Branch offset: number of instructions emitted for the src1 move. */
	*ptr |= IMM_I16(compiler->size - size);
	return SLJIT_SUCCESS;
}
3244
3245
/* Float conditional move using FSEL: dst_freg = condition ? src1 : src2_freg.
   The integer condition flag is first transferred into the FCC register
   F_OTHER_FLAG; 'invert' swaps the FSEL operand order instead of negating. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_s32 invert = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
		/* EQUAL_FLAG == 0 means equal, so EQUAL needs inversion. */
		if ((type & ~SLJIT_32) == SLJIT_EQUAL)
			invert = 1;
		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
	} else {
		/* Types whose skip-branch is BNE are true when OTHER_FLAG == 0. */
		if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
			invert = 1;
		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
		if (invert)
			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
	} else {
		if (invert)
			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
	}
}
3278
3279
#undef FLOAT_DATA
3280
3281
/* Loads or stores a register pair. The address is normalized so that both
   word accesses (offset and offset + sizeof(sw)) fit the 12 bit signed
   immediate field of the memory instructions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Base + (index << shift): fold into TMP_REG1. */
		memw &= 0x3;

		if (SLJIT_UNLIKELY(memw != 0)) {
			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
		} else
			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));

		mem = TMP_REG1;
		memw = 0;
	} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
		/* Offset out of range: split into a high part loaded into
		   TMP_REG1 and a low part kept in the immediate field. */
		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
			memw &= 0xfff;
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
			memw = 0;
		}

		if (mem & REG_MASK)
			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));

		mem = TMP_REG1;
	} else {
		mem &= REG_MASK;
		memw &= 0xfff;
	}

	SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));

	/* When loading and the base register equals the first register of the
	   pair, load the second register first so the base is not clobbered. */
	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
	}

	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);

	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
}
3334
3335
#undef TO_ARGW_HI
3336
3337
/* Collapses a memory operand (base, optional scaled index or immediate
   offset) into a single address register for SIMD load/store instructions.
   On return *mem_ptr holds a plain register: TMP_REG3, unless the operand
   was already a bare base register with a zero offset. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 addr = *mem_ptr;
	sljit_s32 base;

	if (SLJIT_UNLIKELY(addr & OFFS_REG_MASK)) {
		/* Base + (index << shift): memw carries the 0-3 shift amount. */
		*mem_ptr = TMP_REG3;
		FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(addr)) | IMM_I12(memw & 0x3)));
		return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(addr & REG_MASK));
	}

	base = addr & REG_MASK;

	if (base == 0) {
		/* Absolute address. */
		*mem_ptr = TMP_REG3;
		return load_immediate(compiler, TMP_REG3, memw);
	}

	if (memw == 0) {
		/* Bare base register: nothing to materialize. */
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG3;
	FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
	return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base));
}
3364
3365
/* Moves a whole SIMD register to/from another vector register or memory.
   reg_size 4 selects 128 bit LSX, reg_size 5 selects 256 bit LASX (the
   LASX forms are the LSX opcodes with an extra bit set). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (!(srcdst & SLJIT_MEM)) {
		/* Register-to-register move via VOR with identical sources. */
		if (type & SLJIT_SIMD_STORE)
			ins = FRD(srcdst) | FRJ(vreg) | FRK(vreg);
		else
			ins = FRD(vreg) | FRJ(srcdst) | FRK(srcdst);

		/* Bit 26 turns the LSX opcode into its LASX (XV) counterpart. */
		if (reg_size == 5)
			ins |= VOR_V | (sljit_ins)1 << 26;
		else
			ins |= VOR_V;

		return push_inst(compiler, ins);
	}

	ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;

	if (reg_size == 5)
		ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;

	/* Use the immediate-offset form when the offset fits 12 bits. */
	if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
		return push_inst(compiler, ins | FRD(vreg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
	else {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
		return push_inst(compiler, ins | FRD(vreg) | RJ(srcdst) | IMM_I12(0));
	}
}
3412
3413
/* Broadcasts a scalar (register, immediate, or memory operand) into every
   lane of a vector register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		/* Bit 25 selects the XVLDREPL (LASX) form. */
		if (reg_size == 5)
			ins = (sljit_ins)1 << 25;

		/* The set bit position encodes the element size. */
		return push_inst(compiler, VLDREPL | ins | FRD(vreg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
	}

	/* Bit 26 selects the LASX form of the remaining opcodes. */
	if (reg_size == 5)
		ins = (sljit_ins)1 << 26;

	if (type & SLJIT_SIMD_FLOAT) {
		/* Float immediates only occur as zero here; broadcast from
		   the hardwired zero register. */
		if (src == SLJIT_IMM)
			return push_inst(compiler, VREPLGR2VR | ins | FRD(vreg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);

		FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(vreg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));

		if (reg_size == 5) {
			/* Copy the low 128 bit half into the high half. */
			ins = (sljit_ins)(0x44 << 10);
			return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));
		}

		return SLJIT_SUCCESS;
	}

	ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;

	if (src == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		src = TMP_REG2;
	}

	return push_inst(compiler, ins | FRD(vreg) | RJ(src));
}
3470
3471
/* Moves a single lane between a vector register and a scalar register,
   immediate, or memory operand. For 256 bit (LASX) registers with 8/16 bit
   lanes, the insert/extract instructions only reach the low 128 bits, so
   lanes in the high half are accessed by swapping the halves with XVPERMI
   through TMP_FREG1 and operating on the low half.

   Fixes over the previous version: removed a duplicated elem_size guard,
   checked the result of emit_op_mem with FAIL_IF (every other call site
   does), and dropped an unreachable trailing return. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	/* Float lanes must be 32 or 64 bit. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;

		/* Save the source lane before the register is cleared. */
		if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
			FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
			srcdst = TMP_FREG1;
			srcdstw = 0;
		}

		FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(vreg) | FRJ(vreg) | FRK(vreg)));
	}

	if (srcdst & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		if (reg_size == 5)
			ins = (sljit_ins)1 << 25;

		if (type & SLJIT_SIMD_STORE) {
			/* VSTELM stores a single lane directly to memory. */
			ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
			return push_inst(compiler, VSTELM | ins | FRD(vreg) | RJ(srcdst)); 
		} else {
			/* No load-to-lane instruction: load the scalar into
			   TMP_REG1, then insert it. */
			FAIL_IF(emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0));
			srcdst = TMP_REG1;
			ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

			if (reg_size == 5) {
				if (elem_size < 2) {
					/* 8/16 bit inserts cannot reach the high
					   128 bits; go through TMP_FREG1. */
					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
					if (lane_index >= (2 << (3 - elem_size))) {
						FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));
					} else {
						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));
						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));
					}
				} else
					ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
			}

			return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));
		}
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Float lane moves bounce through TMP_REG1 using extract/insert. */
		ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		if (type & SLJIT_SIMD_STORE) {
			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(vreg) | IMM_V(lane_index)));
			return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
		} else {
			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
			return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(TMP_REG1) | IMM_V(lane_index));
		}
	}

	if (srcdst == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
		srcdst = TMP_REG1;
	}

	if (type & SLJIT_SIMD_STORE) {
		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		/* Toggling bit 18..20 switches to the sign-extending extract. */
		if (type & SLJIT_SIMD_LANE_SIGNED)
			ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
		else
			ins |= VPICKVE2GR_U;

		if (reg_size == 5) {
			if (elem_size < 2) {
				if (lane_index >= (2 << (3 - elem_size))) {
					if (type & SLJIT_SIMD_LANE_SIGNED)
						ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
					else
						ins |= VPICKVE2GR_U;

					/* Swap halves into TMP_FREG1 and extract
					   from its low half. */
					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
					return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
				}
			} else {
				ins ^= (sljit_ins)1 << (15 - elem_size);
				ins |= (sljit_ins)1 << 26;
			}
		}

		return push_inst(compiler, ins | RD(srcdst) | FRJ(vreg) | IMM_V(lane_index));
	} else {
		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		if (reg_size == 5) {
			if (elem_size < 2) {
				FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
				if (lane_index >= (2 << (3 - elem_size))) {
					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));
				} else {
					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));
					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));
				}
			} else
				ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
		}

		return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));
	}
}
3611
3612
/* Broadcasts one lane of 'src' into every lane of 'vreg'. For 256 bit
   registers the replicate only acts within 128 bit halves, so XVPERMI is
   used afterwards to copy the half containing the source lane across the
   whole register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* Element size is encoded in the immediate field layout. */
	ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

	if (reg_size == 5) {
		/* Replicate within each 128 bit half (lane index taken mod
		   lanes-per-half), then broadcast the correct half. */
		FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));

		ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);

		return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));
	}

	return push_inst(compiler, VREPLVEI | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index));
}
3644
3645
/* Widens the low lanes of 'src' from elem_size to elem2_size, either as a
   float conversion (f32 -> f64) or as repeated integer sign/zero extension
   steps using VSLLWIL. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		/* NOTE(review): extend is a load-only operation, so the
		   SLJIT_SIMD_STORE test should always pick VLD/XVLD here —
		   confirm against the generic sljit checker. */
		ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;

		if (reg_size == 5)
			ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;

		if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
			FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(srcw)));
		else {
			FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
			FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(0)));
		}
		src = vreg;
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Only f32 -> f64 widening is available. */
		if (elem_size != 2 || elem2_size != 3)
			return SLJIT_ERR_UNSUPPORTED;

		ins = 0;
		if (reg_size == 5) {
			ins = (sljit_ins)1 << 26;
			/* Interleave the halves so the low elements feed the
			   256 bit converter. */
			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
		}

		return push_inst(compiler, VFCVTL_D_S | ins | FRD(vreg) | FRJ(src));
	}

	/* Bit 18 selects the zero-extending VSLLWIL variant. */
	ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);

	if (reg_size == 5)
		ins |= (sljit_ins)1 << 26;

	/* Each VSLLWIL step doubles the element size; repeat until the
	   destination size is reached. */
	do {
		if (reg_size == 5)
			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));

		FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(vreg) | FRJ(src)));
		src = vreg;
	} while (++elem_size < elem2_size);

	return SLJIT_SUCCESS;
}
3711
3712
/* Extract the sign (most significant) bit of every lane of vreg into a
   general-purpose bitmask written to dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Only 128-bit (LSX) and 256-bit (LASX, hwcap-gated) vectors. */
	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	/* Lanes wider than 64 bits, or float lanes narrower than 32 bits,
	   are not supported. */
	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	/* Bit 26 selects the 256-bit (XV*) opcode form. */
	if (reg_size == 5)
		ins = (sljit_ins)1 << 26;

	/* VMSKLTZ sets one result bit per negative lane; elem_size in
	   bits 10.. selects the lane width variant. */
	FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(vreg)));

	/* Move the low mask element into the destination GPR. */
	FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));

	if (reg_size == 5) {
		/* 256-bit: fetch the mask of the high 128-bit half, shift it
		   past the low half's bit count, and merge. */
		FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
		/* 2 << (3 - elem_size) == number of lanes in 128 bits. */
		FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
		FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
	}

	/* Spill to memory if the destination was not a register. */
	if (dst_r == TMP_REG2)
		return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);

	return SLJIT_SUCCESS;
}
3758
3759
/* Two-operand vector op: dst_vreg = src1_vreg OP src2, where OP is
   AND / OR / XOR / byte shuffle. src2 may be a vector register or memory. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Only 128-bit (LSX) and 256-bit (LASX, hwcap-gated) vectors. */
	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	/* Float element sizes are restricted to 32/64-bit lanes. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* Memory second operand: load it into the scratch vector register. */
	if (src2 & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst(compiler, (reg_size == 4 ? VLD : XVLD) | FRD(TMP_FREG1) | RJ(src2) | IMM_I12(0)));
		src2 = TMP_FREG1;
	}

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND_V;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VOR_V;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VXOR_V;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		/* Shuffle is only implemented for 128-bit vectors. */
		if (reg_size != 4)
			return SLJIT_ERR_UNSUPPORTED;

		/* VSHUF.B: src2 supplies the byte indices. */
		return push_inst(compiler, VSHUF_B | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src1_vreg) | FRA(src2));
	}

	/* Bit 26 selects the 256-bit (XV*) opcode form. */
	if (reg_size == 5)
		ins |= (sljit_ins)1 << 26;

	return push_inst(compiler, ins | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src2));
}
3810
3811
/* Emit the load half of an atomic read-modify-write sequence.
   Two strategies: LL.W/LL.D load-linked (when the caller asked for LL/SC
   pairs, or the CPU lacks AMCAS), or a plain load whose value will later be
   compared by an AMCAS store. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {
		/* The caller explicitly asked for CAS but the LL/SC fallback is
		   in effect: refuse rather than emit the wrong primitive. */
		if (op & SLJIT_ATOMIC_USE_CAS)
			return SLJIT_ERR_UNSUPPORTED;

		/* LL/SC only exists at 32- and 64-bit granularity. */
		switch (GET_OPCODE(op)) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
			ins = LL_D;
			break;
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
			ins = LL_W;
			break;

		default:
			return SLJIT_ERR_UNSUPPORTED;
		}

		if (op & SLJIT_ATOMIC_TEST)
			return SLJIT_SUCCESS;

		/* LL uses an implicit zero offset here (no IMM field set). */
		return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg));
	}

	/* AMCAS path: a plain load suffices; atomicity comes from the
	   compare-and-swap emitted by sljit_emit_atomic_store. */
	switch(GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
		ins = LD_B;
		break;
	case SLJIT_MOV_U8:
		ins = LD_BU;
		break;
	case SLJIT_MOV_S16:
		ins = LD_H;
		break;
	case SLJIT_MOV_U16:
		ins = LD_HU;
		break;
	case SLJIT_MOV32:
	case SLJIT_MOV_S32:
		ins = LD_W;
		break;
	case SLJIT_MOV_U32:
		ins = LD_WU;
		break;
	default:
		ins = LD_D;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
}
3875
3876
/* Emit the store half of an atomic sequence started by sljit_emit_atomic_load.
   src_reg is the new value, mem_reg the address, temp_reg the value observed
   by the load (the expected value for AMCAS). On the SC path the success flag
   ends up in OTHER_FLAG; on the AMCAS path OTHER_FLAG is set when
   SLJIT_SET_ATOMIC_STORED is requested. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins = 0;
	sljit_ins unsign = 0;
	sljit_s32 tmp = temp_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {
		/* LL/SC fallback cannot provide CAS semantics. */
		if (op & SLJIT_ATOMIC_USE_CAS)
			return SLJIT_ERR_UNSUPPORTED;

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
			ins = SC_D;
			break;
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
			ins = SC_W;
			break;

		default:
			return SLJIT_ERR_UNSUPPORTED;
		}

		if (op & SLJIT_ATOMIC_TEST)
			return SLJIT_SUCCESS;

		/* Copy src into OTHER_FLAG first: SC overwrites its rd with the
		   success indicator, and src_reg must stay intact for retries. */
		FAIL_IF(push_inst(compiler, ADD_D | RD(OTHER_FLAG) | RJ(src_reg) | RK(TMP_ZERO)));
		return push_inst(compiler, ins | RD(OTHER_FLAG) | RJ(mem_reg));
	}

	/* AMCAS path: pick the width; for unsigned widths also prepare a
	   BSTRPICK.D mask (bits 0..7/15/31) to zero-extend the old value
	   returned by AMCAS before comparing it with temp_reg. */
	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
		ins = AMCAS_B;
		break;
	case SLJIT_MOV_U8:
		ins = AMCAS_B;
		unsign = BSTRPICK_D | (7 << 16);
		break;
	case SLJIT_MOV_S16:
		ins = AMCAS_H;
		break;
	case SLJIT_MOV_U16:
		ins = AMCAS_H;
		unsign = BSTRPICK_D | (15 << 16);
		break;
	case SLJIT_MOV32:
	case SLJIT_MOV_S32:
		ins = AMCAS_W;
		break;
	case SLJIT_MOV_U32:
		ins = AMCAS_W;
		unsign = BSTRPICK_D | (31 << 16);
		break;
	default:
		ins = AMCAS_D;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	if (op & SLJIT_SET_ATOMIC_STORED) {
		/* AMCAS clobbers rd with the old memory value; keep temp_reg
		   intact by working on a copy in TMP_REG3 (XOR with zero). */
		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG3) | RJ(temp_reg) | RK(TMP_ZERO)));
		tmp = TMP_REG3;
	}
	/* AMCAS rd = expected in, old value out; rk = new value. */
	FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
	if (!(op & SLJIT_SET_ATOMIC_STORED))
		return SLJIT_SUCCESS;

	if (unsign)
		FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));

	/* OTHER_FLAG = (old == expected): XOR then "set if < 1" (i.e. == 0). */
	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(tmp) | RK(temp_reg)));
	return push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | IMM_I12(1));
}
3959
3960
/* Materialize a full 64-bit constant into dst using the canonical
   4-instruction sequence, in a fixed patchable order:
   LU12I.W (bits 12-31), LU32I.D (bits 32-51), LU52I.D (bits 52-63),
   ORI (bits 0-11). sljit_set_jump_addr patches exactly this layout. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
	SLJIT_UNUSED_ARG(last_ins);

	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
	FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
	return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
}
3969
3970
/* Patch, in place, the 4-instruction constant sequence emitted by emit_const
   (LU12I.W / LU32I.D / LU52I.D / ORI-or-JIRL) at addr so that it loads
   new_target. Toggles W^X protection around the write and flushes the
   instruction cache afterwards. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *inst = (sljit_ins*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);

	/* Instruction 0: LU12I.W carries target bits 12-31 in its I20 field. */
	SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
	inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);

	/* Instruction 1: LU32I.D carries target bits 32-51. */
	SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
	/* Single cast is sufficient; the original had a redundant duplicated
	   (sljit_ins) cast here. */
	inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5);

	/* Instruction 2: LU52I.D carries target bits 52-63 in its I12 field. */
	SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
	inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);

	/* Instruction 3 is either the ORI finishing the constant (bits 0-11)
	   or a JIRL whose I16 offset is in 4-byte units (hence the >> 2). */
	SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
	if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
		inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
	else
		inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);

	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
3997
3998
/* Emit a later-patchable constant load into dst and record it as a
   sljit_const. The instruction shape depends on the operand width so that
   sljit_set_const can rewrite it in place (see matching cases there). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;
	sljit_s32 mem_flags = WORD_DATA;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_U8:
		/* Massage the byte into the form ADDI.D's signed 12-bit
		   immediate encodes; mirrors the patching logic in
		   sljit_set_const — NOTE(review): see that function for why
		   bit 8 forces 0xf00. */
		if (init_value & 0x100)
			init_value |= 0xf00;
		else
			init_value &= 0xff;

		PTR_FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(init_value)));
		mem_flags = BYTE_DATA;
		break;

	case SLJIT_MOV32:
		mem_flags = INT_DATA;
		SLJIT_FALLTHROUGH
	case SLJIT_MOV_S32:
		/* 32-bit constant: LU12I.W (bits 12-31) + ORI (bits 0-11). */
		PTR_FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)((init_value >> 7) & 0x1ffffe0)));
		PTR_FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(init_value)));
		break;

	default:
		/* Full 64-bit constant: 4-instruction emit_const sequence. */
		PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
		break;
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, dst, dstw));

	return const_;
}
4045
4046
/* Emit a "load address of a label" operation whose target is resolved at
   generate-time. Records a mov_addr jump and reserves JUMP_MAX_SIZE words;
   for SLJIT_ADD_ABS_ADDR the resolved address is added to dst instead of
   overwriting it. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r, target_r;
	SLJIT_UNUSED_ARG(op);

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op != SLJIT_ADD_ABS_ADDR)
		target_r = dst_r;
	else {
		/* ADD variant: the address is built in TMP_REG1 so the current
		   dst value survives until the ADD below. */
		target_r = TMP_REG1;

		if (dst & SLJIT_MEM)
			PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, dst, dstw));
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	/* Placeholder instruction word holding only the target register id;
	   the real sequence is written when jumps are resolved. */
	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)target_r));

	/* Reserve the remaining words of the worst-case jump sequence. */
	compiler->size += JUMP_MAX_SIZE - 1;

	if (op == SLJIT_ADD_ABS_ADDR)
		PTR_FAIL_IF(push_inst(compiler, ADD_D | RD(dst_r) | RJ(dst_r) | RK(TMP_REG1)));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));

	return jump;
}
4084
4085
/* Rewrite, in place, a constant previously emitted by sljit_emit_const.
   The per-width instruction shapes mirror sljit_emit_const exactly. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins* inst;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_U8:
		/* Single ADDI.D: patch its signed 12-bit immediate. */
		inst = (sljit_ins*)addr;
		SLJIT_ASSERT((inst[0] & OPC_2RI12(0xb)) == ADDI_D);

		/* Same bit-8 massage as in sljit_emit_const, so the encoded
		   immediate round-trips. */
		if (new_constant & 0x100)
			new_constant |= 0xf00;
		else
			new_constant &= 0xff;

		SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		inst[0] = (inst[0] & 0xffc003ff) | (sljit_ins)((new_constant & 0xfff) << 10);
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
		inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 1);
		return;

	case SLJIT_MOV32:
	case SLJIT_MOV_S32:
		/* LU12I.W + ORI pair: patch bits 12-31 then bits 0-11. */
		inst = (sljit_ins *)addr;
		SLJIT_ASSERT((inst[0] & OPC_1RI20(0xa)) == LU12I_W && (inst[1] & OPC_2RI12(0xe)) == ORI);

		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
		inst[0] = (inst[0] & (OPC_1RI20(0xa) | 0x1f)) | (sljit_ins)((new_constant >> 7) & 0x1ffffe0);
		inst[1] = (inst[1] & (OPC_2RI12(0xe) | 0x3ff)) | (sljit_ins)((new_constant & 0xfff) << 10);
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
		return;

	default:
		/* 64-bit constants use the same 4-instruction layout as jump
		   targets; reuse the jump patcher. */
		sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
		return;
	}
}
4124
4125