Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/re-spirv/re-spirv.cpp
14709 views
1
//
2
// re-spirv
3
//
4
// Copyright (c) 2024 renderbag and contributors. All rights reserved.
5
// Licensed under the MIT license. See LICENSE file for details.
6
//
7
8
#include "re-spirv.h"
9
10
#include <algorithm>
11
#include <cassert>
12
#include <cstdio>
13
#include <cstring>
14
#include <unordered_map>
15
16
#define SPV_ENABLE_UTILITY_CODE
17
18
#include "spirv/unified1/spirv.h"
19
20
// Enables more extensive output on errors.
21
#define RESPV_VERBOSE_ERRORS 0
22
23
namespace respv {
24
// Common.
25
26
// Returns true when pOpCode is one of the opcodes enumerated below and false for every other opcode.
static bool SpvIsSupported(SpvOp pOpCode) {
    switch (pOpCode) {
        // Miscellaneous, debug, extension and mode-setting instructions.
        case SpvOpUndef:
        case SpvOpSource:
        case SpvOpName:
        case SpvOpMemberName:
        case SpvOpExtension:
        case SpvOpExtInstImport:
        case SpvOpExtInst:
        case SpvOpMemoryModel:
        case SpvOpEntryPoint:
        case SpvOpExecutionMode:
        case SpvOpCapability:
        // Type declarations.
        case SpvOpTypeVoid:
        case SpvOpTypeBool:
        case SpvOpTypeInt:
        case SpvOpTypeFloat:
        case SpvOpTypeVector:
        case SpvOpTypeMatrix:
        case SpvOpTypeImage:
        case SpvOpTypeSampler:
        case SpvOpTypeSampledImage:
        case SpvOpTypeArray:
        case SpvOpTypeRuntimeArray:
        case SpvOpTypeStruct:
        case SpvOpTypePointer:
        case SpvOpTypeFunction:
        // Constants and specialization constants.
        case SpvOpConstantTrue:
        case SpvOpConstantFalse:
        case SpvOpConstant:
        case SpvOpConstantComposite:
        case SpvOpConstantNull:
        case SpvOpSpecConstantTrue:
        case SpvOpSpecConstantFalse:
        case SpvOpSpecConstant:
        case SpvOpSpecConstantOp:
        // Functions.
        case SpvOpFunction:
        case SpvOpFunctionParameter:
        case SpvOpFunctionEnd:
        case SpvOpFunctionCall:
        // Memory access.
        case SpvOpVariable:
        case SpvOpImageTexelPointer:
        case SpvOpLoad:
        case SpvOpStore:
        case SpvOpAccessChain:
        // Decorations.
        case SpvOpDecorate:
        case SpvOpMemberDecorate:
        // Composites.
        case SpvOpVectorShuffle:
        case SpvOpCompositeConstruct:
        case SpvOpCompositeExtract:
        case SpvOpCompositeInsert:
        case SpvOpCopyObject:
        case SpvOpTranspose:
        // Images.
        case SpvOpSampledImage:
        case SpvOpImageSampleImplicitLod:
        case SpvOpImageSampleExplicitLod:
        case SpvOpImageSampleDrefImplicitLod:
        case SpvOpImageSampleDrefExplicitLod:
        case SpvOpImageSampleProjImplicitLod:
        case SpvOpImageSampleProjExplicitLod:
        case SpvOpImageSampleProjDrefImplicitLod:
        case SpvOpImageSampleProjDrefExplicitLod:
        case SpvOpImageFetch:
        case SpvOpImageGather:
        case SpvOpImageDrefGather:
        case SpvOpImageRead:
        case SpvOpImageWrite:
        case SpvOpImage:
        case SpvOpImageQueryFormat:
        case SpvOpImageQueryOrder:
        case SpvOpImageQuerySizeLod:
        case SpvOpImageQuerySize:
        case SpvOpImageQueryLod:
        case SpvOpImageQueryLevels:
        case SpvOpImageQuerySamples:
        // Conversions.
        case SpvOpConvertFToU:
        case SpvOpConvertFToS:
        case SpvOpConvertSToF:
        case SpvOpConvertUToF:
        case SpvOpUConvert:
        case SpvOpSConvert:
        case SpvOpFConvert:
        case SpvOpBitcast:
        // Arithmetic.
        case SpvOpSNegate:
        case SpvOpFNegate:
        case SpvOpIAdd:
        case SpvOpFAdd:
        case SpvOpISub:
        case SpvOpFSub:
        case SpvOpIMul:
        case SpvOpFMul:
        case SpvOpUDiv:
        case SpvOpSDiv:
        case SpvOpFDiv:
        case SpvOpUMod:
        case SpvOpSRem:
        case SpvOpSMod:
        case SpvOpFRem:
        case SpvOpFMod:
        case SpvOpVectorTimesScalar:
        case SpvOpMatrixTimesScalar:
        case SpvOpVectorTimesMatrix:
        case SpvOpMatrixTimesVector:
        case SpvOpMatrixTimesMatrix:
        case SpvOpOuterProduct:
        case SpvOpDot:
        case SpvOpIAddCarry:
        case SpvOpISubBorrow:
        case SpvOpUMulExtended:
        case SpvOpSMulExtended:
        // Relational and logical.
        case SpvOpAny:
        case SpvOpAll:
        case SpvOpIsNan:
        case SpvOpIsInf:
        case SpvOpIsFinite:
        case SpvOpIsNormal:
        case SpvOpLogicalEqual:
        case SpvOpLogicalNotEqual:
        case SpvOpLogicalOr:
        case SpvOpLogicalAnd:
        case SpvOpLogicalNot:
        case SpvOpSelect:
        case SpvOpIEqual:
        case SpvOpINotEqual:
        case SpvOpUGreaterThan:
        case SpvOpSGreaterThan:
        case SpvOpUGreaterThanEqual:
        case SpvOpSGreaterThanEqual:
        case SpvOpULessThan:
        case SpvOpSLessThan:
        case SpvOpULessThanEqual:
        case SpvOpSLessThanEqual:
        case SpvOpFOrdEqual:
        case SpvOpFUnordEqual:
        case SpvOpFOrdNotEqual:
        case SpvOpFUnordNotEqual:
        case SpvOpFOrdLessThan:
        case SpvOpFUnordLessThan:
        case SpvOpFOrdGreaterThan:
        case SpvOpFUnordGreaterThan:
        case SpvOpFOrdLessThanEqual:
        case SpvOpFUnordLessThanEqual:
        case SpvOpFOrdGreaterThanEqual:
        case SpvOpFUnordGreaterThanEqual:
        // Bit manipulation.
        case SpvOpShiftRightLogical:
        case SpvOpShiftRightArithmetic:
        case SpvOpShiftLeftLogical:
        case SpvOpBitwiseOr:
        case SpvOpBitwiseXor:
        case SpvOpBitwiseAnd:
        case SpvOpNot:
        case SpvOpBitFieldInsert:
        case SpvOpBitFieldSExtract:
        case SpvOpBitFieldUExtract:
        case SpvOpBitReverse:
        case SpvOpBitCount:
        // Derivatives.
        case SpvOpDPdx:
        case SpvOpDPdy:
        case SpvOpFwidth:
        // Barriers.
        case SpvOpControlBarrier:
        case SpvOpMemoryBarrier:
        // Atomics.
        case SpvOpAtomicLoad:
        case SpvOpAtomicStore:
        case SpvOpAtomicExchange:
        case SpvOpAtomicCompareExchange:
        case SpvOpAtomicCompareExchangeWeak:
        case SpvOpAtomicIIncrement:
        case SpvOpAtomicIDecrement:
        case SpvOpAtomicIAdd:
        case SpvOpAtomicISub:
        case SpvOpAtomicSMin:
        case SpvOpAtomicUMin:
        case SpvOpAtomicSMax:
        case SpvOpAtomicUMax:
        case SpvOpAtomicAnd:
        case SpvOpAtomicOr:
        case SpvOpAtomicXor:
        // Control flow.
        case SpvOpPhi:
        case SpvOpLoopMerge:
        case SpvOpSelectionMerge:
        case SpvOpLabel:
        case SpvOpBranch:
        case SpvOpBranchConditional:
        case SpvOpSwitch:
        case SpvOpKill:
        case SpvOpReturn:
        case SpvOpReturnValue:
        case SpvOpUnreachable:
        // Non-uniform group operations.
        case SpvOpGroupNonUniformElect:
        case SpvOpGroupNonUniformAll:
        case SpvOpGroupNonUniformAny:
        case SpvOpGroupNonUniformAllEqual:
        case SpvOpGroupNonUniformBroadcast:
        case SpvOpGroupNonUniformBroadcastFirst:
        case SpvOpGroupNonUniformBallot:
        case SpvOpGroupNonUniformInverseBallot:
        case SpvOpGroupNonUniformBallotBitExtract:
        case SpvOpGroupNonUniformBallotBitCount:
        case SpvOpGroupNonUniformBallotFindLSB:
        case SpvOpGroupNonUniformBallotFindMSB:
        case SpvOpGroupNonUniformShuffle:
        case SpvOpGroupNonUniformShuffleXor:
        case SpvOpGroupNonUniformShuffleUp:
        case SpvOpGroupNonUniformShuffleDown:
        case SpvOpGroupNonUniformIAdd:
        case SpvOpGroupNonUniformFAdd:
        case SpvOpGroupNonUniformIMul:
        case SpvOpGroupNonUniformFMul:
        case SpvOpGroupNonUniformSMin:
        case SpvOpGroupNonUniformUMin:
        case SpvOpGroupNonUniformFMin:
        case SpvOpGroupNonUniformSMax:
        case SpvOpGroupNonUniformUMax:
        case SpvOpGroupNonUniformFMax:
        case SpvOpGroupNonUniformBitwiseAnd:
        case SpvOpGroupNonUniformBitwiseOr:
        case SpvOpGroupNonUniformBitwiseXor:
        case SpvOpGroupNonUniformLogicalAnd:
        case SpvOpGroupNonUniformLogicalOr:
        case SpvOpGroupNonUniformLogicalXor:
        case SpvOpGroupNonUniformQuadBroadcast:
        case SpvOpGroupNonUniformQuadSwap:
        // Copies between logically matching types.
        case SpvOpCopyLogical:
            return true;
        default:
            break;
    }

    // Anything not listed above is not supported.
    return false;
}
254
255
// Returns true for OpSource, OpName and OpMemberName, and false for every other opcode.
static bool SpvIsIgnored(SpvOp pOpCode) {
    const bool isSource = (pOpCode == SpvOpSource);
    const bool isName = (pOpCode == SpvOpName) || (pOpCode == SpvOpMemberName);
    return isSource || isName;
}
265
266
// Looks up the operand layout associated with an opcode.
//
// Returns false (leaving every output untouched) when the opcode is not listed here. Otherwise returns true
// after filling the outputs:
//   rOperandWordStart      - first operand word of the instruction (0 together with a count of 0 means no operands).
//   rOperandWordCount      - number of operand words; UINT32_MAX is used by the variable-length instructions
//                            (presumably "until the end of the instruction" - confirm against the callers).
//   rOperandWordStride     - step between consecutive operand words.
//   rOperandWordSkip       - relative operand position to skip; UINT32_MAX is used where nothing is skipped
//                            (see checkOperandWordSkip() for how the comparison is made).
//   rOperandWordSkipString - whether the skipped position holds a string instead of a single word.
// pIncludePhi selects whether OpPhi reports its operands or reports none.
static bool SpvHasOperands(SpvOp pOpCode, uint32_t &rOperandWordStart, uint32_t &rOperandWordCount, uint32_t &rOperandWordStride, uint32_t &rOperandWordSkip, bool &rOperandWordSkipString, bool pIncludePhi) {
    // Stores one layout into the output references and reports success.
    const auto describe = [&](uint32_t pStart, uint32_t pCount, uint32_t pStride, uint32_t pSkip, bool pSkipString) {
        rOperandWordStart = pStart;
        rOperandWordCount = pCount;
        rOperandWordStride = pStride;
        rOperandWordSkip = pSkip;
        rOperandWordSkipString = pSkipString;
        return true;
    };

    switch (pOpCode) {
        // One operand at word 1.
        case SpvOpExecutionMode:
        case SpvOpBranchConditional:
        case SpvOpSwitch:
        case SpvOpReturnValue:
        case SpvOpDecorate:
        case SpvOpMemberDecorate:
            return describe(1, 1, 1, UINT32_MAX, false);

        // Two operands starting at word 1.
        case SpvOpStore:
        case SpvOpMemoryBarrier:
            return describe(1, 2, 1, UINT32_MAX, false);

        // Three operands starting at word 1.
        case SpvOpControlBarrier:
            return describe(1, 3, 1, UINT32_MAX, false);

        // One operand at word 2.
        case SpvOpTypeVector:
        case SpvOpTypeMatrix:
        case SpvOpTypeImage:
        case SpvOpTypeSampledImage:
        case SpvOpTypeRuntimeArray:
            return describe(2, 1, 1, UINT32_MAX, false);

        // Two operands starting at word 2.
        case SpvOpTypeArray:
            return describe(2, 2, 1, UINT32_MAX, false);

        // Variable amount of operands starting at word 2.
        case SpvOpTypeStruct:
        case SpvOpTypeFunction:
            return describe(2, UINT32_MAX, 1, UINT32_MAX, false);

        // Variable amount of operands starting at word 2, with a string skipped at relative position 1.
        case SpvOpEntryPoint:
            return describe(2, UINT32_MAX, 1, 1, true);

        // One operand at word 3.
        case SpvOpTypePointer:
        case SpvOpLoad:
        case SpvOpCompositeExtract:
        case SpvOpCopyObject:
        case SpvOpTranspose:
        case SpvOpImage:
        case SpvOpImageQueryFormat:
        case SpvOpImageQueryOrder:
        case SpvOpImageQuerySize:
        case SpvOpImageQueryLevels:
        case SpvOpImageQuerySamples:
        case SpvOpConvertFToU:
        case SpvOpConvertFToS:
        case SpvOpConvertSToF:
        case SpvOpConvertUToF:
        case SpvOpUConvert:
        case SpvOpSConvert:
        case SpvOpFConvert:
        case SpvOpBitcast:
        case SpvOpSNegate:
        case SpvOpFNegate:
        case SpvOpAny:
        case SpvOpAll:
        case SpvOpIsNan:
        case SpvOpIsInf:
        case SpvOpIsFinite:
        case SpvOpIsNormal:
        case SpvOpLogicalNot:
        case SpvOpNot:
        case SpvOpBitReverse:
        case SpvOpBitCount:
        case SpvOpDPdx:
        case SpvOpDPdy:
        case SpvOpFwidth:
        case SpvOpGroupNonUniformElect:
        case SpvOpCopyLogical:
            return describe(3, 1, 1, UINT32_MAX, false);

        // Two operands starting at word 3.
        case SpvOpVectorShuffle:
        case SpvOpCompositeInsert:
        case SpvOpSampledImage:
        case SpvOpImageQuerySizeLod:
        case SpvOpImageQueryLod:
        case SpvOpIAdd:
        case SpvOpFAdd:
        case SpvOpISub:
        case SpvOpFSub:
        case SpvOpIMul:
        case SpvOpFMul:
        case SpvOpUDiv:
        case SpvOpSDiv:
        case SpvOpFDiv:
        case SpvOpUMod:
        case SpvOpSRem:
        case SpvOpSMod:
        case SpvOpFRem:
        case SpvOpFMod:
        case SpvOpVectorTimesScalar:
        case SpvOpMatrixTimesScalar:
        case SpvOpVectorTimesMatrix:
        case SpvOpMatrixTimesVector:
        case SpvOpMatrixTimesMatrix:
        case SpvOpOuterProduct:
        case SpvOpDot:
        case SpvOpIAddCarry:
        case SpvOpISubBorrow:
        case SpvOpUMulExtended:
        case SpvOpSMulExtended:
        case SpvOpLogicalEqual:
        case SpvOpLogicalNotEqual:
        case SpvOpLogicalOr:
        case SpvOpLogicalAnd:
        case SpvOpIEqual:
        case SpvOpINotEqual:
        case SpvOpUGreaterThan:
        case SpvOpSGreaterThan:
        case SpvOpUGreaterThanEqual:
        case SpvOpSGreaterThanEqual:
        case SpvOpULessThan:
        case SpvOpSLessThan:
        case SpvOpULessThanEqual:
        case SpvOpSLessThanEqual:
        case SpvOpFOrdEqual:
        case SpvOpFUnordEqual:
        case SpvOpFOrdNotEqual:
        case SpvOpFUnordNotEqual:
        case SpvOpFOrdLessThan:
        case SpvOpFUnordLessThan:
        case SpvOpFOrdGreaterThan:
        case SpvOpFUnordGreaterThan:
        case SpvOpFOrdLessThanEqual:
        case SpvOpFUnordLessThanEqual:
        case SpvOpFOrdGreaterThanEqual:
        case SpvOpFUnordGreaterThanEqual:
        case SpvOpShiftRightLogical:
        case SpvOpShiftRightArithmetic:
        case SpvOpShiftLeftLogical:
        case SpvOpBitwiseOr:
        case SpvOpBitwiseAnd:
        case SpvOpBitwiseXor:
        case SpvOpGroupNonUniformAll:
        case SpvOpGroupNonUniformAny:
        case SpvOpGroupNonUniformAllEqual:
        case SpvOpGroupNonUniformBroadcastFirst:
        case SpvOpGroupNonUniformBallot:
        case SpvOpGroupNonUniformInverseBallot:
        case SpvOpGroupNonUniformBallotFindLSB:
        case SpvOpGroupNonUniformBallotFindMSB:
            return describe(3, 2, 1, UINT32_MAX, false);

        // Three operands starting at word 3.
        case SpvOpImageTexelPointer:
        case SpvOpSelect:
        case SpvOpBitFieldSExtract:
        case SpvOpBitFieldUExtract:
        case SpvOpAtomicLoad:
        case SpvOpAtomicIIncrement:
        case SpvOpAtomicIDecrement:
        case SpvOpGroupNonUniformBroadcast:
        case SpvOpGroupNonUniformBallotBitExtract:
        case SpvOpGroupNonUniformShuffle:
        case SpvOpGroupNonUniformShuffleXor:
        case SpvOpGroupNonUniformShuffleUp:
        case SpvOpGroupNonUniformShuffleDown:
        case SpvOpGroupNonUniformQuadBroadcast:
        case SpvOpGroupNonUniformQuadSwap:
            return describe(3, 3, 1, UINT32_MAX, false);

        // Three operands starting at word 3, skipping relative position 1.
        case SpvOpGroupNonUniformBallotBitCount:
            return describe(3, 3, 1, 1, false);

        // Four operands starting at word 1.
        case SpvOpAtomicStore:
            return describe(1, 4, 1, UINT32_MAX, false);

        // Four operands starting at word 3.
        case SpvOpBitFieldInsert:
        case SpvOpAtomicExchange:
        case SpvOpAtomicIAdd:
        case SpvOpAtomicISub:
        case SpvOpAtomicSMin:
        case SpvOpAtomicUMin:
        case SpvOpAtomicSMax:
        case SpvOpAtomicUMax:
        case SpvOpAtomicAnd:
        case SpvOpAtomicOr:
        case SpvOpAtomicXor:
            return describe(3, 4, 1, UINT32_MAX, false);

        // Six operands starting at word 3.
        case SpvOpAtomicCompareExchange:
        case SpvOpAtomicCompareExchangeWeak:
            return describe(3, 6, 1, UINT32_MAX, false);

        // Variable amount of operands starting at word 3.
        case SpvOpConstantComposite:
        case SpvOpFunctionCall:
        case SpvOpAccessChain:
        case SpvOpCompositeConstruct:
            return describe(3, UINT32_MAX, 1, UINT32_MAX, false);

        // Variable amount of operands starting at word 3, skipping relative position 0.
        case SpvOpSpecConstantOp:
            return describe(3, UINT32_MAX, 1, 0, false);

        // Variable amount of operands starting at word 3, skipping relative position 1.
        case SpvOpExtInst:
        case SpvOpGroupNonUniformIAdd:
        case SpvOpGroupNonUniformFAdd:
        case SpvOpGroupNonUniformIMul:
        case SpvOpGroupNonUniformFMul:
        case SpvOpGroupNonUniformSMin:
        case SpvOpGroupNonUniformUMin:
        case SpvOpGroupNonUniformFMin:
        case SpvOpGroupNonUniformSMax:
        case SpvOpGroupNonUniformUMax:
        case SpvOpGroupNonUniformFMax:
        case SpvOpGroupNonUniformBitwiseAnd:
        case SpvOpGroupNonUniformBitwiseOr:
        case SpvOpGroupNonUniformBitwiseXor:
        case SpvOpGroupNonUniformLogicalAnd:
        case SpvOpGroupNonUniformLogicalOr:
        case SpvOpGroupNonUniformLogicalXor:
            return describe(3, UINT32_MAX, 1, 1, false);

        // Variable amount of operands starting at word 1, skipping relative position 3.
        case SpvOpImageWrite:
            return describe(1, UINT32_MAX, 1, 3, false);

        // Variable amount of operands starting at word 3, skipping relative position 2.
        case SpvOpImageSampleImplicitLod:
        case SpvOpImageSampleExplicitLod:
        case SpvOpImageSampleProjImplicitLod:
        case SpvOpImageSampleProjExplicitLod:
        case SpvOpImageFetch:
        case SpvOpImageRead:
            return describe(3, UINT32_MAX, 1, 2, false);

        // Variable amount of operands starting at word 3, skipping relative position 3.
        case SpvOpImageSampleDrefImplicitLod:
        case SpvOpImageSampleDrefExplicitLod:
        case SpvOpImageSampleProjDrefImplicitLod:
        case SpvOpImageSampleProjDrefExplicitLod:
        case SpvOpImageGather:
        case SpvOpImageDrefGather:
            return describe(3, UINT32_MAX, 1, 3, false);

        // OpPhi only exposes its operands (every second word from word 3) when explicitly requested.
        case SpvOpPhi:
            return pIncludePhi ? describe(3, UINT32_MAX, 2, UINT32_MAX, false) : describe(0, 0, 0, 0, false);

        // One operand at word 4.
        case SpvOpFunction:
        case SpvOpVariable:
            return describe(4, 1, 1, UINT32_MAX, false);

        // Known instructions that report no operands at all.
        case SpvOpLabel:
        case SpvOpBranch:
        case SpvOpConstantTrue:
        case SpvOpConstantFalse:
        case SpvOpConstant:
        case SpvOpConstantSampler:
        case SpvOpConstantNull:
        case SpvOpSpecConstantTrue:
        case SpvOpSpecConstantFalse:
        case SpvOpSpecConstant:
        case SpvOpCapability:
        case SpvOpExtInstImport:
        case SpvOpMemoryModel:
        case SpvOpTypeVoid:
        case SpvOpTypeBool:
        case SpvOpTypeInt:
        case SpvOpTypeFloat:
        case SpvOpTypeSampler:
        case SpvOpLoopMerge:
        case SpvOpSelectionMerge:
        case SpvOpKill:
        case SpvOpReturn:
        case SpvOpUnreachable:
        case SpvOpFunctionParameter:
        case SpvOpFunctionEnd:
        case SpvOpExtension:
        case SpvOpUndef:
            return describe(0, 0, 0, 0, false);

        // Unknown opcode: the outputs are left as they were.
        default:
            return false;
    }
}
639
640
// Looks up which words of an instruction hold labels.
//
// Returns true and fills rLabelWordStart (first label word), rLabelWordCount (amount of label words, UINT32_MAX
// for the variable-length instructions) and rLabelWordStride (step between label words) for the opcodes listed
// below. Returns false without touching the outputs for any other opcode, and for OpPhi when pIncludePhi is false.
static bool SpvHasLabels(SpvOp pOpCode, uint32_t &rLabelWordStart, uint32_t &rLabelWordCount, uint32_t &rLabelWordStride, bool pIncludePhi) {
    // Stores one layout into the output references and reports success.
    const auto describe = [&](uint32_t pStart, uint32_t pCount, uint32_t pStride) {
        rLabelWordStart = pStart;
        rLabelWordCount = pCount;
        rLabelWordStride = pStride;
        return true;
    };

    switch (pOpCode) {
        // A single label at word 1.
        case SpvOpSelectionMerge:
        case SpvOpBranch:
            return describe(1, 1, 1);

        // Two consecutive labels starting at word 1.
        case SpvOpLoopMerge:
            return describe(1, 2, 1);

        // Two consecutive labels starting at word 2.
        case SpvOpBranchConditional:
            return describe(2, 2, 1);

        // Variable amount of labels on every second word, starting at word 2.
        case SpvOpSwitch:
            return describe(2, UINT32_MAX, 2);

        // Variable amount of labels on every second word, starting at word 4, only when requested.
        case SpvOpPhi:
            return pIncludePhi ? describe(4, UINT32_MAX, 2) : false;

        default:
            return false;
    }
}
677
678
// Used to indicate which operations have side effects and can't be discarded if their result is not used.
679
static bool SpvHasSideEffects(SpvOp pOpCode) {
680
switch (pOpCode) {
681
case SpvOpFunctionCall:
682
case SpvOpAtomicExchange:
683
case SpvOpAtomicCompareExchange:
684
case SpvOpAtomicCompareExchangeWeak:
685
case SpvOpAtomicIIncrement:
686
case SpvOpAtomicIDecrement:
687
case SpvOpAtomicIAdd:
688
case SpvOpAtomicISub:
689
case SpvOpAtomicSMin:
690
case SpvOpAtomicUMin:
691
case SpvOpAtomicSMax:
692
case SpvOpAtomicUMax:
693
case SpvOpAtomicAnd:
694
case SpvOpAtomicOr:
695
case SpvOpAtomicXor:
696
case SpvOpAtomicFlagTestAndSet:
697
case SpvOpAtomicFlagClear:
698
return true;
699
default:
700
return false;
701
}
702
}
703
704
// Returns true for the branch, return, kill and unreachable opcodes listed below, false for everything else.
static bool SpvOpIsTerminator(SpvOp pOpCode) {
    const bool isBranch = (pOpCode == SpvOpBranch) || (pOpCode == SpvOpBranchConditional) || (pOpCode == SpvOpSwitch);
    const bool isReturn = (pOpCode == SpvOpReturn) || (pOpCode == SpvOpReturnValue);
    const bool isAbort = (pOpCode == SpvOpKill) || (pOpCode == SpvOpUnreachable);
    return isBranch || isReturn || isAbort;
}
718
719
// Checks whether the operand at pRelativeWordIndex is the one that must be skipped (pOperandWordSkip).
//
// When it is not, returns false and leaves rOperandWordIndex untouched. When it is, advances rOperandWordIndex
// past the skipped operand and returns true: by one word for a regular operand, or by the length in words of the
// null-terminated string found at pSpirvWords[pWordIndex + rOperandWordIndex] (terminator included) when
// pOperandWordSkipString is set.
static bool checkOperandWordSkip(uint32_t pWordIndex, const uint32_t *pSpirvWords, uint32_t pRelativeWordIndex, uint32_t pOperandWordSkip, bool pOperandWordSkipString, uint32_t &rOperandWordIndex) {
    if (pRelativeWordIndex != pOperandWordSkip) {
        return false;
    }

    uint32_t wordsToSkip = 1;
    if (pOperandWordSkipString) {
        // Adding a full word before dividing rounds up and guarantees room for the terminator.
        const char *skippedText = reinterpret_cast<const char *>(&pSpirvWords[pWordIndex + rOperandWordIndex]);
        wordsToSkip = (strlen(skippedText) + sizeof(uint32_t)) / sizeof(uint32_t);
    }

    rOperandWordIndex += wordsToSkip;
    return true;
}
736
737
static uint32_t addToList(uint32_t pInstructionIndex, uint32_t pListIndex, std::vector<ListNode> &rListNodes) {
738
rListNodes.emplace_back(pInstructionIndex, pListIndex);
739
return uint32_t(rListNodes.size() - 1);
740
}
741
742
// Shader
743
744
// Default constructor: performs no work of its own.
Shader::Shader() = default;
747
748
// Constructs a shader by forwarding the arguments to parse(). Whatever parse() reports is not inspected here.
Shader::Shader(const void *pData, size_t pSize, bool pInlineFunctions) {
    parse(pData, pSize, pInlineFunctions);
}
751
752
void Shader::clear() {
753
extSpirvWords = nullptr;
754
extSpirvWordCount = 0;
755
inlinedSpirvWords.clear();
756
instructions.clear();
757
instructionAdjacentListIndices.clear();
758
instructionInDegrees.clear();
759
instructionOutDegrees.clear();
760
instructionOrder.clear();
761
blocks.clear();
762
blockPreOrderIndices.clear();
763
blockPostOrderIndices.clear();
764
functions.clear();
765
variableOrder.clear();
766
results.clear();
767
specializations.clear();
768
decorations.clear();
769
phis.clear();
770
loopHeaders.clear();
771
listNodes.clear();
772
defaultSwitchOpConstantInt = UINT32_MAX;
773
}
774
775
// Word index at which instruction parsing begins. Data with fewer words than this is rejected by checkData().
constexpr uint32_t SpvStartWordIndex = 5;
776
777
bool Shader::checkData(const void *pData, size_t pSize) {
778
const uint32_t *words = reinterpret_cast<const uint32_t *>(pData);
779
const size_t wordCount = pSize / sizeof(uint32_t);
780
if (wordCount < SpvStartWordIndex) {
781
fprintf(stderr, "Not enough words in SPIR-V.\n");
782
return false;
783
}
784
785
if (words[0] != SpvMagicNumber) {
786
fprintf(stderr, "Invalid SPIR-V Magic Number on header.\n");
787
return false;
788
}
789
790
if (words[1] > SpvVersion) {
791
fprintf(stderr, "SPIR-V Version is too new for the library. Max version for the library is 0x%X.\n", SpvVersion);
792
return false;
793
}
794
795
return true;
796
}
797
798
bool Shader::inlineData(const void *pData, size_t pSize) {
799
assert(pData != nullptr);
800
assert(pSize > 0);
801
802
struct CallItem {
803
uint32_t wordIndex = 0;
804
uint32_t functionId = UINT32_MAX;
805
uint32_t blockId = UINT32_MAX;
806
uint32_t startBlockId = UINT32_MAX;
807
uint32_t loopBlockId = UINT32_MAX;
808
uint32_t continueBlockId = UINT32_MAX;
809
uint32_t returnBlockId = UINT32_MAX;
810
uint32_t resultType = UINT32_MAX;
811
uint32_t resultId = UINT32_MAX;
812
uint32_t parameterIndex = 0;
813
uint32_t remapsPendingCount = 0;
814
uint32_t returnParametersCount = 0;
815
uint32_t sameBlockOperationsCount = 0;
816
bool startBlockIdAssigned = false;
817
bool functionInlined = false;
818
819
CallItem(uint32_t wordIndex, uint32_t functionId = UINT32_MAX, bool functionInlined = false, uint32_t startBlockId = UINT32_MAX, uint32_t loopBlockId = UINT32_MAX, uint32_t continueBlockId = UINT32_MAX, uint32_t returnBlockId = UINT32_MAX, uint32_t resultType = UINT32_MAX, uint32_t resultId = UINT32_MAX)
820
: wordIndex(wordIndex), functionId(functionId), functionInlined(functionInlined), startBlockId(startBlockId), loopBlockId(loopBlockId), continueBlockId(continueBlockId), returnBlockId(returnBlockId), resultType(resultType), resultId(resultId)
821
{
822
// Regular constructor.
823
}
824
};
825
826
struct FunctionDefinition {
827
uint32_t wordIndex = 0;
828
uint32_t wordCount = 0;
829
uint32_t resultId = UINT32_MAX;
830
uint32_t functionWordCount = 0;
831
uint32_t codeWordCount = 0;
832
uint32_t variableWordCount = 0;
833
uint32_t decorationWordCount = 0;
834
uint32_t inlineWordCount = 0;
835
uint32_t returnValueCount = 0;
836
uint32_t callIndex = 0;
837
uint32_t callCount = 0;
838
uint32_t parameterIndex = 0;
839
uint32_t parameterCount = 0;
840
uint32_t inlinedVariableWordCount = 0;
841
bool canInline = true;
842
843
FunctionDefinition() {
844
// Default empty constructor.
845
}
846
847
FunctionDefinition(uint32_t resultId) : resultId(resultId) {
848
// Constructor for sorting.
849
}
850
851
bool operator<(const FunctionDefinition &other) const {
852
return resultId < other.resultId;
853
}
854
};
855
856
struct FunctionParameter {
857
uint32_t resultId = 0;
858
859
FunctionParameter(uint32_t resultId) : resultId(resultId) {
860
// Regular constructor.
861
}
862
};
863
864
struct FunctionCall {
865
uint32_t wordIndex = 0;
866
uint32_t functionId = 0;
867
uint32_t sameBlockWordCount = 0;
868
869
FunctionCall(uint32_t wordIndex, uint32_t functionId, uint32_t sameBlockWordCount) : wordIndex(wordIndex), functionId(functionId), sameBlockWordCount(sameBlockWordCount) {
870
// Regular constructor.
871
}
872
};
873
874
struct FunctionResult {
875
uint32_t wordIndex = UINT32_MAX;
876
uint32_t decorationIndex = UINT32_MAX;
877
};
878
879
typedef std::vector<FunctionDefinition>::iterator FunctionDefinitionIterator;
880
881
struct FunctionItem {
882
FunctionDefinitionIterator function = {};
883
FunctionDefinitionIterator rootFunction = {};
884
uint32_t callIndex = 0;
885
886
FunctionItem(FunctionDefinitionIterator function, FunctionDefinitionIterator rootFunction, uint32_t callIndex) : function(function), rootFunction(rootFunction), callIndex(callIndex) {
887
// Regular constructor.
888
}
889
};
890
891
struct ResultDecoration {
892
uint32_t wordIndex = 0;
893
uint32_t nextDecorationIndex = 0;
894
895
ResultDecoration(uint32_t wordIndex, uint32_t nextDecorationIndex) : wordIndex(wordIndex), nextDecorationIndex(nextDecorationIndex) {
896
// Regular constructor.
897
}
898
};
899
900
thread_local std::vector<FunctionResult> functionResultMap;
901
thread_local std::vector<ResultDecoration> resultDecorations;
902
thread_local std::vector<uint32_t> loopMergeIdStack;
903
thread_local std::vector<FunctionDefinition> functionDefinitions;
904
thread_local std::vector<FunctionParameter> functionParameters;
905
thread_local std::vector<FunctionCall> functionCalls;
906
thread_local std::vector<FunctionItem> functionStack;
907
thread_local std::vector<CallItem> callStack;
908
thread_local std::vector<uint32_t> shaderResultMap;
909
thread_local std::vector<uint32_t> storeMap;
910
thread_local std::vector<uint32_t> storeMapChanges;
911
thread_local std::vector<uint32_t> loadMap;
912
thread_local std::vector<uint32_t> loadMapChanges;
913
thread_local std::vector<uint32_t> phiMap;
914
thread_local std::vector<uint32_t> opPhis;
915
thread_local std::vector<uint32_t> remapsPending;
916
thread_local std::vector<uint32_t> returnParameters;
917
thread_local std::vector<uint32_t> sameBlockOperations;
918
functionResultMap.clear();
919
resultDecorations.clear();
920
loopMergeIdStack.clear();
921
functionDefinitions.clear();
922
functionParameters.clear();
923
functionCalls.clear();
924
callStack.clear();
925
shaderResultMap.clear();
926
storeMap.clear();
927
storeMapChanges.clear();
928
loadMap.clear();
929
loadMapChanges.clear();
930
phiMap.clear();
931
opPhis.clear();
932
remapsPending.clear();
933
returnParameters.clear();
934
sameBlockOperations.clear();
935
936
// Parse all instructions in the shader first.
937
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
938
const size_t dataWordCount = pSize / sizeof(uint32_t);
939
const uint32_t dataIdBound = dataWords[3];
940
functionResultMap.resize(dataIdBound);
941
942
FunctionDefinition currentFunction;
943
uint32_t parseWordIndex = SpvStartWordIndex;
944
uint32_t entryPointFunctionId = UINT32_MAX;
945
uint32_t globalWordCount = 0;
946
uint32_t sameBlockWordCount = 0;
947
while (parseWordIndex < dataWordCount) {
948
SpvOp opCode = SpvOp(dataWords[parseWordIndex] & 0xFFFFU);
949
uint32_t wordCount = (dataWords[parseWordIndex] >> 16U) & 0xFFFFU;
950
if (wordCount == 0) {
951
fprintf(stderr, "Invalid word count found at %d.\n", parseWordIndex);
952
return false;
953
}
954
955
switch (opCode) {
956
case SpvOpFunction:
957
if (currentFunction.resultId != UINT32_MAX) {
958
fprintf(stderr, "Found function start without the previous function ending.\n");
959
return false;
960
}
961
962
currentFunction.resultId = dataWords[parseWordIndex + 2];
963
currentFunction.wordIndex = parseWordIndex;
964
currentFunction.functionWordCount = wordCount;
965
break;
966
case SpvOpFunctionEnd:
967
if (currentFunction.resultId == UINT32_MAX) {
968
fprintf(stderr, "Found function end without a function start.\n");
969
return false;
970
}
971
972
currentFunction.wordCount = parseWordIndex + wordCount - currentFunction.wordIndex;
973
currentFunction.functionWordCount += wordCount;
974
functionDefinitions.emplace_back(currentFunction);
975
976
// Reset the current function to being empty again.
977
currentFunction = FunctionDefinition();
978
break;
979
case SpvOpFunctionParameter:
980
if (currentFunction.resultId == UINT32_MAX) {
981
fprintf(stderr, "Found function parameter without a function start.\n");
982
return false;
983
}
984
985
currentFunction.functionWordCount += wordCount;
986
987
if (currentFunction.parameterCount == 0) {
988
currentFunction.parameterIndex = uint32_t(functionParameters.size());
989
}
990
991
functionParameters.emplace_back(dataWords[parseWordIndex + 2]);
992
currentFunction.parameterCount++;
993
break;
994
case SpvOpFunctionCall:
995
if (currentFunction.resultId == UINT32_MAX) {
996
fprintf(stderr, "Found function call without a function start.\n");
997
return false;
998
}
999
1000
currentFunction.codeWordCount += wordCount;
1001
1002
if (currentFunction.callCount == 0) {
1003
currentFunction.callIndex = uint32_t(functionCalls.size());
1004
}
1005
1006
functionCalls.emplace_back(parseWordIndex, dataWords[parseWordIndex + 3], sameBlockWordCount);
1007
currentFunction.callCount++;
1008
break;
1009
case SpvOpDecorate: {
1010
uint32_t resultId = dataWords[parseWordIndex + 1];
1011
if (resultId >= dataIdBound) {
1012
fprintf(stderr, "Found decoration with invalid result %u.\n", resultId);
1013
return false;
1014
}
1015
1016
uint32_t nextDecorationIndex = functionResultMap[resultId].decorationIndex;
1017
functionResultMap[resultId].decorationIndex = uint32_t(resultDecorations.size());
1018
resultDecorations.emplace_back(parseWordIndex, nextDecorationIndex);
1019
globalWordCount += wordCount;
1020
break;
1021
}
1022
case SpvOpVariable:
1023
if (currentFunction.resultId != UINT32_MAX) {
1024
// Identify the variable as a local function variable.
1025
uint32_t resultId = dataWords[parseWordIndex + 2];
1026
if (resultId >= dataIdBound) {
1027
fprintf(stderr, "Found variable with invalid result %u.\n", resultId);
1028
return false;
1029
}
1030
1031
currentFunction.variableWordCount += wordCount;
1032
}
1033
else {
1034
globalWordCount += wordCount;
1035
}
1036
1037
break;
1038
case SpvOpReturn:
1039
// Functions that use early returns while on a loop can't be inlined.
1040
if (!loopMergeIdStack.empty()) {
1041
currentFunction.canInline = false;
1042
}
1043
1044
// If inlined, an OpBranch is required to replace the return.
1045
currentFunction.inlineWordCount += 2;
1046
currentFunction.functionWordCount += wordCount;
1047
break;
1048
case SpvOpReturnValue:
1049
// Functions that use early returns while on a loop can't be inlined.
1050
if (!loopMergeIdStack.empty()) {
1051
currentFunction.canInline = false;
1052
}
1053
1054
// If inlined, an OpPhi with at least one argument is required to handle return values.
1055
if (currentFunction.returnValueCount == 1) {
1056
currentFunction.inlineWordCount += 5;
1057
}
1058
1059
currentFunction.returnValueCount++;
1060
1061
// An OpBranch is required to replace the return.
1062
currentFunction.inlineWordCount += 2;
1063
1064
// An argument in OpPhi is required if there's more than one return value.
1065
if (currentFunction.returnValueCount > 1) {
1066
currentFunction.inlineWordCount += 2;
1067
}
1068
1069
currentFunction.functionWordCount += wordCount;
1070
break;
1071
case SpvOpEntryPoint:
1072
if (entryPointFunctionId != UINT32_MAX) {
1073
fprintf(stderr, "Found more than one entry point, which is not yet supported.\n");
1074
return false;
1075
}
1076
1077
entryPointFunctionId = dataWords[parseWordIndex + 2];
1078
globalWordCount += wordCount;
1079
break;
1080
case SpvOpStore: {
1081
if (currentFunction.resultId == UINT32_MAX) {
1082
fprintf(stderr, "Found store outside of a function.\n");
1083
return false;
1084
}
1085
1086
currentFunction.codeWordCount += wordCount;
1087
break;
1088
}
1089
case SpvOpLabel: {
1090
if (currentFunction.resultId == UINT32_MAX) {
1091
fprintf(stderr, "Found label outside of a function.\n");
1092
return false;
1093
}
1094
1095
uint32_t labelId = dataWords[parseWordIndex + 1];
1096
if (!loopMergeIdStack.empty() && (loopMergeIdStack.back() == labelId)) {
1097
loopMergeIdStack.pop_back();
1098
}
1099
1100
currentFunction.codeWordCount += wordCount;
1101
sameBlockWordCount = 0;
1102
break;
1103
}
1104
case SpvOpLoopMerge: {
1105
if (currentFunction.resultId == UINT32_MAX) {
1106
fprintf(stderr, "Found loop outside of a function.\n");
1107
return false;
1108
}
1109
1110
uint32_t mergeId = dataWords[parseWordIndex + 1];
1111
loopMergeIdStack.emplace_back(mergeId);
1112
currentFunction.codeWordCount += wordCount;
1113
break;
1114
}
1115
case SpvOpImage:
1116
case SpvOpSampledImage: {
1117
if (currentFunction.resultId == UINT32_MAX) {
1118
fprintf(stderr, "Found loop outside of a function.\n");
1119
return false;
1120
}
1121
1122
sameBlockWordCount += wordCount;
1123
currentFunction.codeWordCount += wordCount;
1124
break;
1125
}
1126
default:
1127
if (currentFunction.resultId != UINT32_MAX) {
1128
currentFunction.codeWordCount += wordCount;
1129
}
1130
else {
1131
globalWordCount += wordCount;
1132
}
1133
1134
break;
1135
}
1136
1137
if (currentFunction.resultId != UINT32_MAX) {
1138
bool hasResult, hasType;
1139
SpvHasResultAndType(opCode, &hasResult, &hasType);
1140
1141
if (hasResult) {
1142
// Indicate the result is associated to a function.
1143
uint32_t resultId = dataWords[parseWordIndex + (hasType ? 2 : 1)];
1144
functionResultMap[resultId].wordIndex = parseWordIndex;
1145
1146
// Look for all decorations associated to this result. These will be skipped when rewriting
1147
// the shader and written back when the result is parsed again.
1148
uint32_t decorationIndex = functionResultMap[resultId].decorationIndex;
1149
while (decorationIndex != UINT32_MAX) {
1150
const ResultDecoration &decoration = resultDecorations[decorationIndex];
1151
uint32_t decorationWordCount = (dataWords[decoration.wordIndex] >> 16U) & 0xFFFFU;
1152
currentFunction.decorationWordCount += decorationWordCount;
1153
globalWordCount -= decorationWordCount;
1154
decorationIndex = decoration.nextDecorationIndex;
1155
}
1156
}
1157
}
1158
1159
parseWordIndex += wordCount;
1160
}
1161
1162
if (entryPointFunctionId == UINT32_MAX) {
1163
fprintf(stderr, "Unable to find function entry point.\n");
1164
return false;
1165
}
1166
1167
// Make sure function array is sorted to make lower bound searches possible.
1168
std::sort(functionDefinitions.begin(), functionDefinitions.end());
1169
1170
// Find the entry point function and mark that it shouldn't be inlined.
1171
FunctionDefinitionIterator entryFunctionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), entryPointFunctionId);
1172
if (entryFunctionIt == functionDefinitions.end()) {
1173
fprintf(stderr, "Unable to find entry point function %d.\n", entryPointFunctionId);
1174
return false;
1175
}
1176
1177
entryFunctionIt->canInline = false;
1178
1179
// Do a first iteration pass with the functions that can't be inlined as the starting points of the stack.
1180
// This pass will figure out the total size required for the final inlined shader.
1181
FunctionDefinitionIterator startFunctionIt = functionDefinitions.begin();
1182
while (startFunctionIt != functionDefinitions.end()) {
1183
if (!startFunctionIt->canInline) {
1184
functionStack.emplace_back(startFunctionIt, startFunctionIt, 0);
1185
}
1186
1187
startFunctionIt++;
1188
}
1189
1190
uint32_t codeWordCount = 0;
1191
uint32_t functionDecorationWordCount = 0;
1192
while (!functionStack.empty()) {
1193
FunctionItem &functionItem = functionStack.back();
1194
if (functionItem.callIndex == functionItem.function->callCount) {
1195
// Add this function's code and variables.
1196
codeWordCount += functionItem.function->codeWordCount;
1197
codeWordCount += functionItem.function->variableWordCount;
1198
functionDecorationWordCount += functionItem.function->decorationWordCount;
1199
1200
// This function will be inlined so its variables should be reserved on the parent function instead.
1201
if (functionItem.function->canInline) {
1202
codeWordCount += functionItem.function->inlineWordCount;
1203
functionItem.rootFunction->inlinedVariableWordCount += functionItem.function->variableWordCount;
1204
}
1205
// Only add the function's word counts if can't be inlined.
1206
else {
1207
codeWordCount += functionItem.function->functionWordCount;
1208
}
1209
1210
functionStack.pop_back();
1211
}
1212
else {
1213
// Traverse the function calls to be inlined
1214
const FunctionCall &functionCall = functionCalls[functionItem.function->callIndex + functionItem.callIndex];
1215
functionItem.callIndex++;
1216
1217
uint32_t callFunctionId = dataWords[functionCall.wordIndex + 3];
1218
FunctionDefinitionIterator callFunctionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), callFunctionId);
1219
if (callFunctionIt == functionDefinitions.end()) {
1220
fprintf(stderr, "Unable to find function %d.\n", callFunctionId);
1221
return false;
1222
}
1223
1224
if (callFunctionIt->canInline) {
1225
// Function call will be replaced by one OpLoopMerge, three OpLabel and three OpBranch.
1226
// All words required for preserving same block operations will also be added.
1227
// Subtract the word count for the function call as it'll not be copied.
1228
uint32_t callWordCount = (dataWords[functionCall.wordIndex] >> 16U) & 0xFFFFU;
1229
codeWordCount += 4 + 2 * 3 + 2 * 3;
1230
codeWordCount += functionCall.sameBlockWordCount;
1231
codeWordCount -= callWordCount;
1232
functionStack.emplace_back(callFunctionIt, functionItem.rootFunction, 0);
1233
}
1234
}
1235
}
1236
1237
// Figure out the total size of the shader and copy the header.
1238
size_t totalWordCount = SpvStartWordIndex + globalWordCount + codeWordCount + functionDecorationWordCount;
1239
inlinedSpirvWords.resize(totalWordCount);
1240
memcpy(inlinedSpirvWords.data(), pData, SpvStartWordIndex * sizeof(uint32_t));
1241
1242
// To avoid reallocation of these unless the shader really warrants it, we reserve some memory for these vectors.
1243
uint32_t &inlinedIdBound = inlinedSpirvWords[3];
1244
uint32_t dstWordIndex = SpvStartWordIndex;
1245
shaderResultMap.resize(dataIdBound, UINT32_MAX);
1246
storeMap.resize(dataIdBound, UINT32_MAX);
1247
loadMap.resize(dataIdBound, UINT32_MAX);
1248
phiMap.resize(dataIdBound, UINT32_MAX);
1249
1250
auto copyInstruction = [&](uint32_t dataWordIndex, bool renameResult, uint32_t &copyWordIndex, uint32_t &copyDecorationIndex) {
1251
copyDecorationIndex = UINT32_MAX;
1252
1253
SpvOp opCode = SpvOp(dataWords[dataWordIndex] & 0xFFFFU);
1254
uint32_t wordCount = (dataWords[dataWordIndex] >> 16U) & 0xFFFFU;
1255
for (uint32_t i = 0; i < wordCount; i++) {
1256
inlinedSpirvWords[copyWordIndex + i] = dataWords[dataWordIndex + i];
1257
}
1258
1259
bool hasResult, hasType;
1260
SpvHasResultAndType(opCode, &hasResult, &hasType);
1261
1262
if (hasResult) {
1263
// Any inlined functions must remap all their results and operands.
1264
uint32_t &resultId = inlinedSpirvWords[copyWordIndex + (hasType ? 2 : 1)];
1265
if ((resultId < dataIdBound) && (functionResultMap[resultId].wordIndex != UINT32_MAX)) {
1266
copyDecorationIndex = functionResultMap[resultId].decorationIndex;
1267
}
1268
1269
if (renameResult) {
1270
// First labels in a function will be replaced by the assigned label if present.
1271
uint32_t newResultId;
1272
if ((opCode == SpvOpLabel) && (callStack.back().startBlockId != UINT32_MAX) && !callStack.back().startBlockIdAssigned) {
1273
newResultId = callStack.back().startBlockId;
1274
callStack.back().startBlockIdAssigned = true;
1275
}
1276
else {
1277
newResultId = inlinedIdBound++;
1278
}
1279
1280
// Remap and replace the result ID in the instruction.
1281
shaderResultMap[resultId] = newResultId;
1282
resultId = newResultId;
1283
1284
// Store the current block's remapped label.
1285
if (opCode == SpvOpLabel) {
1286
callStack.back().blockId = resultId;
1287
}
1288
}
1289
}
1290
1291
// Remap any operands or labels present in the instructions.
1292
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
1293
bool operandWordSkipString;
1294
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
1295
uint32_t operandWordIndex = operandWordStart;
1296
for (uint32_t j = 0; j < operandWordCount; j++) {
1297
if (checkOperandWordSkip(callStack.back().wordIndex, dataWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
1298
continue;
1299
}
1300
1301
if (operandWordIndex >= wordCount) {
1302
break;
1303
}
1304
1305
uint32_t shaderWordIndex = copyWordIndex + operandWordIndex;
1306
uint32_t &operandId = inlinedSpirvWords[shaderWordIndex];
1307
1308
// Discard any known stores for variables that are used in operations that the effect is not explicitly considered yet.
1309
if ((opCode != SpvOpStore) && (opCode != SpvOpLoad)) {
1310
storeMap[operandId] = dataIdBound;
1311
}
1312
1313
// Rename the operand if it originates from a load.
1314
if (loadMap[operandId] < dataIdBound) {
1315
operandId = loadMap[operandId];
1316
}
1317
1318
// Apply the result remapping.
1319
if (shaderResultMap[operandId] != UINT32_MAX) {
1320
operandId = shaderResultMap[operandId];
1321
}
1322
1323
operandWordIndex += operandWordStride;
1324
}
1325
}
1326
1327
uint32_t labelWordStart, labelWordCount, labelWordStride;
1328
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, true)) {
1329
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
1330
uint32_t labelWordIndex = labelWordStart + j * labelWordStride;
1331
remapsPending.emplace_back(copyWordIndex + labelWordIndex);
1332
callStack.back().remapsPendingCount++;
1333
}
1334
}
1335
1336
copyWordIndex += wordCount;
1337
};
1338
1339
auto copyDecorations = [&](uint32_t copyDecorationIndex, uint32_t &copyWordIndex) {
1340
uint32_t placeholderWordIndex;
1341
while (copyDecorationIndex != UINT32_MAX) {
1342
copyInstruction(resultDecorations[copyDecorationIndex].wordIndex, false, copyWordIndex, placeholderWordIndex);
1343
copyDecorationIndex = resultDecorations[copyDecorationIndex].nextDecorationIndex;
1344
}
1345
};
1346
1347
// Perform the final pass for inlining all functions.
1348
uint32_t copyDecorationIndex;
1349
uint32_t dstInlinedDecorationWordIndex = UINT32_MAX;
1350
uint32_t dstInlinedDecorationWordIndexMax = UINT32_MAX;
1351
uint32_t dstInlinedVariableWordIndex = UINT32_MAX;
1352
uint32_t dstInlinedVariableWordIndexMax = UINT32_MAX;
1353
callStack.emplace_back(SpvStartWordIndex);
1354
while (!callStack.empty()) {
1355
uint32_t callWordIndex = callStack.back().wordIndex;
1356
if (callWordIndex >= dataWordCount) {
1357
break;
1358
}
1359
1360
bool copyWords = true;
1361
bool copyWordsToVariables = false;
1362
SpvOp opCode = SpvOp(dataWords[callWordIndex] & 0xFFFFU);
1363
uint32_t wordCount = (dataWords[callWordIndex] >> 16U) & 0xFFFFU;
1364
if (wordCount == 0) {
1365
fprintf(stderr, "Function iteration landed in an invalid instruction due to an implementation error.\n");
1366
return false;
1367
}
1368
1369
switch (opCode) {
1370
case SpvOpLabel:
1371
while (!storeMapChanges.empty()) {
1372
storeMap[storeMapChanges.back()] = UINT32_MAX;
1373
storeMapChanges.pop_back();
1374
}
1375
1376
while (!loadMapChanges.empty()) {
1377
loadMap[loadMapChanges.back()] = UINT32_MAX;
1378
loadMapChanges.pop_back();
1379
}
1380
1381
sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount);
1382
callStack.back().blockId = dataWords[callWordIndex + 1];
1383
callStack.back().sameBlockOperationsCount = 0;
1384
break;
1385
case SpvOpFunction: {
1386
uint32_t functionId = dataWords[callWordIndex + 2];
1387
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), functionId);
1388
if (functionIt == functionDefinitions.end()) {
1389
fprintf(stderr, "Unable to find function %d.\n", functionId);
1390
return false;
1391
}
1392
1393
// If we're iterating on the top of the shader, we skip over the function.
1394
// Only copy the function's words if it's not inlined and we're iterating on it.
1395
if (callStack.back().functionId == UINT32_MAX) {
1396
// Skip parsing the entire function on this stack level.
1397
callStack.back().wordIndex += functionIt->wordCount;
1398
1399
// Insert a new stack level if we found function that isn't inlined.
1400
if (!functionIt->canInline) {
1401
callStack.emplace_back(callWordIndex - wordCount, functionId);
1402
}
1403
else {
1404
callStack.back().wordIndex -= wordCount;
1405
}
1406
1407
copyWords = false;
1408
}
1409
else {
1410
copyWords = !functionIt->canInline;
1411
}
1412
1413
break;
1414
}
1415
case SpvOpFunctionParameter:
1416
// Only copy the function's parameters if it's not inlined.
1417
copyWords = !callStack.back().functionInlined;
1418
break;
1419
case SpvOpFunctionEnd: {
1420
// Apply any pending remappings from instructions with labels.
1421
for (size_t i = remapsPending.size() - callStack.back().remapsPendingCount; i < remapsPending.size(); i++) {
1422
uint32_t &resultId = inlinedSpirvWords[remapsPending[i]];
1423
if (shaderResultMap[resultId] != UINT32_MAX) {
1424
resultId = shaderResultMap[resultId];
1425
}
1426
}
1427
1428
// Only copy the function's end if it's not inlined.
1429
if (!callStack.back().functionInlined) {
1430
copyWords = true;
1431
1432
if (dstInlinedVariableWordIndex != dstInlinedVariableWordIndexMax) {
1433
fprintf(stderr, "Failed to fill all available variable space due to an implementation error.\n");
1434
return false;
1435
}
1436
1437
dstInlinedVariableWordIndex = UINT32_MAX;
1438
dstInlinedVariableWordIndexMax = UINT32_MAX;
1439
}
1440
else {
1441
// Insert a label for the continue block that connects back to the start along with a branch.
1442
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1443
inlinedSpirvWords[dstWordIndex++] = callStack.back().continueBlockId;
1444
1445
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1446
inlinedSpirvWords[dstWordIndex++] = callStack.back().loopBlockId;
1447
1448
// Insert a label for the return block.
1449
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1450
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1451
1452
// If the function only returns one possible value, the caller instead will just remap the result to this one.
1453
if (callStack.back().returnParametersCount == 2) {
1454
uint32_t functionResultId = callStack.back().resultId;
1455
shaderResultMap[functionResultId] = returnParameters[returnParameters.size() - callStack.back().returnParametersCount];
1456
}
1457
// Insert an OpPhi for selecting the result from a function call that called a function that returns multiple values.
1458
else if (callStack.back().returnParametersCount > 2) {
1459
// Remap the function result if necessary.
1460
const CallItem &previousCallStack = callStack[callStack.size() - 2];
1461
uint32_t functionResultId = callStack.back().resultId;
1462
if ((previousCallStack.functionId != UINT32_MAX) && previousCallStack.functionInlined) {
1463
uint32_t newFunctionResultId = inlinedIdBound++;
1464
shaderResultMap[functionResultId] = newFunctionResultId;
1465
functionResultId = newFunctionResultId;
1466
}
1467
1468
opPhis.emplace_back(dstWordIndex);
1469
inlinedSpirvWords[dstWordIndex++] = SpvOpPhi | ((3 + callStack.back().returnParametersCount) << 16U);
1470
inlinedSpirvWords[dstWordIndex++] = callStack.back().resultType;
1471
inlinedSpirvWords[dstWordIndex++] = functionResultId;
1472
1473
// Copy the OpPhi arguments directly.
1474
for (size_t i = returnParameters.size() - callStack.back().returnParametersCount; i < returnParameters.size(); i++) {
1475
inlinedSpirvWords[dstWordIndex++] = returnParameters[i];
1476
}
1477
}
1478
1479
copyWords = false;
1480
}
1481
1482
// Pop this stack level and return to iterating on the previous one.
1483
remapsPending.resize(remapsPending.size() - callStack.back().remapsPendingCount);
1484
returnParameters.resize(returnParameters.size() - callStack.back().returnParametersCount);
1485
sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount);
1486
callStack.pop_back();
1487
1488
if (!callStack.empty()) {
1489
// Copy the same block operations and rename the results even if the function wasn't inlined.
1490
for (size_t i = sameBlockOperations.size() - callStack.back().sameBlockOperationsCount; i < sameBlockOperations.size(); i++) {
1491
copyInstruction(sameBlockOperations[i], true, dstWordIndex, copyDecorationIndex);
1492
copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex);
1493
}
1494
1495
callStack.back().wordIndex -= wordCount;
1496
}
1497
1498
break;
1499
}
1500
case SpvOpFunctionCall: {
1501
// Inline the function by inserting two labels and a branch.
1502
uint32_t functionId = dataWords[callWordIndex + 3];
1503
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), functionId);
1504
if (functionIt == functionDefinitions.end()) {
1505
fprintf(stderr, "Unable to find function %d.\n", functionId);
1506
return false;
1507
}
1508
1509
if (functionIt->canInline) {
1510
// Generate the ID that will be used to indicate the function's start and the return block.
1511
uint32_t loopLabelId = inlinedIdBound++;
1512
uint32_t startLabelId = inlinedIdBound++;
1513
uint32_t continueLabelId = inlinedIdBound++;
1514
uint32_t returnLabelId = inlinedIdBound++;
1515
1516
// In any future Phi operations, rename the current label to the return label.
1517
if (callStack.back().blockId >= phiMap.size()) {
1518
phiMap.resize(callStack.back().blockId + 1, UINT32_MAX);
1519
}
1520
1521
phiMap[callStack.back().blockId] = returnLabelId;
1522
1523
// Branch into a new block. The new block will contain a single iteration loop.
1524
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1525
inlinedSpirvWords[dstWordIndex++] = loopLabelId;
1526
1527
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1528
inlinedSpirvWords[dstWordIndex++] = loopLabelId;
1529
1530
inlinedSpirvWords[dstWordIndex++] = SpvOpLoopMerge | (4 << 16U);
1531
inlinedSpirvWords[dstWordIndex++] = returnLabelId;
1532
inlinedSpirvWords[dstWordIndex++] = continueLabelId;
1533
inlinedSpirvWords[dstWordIndex++] = SpvLoopControlMaskNone;
1534
1535
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1536
inlinedSpirvWords[dstWordIndex++] = startLabelId;
1537
1538
// Pass the result Id unmodified. The function evaluation will determine how it should be remapped.
1539
uint32_t functionResultId = dataWords[callWordIndex + 2];
1540
callStack.back().wordIndex += wordCount;
1541
1542
// Word count should be subtracted as the loop's end will add it.
1543
callStack.emplace_back(functionIt->wordIndex - wordCount, functionIt->resultId, true, startLabelId, loopLabelId, continueLabelId, returnLabelId, dataWords[callWordIndex + 1], functionResultId);
1544
1545
for (uint32_t i = 0; i < functionIt->parameterCount; i++) {
1546
if (wordCount <= (4 + i)) {
1547
fprintf(stderr, "Not enough words for argument %d in function call.\n", i);
1548
return false;
1549
}
1550
1551
uint32_t functionParameterId = functionParameters[functionIt->parameterIndex + i].resultId;
1552
uint32_t localParameterId = dataWords[callWordIndex + 4 + i];
1553
if (shaderResultMap[localParameterId] != UINT32_MAX) {
1554
localParameterId = shaderResultMap[localParameterId];
1555
}
1556
1557
shaderResultMap[functionParameterId] = localParameterId;
1558
}
1559
1560
copyWords = false;
1561
}
1562
else {
1563
copyWords = true;
1564
}
1565
1566
break;
1567
}
1568
case SpvOpDecorate: {
1569
if (dstInlinedDecorationWordIndex == UINT32_MAX) {
1570
// Upon encountering the first decoration in the shader, reserve space to write out any decorations
1571
// that are found to be linked to function results.
1572
dstInlinedDecorationWordIndex = dstWordIndex;
1573
dstWordIndex += functionDecorationWordCount;
1574
dstInlinedDecorationWordIndexMax = dstWordIndex;
1575
}
1576
1577
// Only copy the decoration as-is if it doesn't belong to a result in a function.
1578
uint32_t resultId = dataWords[callWordIndex + 1];
1579
copyWords = (functionResultMap[resultId].wordIndex == UINT32_MAX);
1580
break;
1581
}
1582
case SpvOpVariable:
1583
if ((callStack.back().functionId < UINT32_MAX) && !callStack.back().functionInlined) {
1584
// As soon as we find a variable local to the function, reserve the space to insert all
1585
// inlined function variables that we encounter.
1586
if (dstInlinedVariableWordIndex == UINT32_MAX) {
1587
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), callStack.back().functionId);
1588
if (functionIt == functionDefinitions.end()) {
1589
fprintf(stderr, "Unable to find function %d.\n", callStack.back().functionId);
1590
return false;
1591
}
1592
1593
dstInlinedVariableWordIndex = dstWordIndex;
1594
dstWordIndex += functionIt->inlinedVariableWordCount;
1595
dstInlinedVariableWordIndexMax = dstWordIndex;
1596
}
1597
}
1598
else {
1599
// Copy the variables into the entry point function's variables.
1600
copyWordsToVariables = (callStack.back().functionId != UINT32_MAX);
1601
}
1602
1603
copyWords = true;
1604
break;
1605
case SpvOpReturn:
1606
if (callStack.back().functionInlined) {
1607
// Replace return with a branch to the return label.
1608
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1609
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1610
copyWords = false;
1611
}
1612
else {
1613
// Copy as is.
1614
}
1615
1616
break;
1617
case SpvOpReturnValue: {
1618
if (callStack.back().functionInlined) {
1619
// Replace return with a branch to the return label.
1620
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1621
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1622
copyWords = false;
1623
1624
// Store parameters for Phi operator.
1625
uint32_t operandId = dataWords[callStack.back().wordIndex + 1];
1626
if (shaderResultMap[operandId] != UINT32_MAX) {
1627
operandId = shaderResultMap[operandId];
1628
}
1629
1630
returnParameters.emplace_back(operandId);
1631
returnParameters.emplace_back(callStack.back().blockId);
1632
callStack.back().returnParametersCount += 2;
1633
}
1634
else {
1635
// Copy as is.
1636
}
1637
1638
break;
1639
}
1640
case SpvOpLoad: {
1641
// If the pointer being loaded was modified this block, store its result to rename the
1642
// operands that use the result of this load operation. This load operation will go
1643
// unused and be deleted in the optimization pass.
1644
// Ignore load operations with memory operands.
1645
if (wordCount == 4) {
1646
uint32_t pointerId = dataWords[callStack.back().wordIndex + 3];
1647
if (pointerId >= dataIdBound) {
1648
fprintf(stderr, "Found load operation with invalid pointer %u.\n", pointerId);
1649
return false;
1650
}
1651
1652
uint32_t pointerWordIndex = functionResultMap[pointerId].wordIndex;
1653
if ((pointerWordIndex != UINT32_MAX) && (SpvOp(dataWords[pointerWordIndex] & 0xFFFFU) == SpvOpVariable) && (storeMap[pointerId] < dataIdBound)) {
1654
uint32_t resultId = dataWords[callStack.back().wordIndex + 2];
1655
if (loadMap[resultId] != storeMap[pointerId]) {
1656
loadMap[resultId] = storeMap[pointerId];
1657
loadMapChanges.emplace_back(resultId);
1658
}
1659
}
1660
}
1661
1662
break;
1663
}
1664
case SpvOpStore: {
1665
// Keep track of the result last stored to the pointer on this block.
1666
// Ignore store operations with memory operands.
1667
if (wordCount == 3) {
1668
uint32_t pointerId = dataWords[callStack.back().wordIndex + 1];
1669
if (pointerId >= dataIdBound) {
1670
fprintf(stderr, "Found store operation with invalid pointer %u.\n", pointerId);
1671
return false;
1672
}
1673
1674
uint32_t resultId = dataWords[callStack.back().wordIndex + 2];
1675
if (resultId >= dataIdBound) {
1676
fprintf(stderr, "Found store operation with invalid result %u.\n", resultId);
1677
return false;
1678
}
1679
1680
if (storeMap[pointerId] != resultId) {
1681
storeMap[pointerId] = resultId;
1682
storeMapChanges.emplace_back(pointerId);
1683
}
1684
}
1685
1686
break;
1687
}
1688
case SpvOpPhi:
1689
opPhis.emplace_back(dstWordIndex);
1690
break;
1691
case SpvOpImage:
1692
case SpvOpSampledImage: {
1693
sameBlockOperations.emplace_back(callStack.back().wordIndex);
1694
callStack.back().sameBlockOperationsCount++;
1695
break;
1696
}
1697
default:
1698
break;
1699
}
1700
1701
if (copyWords) {
1702
uint32_t &copyWordIndex = copyWordsToVariables ? dstInlinedVariableWordIndex : dstWordIndex;
1703
copyInstruction(callWordIndex, callStack.back().functionInlined, copyWordIndex, copyDecorationIndex);
1704
copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex);
1705
}
1706
1707
if (!callStack.empty()) {
1708
callStack.back().wordIndex += wordCount;
1709
}
1710
1711
assert(dstWordIndex <= totalWordCount && "Not enough words were reserved for the shader.");
1712
assert(dstInlinedVariableWordIndex <= dstInlinedVariableWordIndexMax && "Not enough words were reserved for inlined variables.");
1713
assert(dstInlinedDecorationWordIndex <= dstInlinedDecorationWordIndexMax && "Not enough words were reserved for function decorations.");
1714
}
1715
1716
if (dstWordIndex != totalWordCount) {
1717
fprintf(stderr, "Failed to fill all shader data due to an implementation error.\n");
1718
return false;
1719
}
1720
1721
// Fix any OpPhi operators with the labels for the blocks that were split.
1722
for (uint32_t wordIndex : opPhis) {
1723
uint32_t wordCount = (inlinedSpirvWords[wordIndex] >> 16U) & 0xFFFFU;
1724
for (uint32_t j = 3; j < wordCount; j += 2) {
1725
uint32_t &labelId = inlinedSpirvWords[wordIndex + j + 1];
1726
while ((phiMap.size() > labelId) && (phiMap[labelId] != UINT32_MAX)) {
1727
labelId = phiMap[labelId];
1728
}
1729
}
1730
}
1731
1732
return true;
1733
}
1734
1735
bool Shader::parseData(const void *pData, size_t pSize) {
1736
assert(pData != nullptr);
1737
assert(pSize > 0);
1738
1739
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
1740
const size_t dataWordCount = pSize / sizeof(uint32_t);
1741
const uint32_t idBound = dataWords[3];
1742
instructions.reserve(idBound);
1743
results.resize(idBound, Result());
1744
results.shrink_to_fit();
1745
1746
// Parse all instructions.
1747
uint32_t blockIndex = UINT32_MAX;
1748
uint32_t functionInstructionIndex = UINT32_MAX;
1749
uint32_t functionLabelIndex = UINT32_MAX;
1750
uint32_t blockInstructionIndex = UINT32_MAX;
1751
uint32_t wordIndex = SpvStartWordIndex;
1752
while (wordIndex < dataWordCount) {
1753
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
1754
uint32_t wordCount = (dataWords[wordIndex] >> 16U) & 0xFFFFU;
1755
if (wordCount == 0) {
1756
fprintf(stderr, "SPIR-V Parsing error. Invalid instruction word count at word %d.\n", wordIndex);
1757
return false;
1758
}
1759
1760
bool hasResult, hasType;
1761
SpvHasResultAndType(opCode, &hasResult, &hasType);
1762
1763
uint32_t instructionIndex = uint32_t(instructions.size());
1764
if (hasResult) {
1765
uint32_t resultId = dataWords[wordIndex + (hasType ? 2 : 1)];
1766
if (resultId >= idBound) {
1767
fprintf(stderr, "SPIR-V Parsing error. Invalid Result ID: %u.\n", resultId);
1768
return false;
1769
}
1770
1771
results[resultId].instructionIndex = instructionIndex;
1772
}
1773
1774
// Handle specific instructions.
1775
switch (opCode) {
1776
case SpvOpFunction:
1777
functionInstructionIndex = instructionIndex;
1778
break;
1779
case SpvOpFunctionEnd:
1780
functions.emplace_back(functionInstructionIndex, functionLabelIndex);
1781
functionInstructionIndex = functionLabelIndex = UINT32_MAX;
1782
break;
1783
case SpvOpDecorate:
1784
case SpvOpMemberDecorate:
1785
decorations.emplace_back(instructionIndex);
1786
break;
1787
case SpvOpPhi:
1788
phis.emplace_back(instructionIndex);
1789
break;
1790
case SpvOpLoopMerge:
1791
loopHeaders.emplace_back(instructionIndex, blockInstructionIndex);
1792
break;
1793
case SpvOpLabel:
1794
blockIndex = uint32_t(blocks.size());
1795
blockInstructionIndex = instructionIndex;
1796
1797
if (functionLabelIndex == UINT32_MAX) {
1798
functionLabelIndex = blockInstructionIndex;
1799
}
1800
1801
break;
1802
default:
1803
break;
1804
}
1805
1806
instructions.emplace_back(wordIndex, blockIndex);
1807
1808
if (SpvOpIsTerminator(opCode)) {
1809
blocks.emplace_back(blockInstructionIndex, instructionIndex);
1810
blockIndex = UINT32_MAX;
1811
blockInstructionIndex = UINT32_MAX;
1812
}
1813
1814
wordIndex += wordCount;
1815
}
1816
1817
// Initialize all adjacent indices for the lists.
1818
instructionAdjacentListIndices.resize(instructions.size(), UINT32_MAX);
1819
1820
return true;
1821
}
1822
1823
bool Shader::process(const void *pData, size_t pSize) {
1824
// Greatly decreases the costs of adding nodes to the linked list.
1825
listNodes.reserve(instructions.size() * 2);
1826
1827
thread_local std::vector<uint32_t> loopMergeBlockStack;
1828
thread_local std::vector<uint32_t> loopMergeInstructionStack;
1829
thread_local std::vector<bool> preOrderVisitedBlocks;
1830
thread_local std::vector<bool> postOrderVisitedBlocks;
1831
loopMergeBlockStack.clear();
1832
loopMergeInstructionStack.clear();
1833
preOrderVisitedBlocks.clear();
1834
postOrderVisitedBlocks.clear();
1835
1836
bool foundOpSwitch = false;
1837
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
1838
const size_t dataWordCount = pSize / sizeof(uint32_t);
1839
uint32_t currentBlockId = 0;
1840
uint32_t currentLoopHeaderIndex = 0;
1841
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
1842
uint32_t wordIndex = instructions[i].wordIndex;
1843
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
1844
uint32_t wordCount = (dataWords[wordIndex] >> 16U) & 0xFFFFU;
1845
if (!SpvIsSupported(opCode)) {
1846
fprintf(stderr, "%s is not supported yet.\n", SpvOpToString(opCode));
1847
return false;
1848
}
1849
1850
bool hasResult, hasType;
1851
SpvHasResultAndType(opCode, &hasResult, &hasType);
1852
1853
if (hasType) {
1854
uint32_t typeId = dataWords[wordIndex + 1];
1855
if (typeId >= results.size()) {
1856
fprintf(stderr, "SPIR-V Parsing error. Invalid Type ID: %u.\n", typeId);
1857
return false;
1858
}
1859
1860
if (results[typeId].instructionIndex == UINT32_MAX) {
1861
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", typeId);
1862
return false;
1863
}
1864
1865
uint32_t typeInstructionIndex = results[typeId].instructionIndex;
1866
instructionAdjacentListIndices[typeInstructionIndex] = addToList(i, instructionAdjacentListIndices[typeInstructionIndex], listNodes);
1867
1868
// Check if it's an OpConstant of Int type so it can be reused on switches.
1869
if ((opCode == SpvOpConstant) && (defaultSwitchOpConstantInt == UINT32_MAX)) {
1870
uint32_t typeWordIndex = instructions[typeInstructionIndex].wordIndex;
1871
SpvOp typeOpCode = SpvOp(dataWords[typeWordIndex] & 0xFFFFU);
1872
if (typeOpCode == SpvOpTypeInt) {
1873
defaultSwitchOpConstantInt = dataWords[wordIndex + 2];
1874
}
1875
}
1876
}
1877
1878
// Every operand should be adjacent to this instruction.
1879
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
1880
bool operandWordSkipString;
1881
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, false)) {
1882
uint32_t operandWordIndex = operandWordStart;
1883
for (uint32_t j = 0; j < operandWordCount; j++) {
1884
if (checkOperandWordSkip(wordIndex, dataWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
1885
continue;
1886
}
1887
1888
if (operandWordIndex >= wordCount) {
1889
break;
1890
}
1891
1892
uint32_t operandId = dataWords[wordIndex + operandWordIndex];
1893
if (operandId >= results.size()) {
1894
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", operandId);
1895
return false;
1896
}
1897
1898
if (results[operandId].instructionIndex == UINT32_MAX) {
1899
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", operandId);
1900
return false;
1901
}
1902
1903
uint32_t resultIndex = results[operandId].instructionIndex;
1904
instructionAdjacentListIndices[resultIndex] = addToList(i, instructionAdjacentListIndices[resultIndex], listNodes);
1905
operandWordIndex += operandWordStride;
1906
}
1907
}
1908
else {
1909
fprintf(stderr, "SPIR-V Parsing error. Operands for %s are not implemented yet.\n", SpvOpToString(opCode));
1910
return false;
1911
}
1912
1913
// This instruction should be adjacent to every label referenced.
1914
uint32_t labelWordStart, labelWordCount, labelWordStride;
1915
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, false)) {
1916
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
1917
uint32_t labelId = dataWords[wordIndex + labelWordStart + j * labelWordStride];
1918
if (labelId >= results.size()) {
1919
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", labelId);
1920
return false;
1921
}
1922
1923
if (results[labelId].instructionIndex == UINT32_MAX) {
1924
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", labelId);
1925
return false;
1926
}
1927
1928
// Make sure this label not pointing back to the loop header while on a loop merge.
1929
if (!loopMergeBlockStack.empty() && (labelId == loopMergeBlockStack.back())) {
1930
continue;
1931
}
1932
1933
uint32_t labelIndex = results[labelId].instructionIndex;
1934
instructionAdjacentListIndices[i] = addToList(labelIndex, instructionAdjacentListIndices[i], listNodes);
1935
}
1936
}
1937
1938
// Parse parented blocks of OpPhi to indicate the dependency.
1939
if (opCode == SpvOpPhi) {
1940
uint32_t continueLabelId = UINT32_MAX;
1941
if (!loopMergeInstructionStack.empty()) {
1942
uint32_t loopMergeWordIndex = instructions[loopMergeInstructionStack.back()].wordIndex;
1943
continueLabelId = dataWords[loopMergeWordIndex + 2];
1944
}
1945
1946
for (uint32_t j = 3; j < wordCount; j += 2) {
1947
uint32_t labelId = dataWords[wordIndex + j + 1];
1948
if (labelId >= results.size()) {
1949
fprintf(stderr, "SPIR-V Parsing error. Invalid Parent ID: %u.\n", labelId);
1950
return false;
1951
}
1952
1953
if (results[labelId].instructionIndex == UINT32_MAX) {
1954
fprintf(stderr, "SPIR-V Parsing error. Invalid Parent ID: %u.\n", labelId);
1955
return false;
1956
}
1957
1958
// Make sure this label doesn't come from the loop continue.
1959
if (labelId == continueLabelId) {
1960
continue;
1961
}
1962
1963
uint32_t operandId = dataWords[wordIndex + j];
1964
if (operandId >= results.size()) {
1965
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", operandId);
1966
return false;
1967
}
1968
1969
if (results[operandId].instructionIndex == UINT32_MAX) {
1970
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", operandId);
1971
return false;
1972
}
1973
1974
uint32_t labelIndex = results[labelId].instructionIndex;
1975
uint32_t resultIndex = results[operandId].instructionIndex;
1976
instructionAdjacentListIndices[labelIndex] = addToList(i, instructionAdjacentListIndices[labelIndex], listNodes);
1977
instructionAdjacentListIndices[resultIndex] = addToList(i, instructionAdjacentListIndices[resultIndex], listNodes);
1978
}
1979
}
1980
// Parse decorations.
1981
else if (opCode == SpvOpDecorate) {
1982
uint32_t decoration = dataWords[wordIndex + 2];
1983
if (decoration == SpvDecorationSpecId) {
1984
uint32_t resultId = dataWords[wordIndex + 1];
1985
uint32_t constantId = dataWords[wordIndex + 3];
1986
if (resultId >= results.size()) {
1987
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", resultId);
1988
return false;
1989
}
1990
1991
uint32_t resultInstructionIndex = results[resultId].instructionIndex;
1992
if (resultInstructionIndex == UINT32_MAX) {
1993
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", resultId);
1994
return false;
1995
}
1996
1997
specializations.resize(std::max(specializations.size(), size_t(constantId + 1)));
1998
specializations[constantId].constantInstructionIndex = resultInstructionIndex;
1999
specializations[constantId].decorationInstructionIndex = i;
2000
}
2001
}
2002
// Check if a switch is used in the shader.
2003
else if (opCode == SpvOpSwitch) {
2004
foundOpSwitch = true;
2005
}
2006
// If a loop merge stack is active, pop it if it corresponds to the merge block.
2007
else if (opCode == SpvOpLabel) {
2008
currentBlockId = dataWords[wordIndex + 1];
2009
2010
if ((currentLoopHeaderIndex < loopHeaders.size()) && (i == loopHeaders[currentLoopHeaderIndex].blockInstructionIndex)) {
2011
loopMergeBlockStack.emplace_back(currentBlockId);
2012
loopMergeInstructionStack.emplace_back(loopHeaders[currentLoopHeaderIndex].instructionIndex);
2013
currentLoopHeaderIndex++;
2014
}
2015
2016
if (!loopMergeBlockStack.empty() && !loopMergeInstructionStack.empty()) {
2017
uint32_t loopMergeWordIndex = instructions[loopMergeInstructionStack.back()].wordIndex;
2018
uint32_t mergeBlockId = dataWords[loopMergeWordIndex + 1];
2019
if (currentBlockId == mergeBlockId) {
2020
loopMergeBlockStack.pop_back();
2021
loopMergeInstructionStack.pop_back();
2022
}
2023
}
2024
}
2025
}
2026
2027
// Do a pre-order and post-order traversal of the tree starting from each function. These indices are
2028
// later used to figure out whether instructions dominate other instructions when doing optimizations.
2029
thread_local std::vector<uint32_t> blockIndexStack;
2030
thread_local std::vector<uint32_t> blockAdjacentStack;
2031
uint32_t preOrderIndex = 0;
2032
uint32_t postOrderIndex = 0;
2033
blockPreOrderIndices.resize(blocks.size(), 0);
2034
blockPostOrderIndices.resize(blocks.size(), 0);
2035
preOrderVisitedBlocks.resize(blocks.size(), false);
2036
postOrderVisitedBlocks.resize(blocks.size(), false);
2037
for (uint32_t i = 0; i < uint32_t(functions.size()); i++) {
2038
const Function &function = functions[i];
2039
const Instruction &functionLabelInstruction = instructions[function.labelInstructionIndex];
2040
blockIndexStack.clear();
2041
blockAdjacentStack.clear();
2042
blockIndexStack.emplace_back(functionLabelInstruction.blockIndex);
2043
blockAdjacentStack.emplace_back(UINT32_MAX);
2044
while (!blockIndexStack.empty()) {
2045
uint32_t blockIndex = blockIndexStack.back();
2046
uint32_t blockAdjacentIndex = blockAdjacentStack.back();
2047
blockIndexStack.pop_back();
2048
blockAdjacentStack.pop_back();
2049
2050
uint32_t terminatorInstructorIndex = blocks[blockIndex].terminatorInstructionIndex;
2051
if (!preOrderVisitedBlocks[blockIndex]) {
2052
blockPreOrderIndices[blockIndex] = preOrderIndex++;
2053
blockAdjacentIndex = instructionAdjacentListIndices[terminatorInstructorIndex];
2054
preOrderVisitedBlocks[blockIndex] = true;
2055
}
2056
2057
if ((blockAdjacentIndex == UINT32_MAX) && !postOrderVisitedBlocks[blockIndex]) {
2058
blockPostOrderIndices[blockIndex] = postOrderIndex++;
2059
postOrderVisitedBlocks[blockIndex] = true;
2060
}
2061
2062
while (blockAdjacentIndex != UINT32_MAX) {
2063
const ListNode &adjacentListNode = listNodes[blockAdjacentIndex];
2064
const Instruction &adjacentInstruction = instructions[adjacentListNode.instructionIndex];
2065
SpvOp adjacentOpCode = SpvOp(dataWords[adjacentInstruction.wordIndex] & 0xFFFFU);
2066
if (adjacentOpCode == SpvOpLabel) {
2067
blockIndexStack.emplace_back(blockIndex);
2068
blockAdjacentStack.emplace_back(adjacentListNode.nextListIndex);
2069
blockIndexStack.emplace_back(adjacentInstruction.blockIndex);
2070
blockAdjacentStack.emplace_back(UINT32_MAX);
2071
blockAdjacentIndex = UINT32_MAX;
2072
}
2073
else {
2074
blockAdjacentIndex = adjacentListNode.nextListIndex;
2075
}
2076
}
2077
}
2078
}
2079
2080
if (foundOpSwitch && (defaultSwitchOpConstantInt == UINT32_MAX)) {
2081
fprintf(stderr, "Unable to find an OpConstantInt to use as replacement for switches. Adding this instruction automatically is not supported yet.\n");
2082
return false;
2083
}
2084
2085
return true;
2086
}
2087
2088
// Sort key used to order instructions by their level in the dependency graph.
//
// The index and the level are packed into a single 64-bit value so one integer comparison orders the
// entries. NOTE(review): ordering by level first and index second relies on the compiler placing the
// first bit-field in the low bits of the union — confirm for every supported toolchain.
struct InstructionSort {
	union {
		struct {
			uint64_t instructionIndex : 32;
			uint64_t instructionLevel : 32;
		};

		// Combined view over both bit-fields. Starts zeroed.
		uint64_t instructionValue = 0;
	};

	// Nothing to do besides the default member initializer.
	InstructionSort() {}

	// Compares the packed values.
	bool operator<(const InstructionSort &pOther) const {
		return instructionValue < pOther.instructionValue;
	}
};
2106
2107
bool Shader::sort(const void *pData, size_t pSize) {
2108
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
2109
const size_t dataWordCount = pSize / sizeof(uint32_t);
2110
2111
// Count the in and out degrees for all instructions.
2112
instructionInDegrees.clear();
2113
instructionOutDegrees.clear();
2114
instructionInDegrees.resize(instructions.size(), 0);
2115
instructionOutDegrees.resize(instructions.size(), 0);
2116
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2117
uint32_t listIndex = instructionAdjacentListIndices[i];
2118
while (listIndex != UINT32_MAX) {
2119
const ListNode &listNode = listNodes[listIndex];
2120
instructionInDegrees[listNode.instructionIndex]++;
2121
instructionOutDegrees[i]++;
2122
listIndex = listNode.nextListIndex;
2123
}
2124
}
2125
2126
// Sort degrees doesn't need to be cleared as its contents will be copied over.
2127
thread_local std::vector<uint32_t> sortDegrees;
2128
thread_local std::vector<uint32_t> instructionStack;
2129
thread_local std::vector<InstructionSort> instructionSortVector;
2130
instructionStack.clear();
2131
instructionSortVector.clear();
2132
2133
// Make a copy of the degrees as they'll be used to perform a topological sort.
2134
sortDegrees.resize(instructionInDegrees.size());
2135
memcpy(sortDegrees.data(), instructionInDegrees.data(), sizeof(uint32_t) * sortDegrees.size());
2136
2137
// The first nodes to be processed should be the ones with no incoming connections.
2138
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2139
if (sortDegrees[i] == 0) {
2140
instructionStack.emplace_back(i);
2141
}
2142
}
2143
2144
instructionOrder.reserve(instructions.size());
2145
instructionOrder.clear();
2146
while (!instructionStack.empty()) {
2147
uint32_t i = instructionStack.back();
2148
instructionStack.pop_back();
2149
instructionOrder.emplace_back(i);
2150
2151
// Look for the adjacents and reduce their degree. Push it to the stack if their degree reaches zero.
2152
uint32_t listIndex = instructionAdjacentListIndices[i];
2153
while (listIndex != UINT32_MAX) {
2154
const ListNode &listNode = listNodes[listIndex];
2155
uint32_t &sortDegree = sortDegrees[listNode.instructionIndex];
2156
assert(sortDegree > 0);
2157
sortDegree--;
2158
if (sortDegree == 0) {
2159
instructionStack.emplace_back(listNode.instructionIndex);
2160
}
2161
2162
listIndex = listNode.nextListIndex;
2163
}
2164
}
2165
2166
if (instructionOrder.size() < instructions.size()) {
2167
fprintf(stderr, "Sorting shader failed. Not all instructions could be reached.\n");
2168
#if RESPV_VERBOSE_ERRORS
2169
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2170
if (sortDegrees[i] != 0) {
2171
fprintf(stderr, "[%d] Remaining Degrees %d\n", i, sortDegrees[i]);
2172
}
2173
}
2174
#endif
2175
return false;
2176
}
2177
2178
instructionSortVector.resize(instructionOrder.size(), InstructionSort());
2179
for (uint32_t instructionIndex : instructionOrder) {
2180
uint64_t nextLevel = instructionSortVector[instructionIndex].instructionLevel + 1;
2181
uint32_t listIndex = instructionAdjacentListIndices[instructionIndex];
2182
while (listIndex != UINT32_MAX) {
2183
const ListNode &listNode = listNodes[listIndex];
2184
instructionSortVector[listNode.instructionIndex].instructionLevel = std::max(instructionSortVector[listNode.instructionIndex].instructionLevel, nextLevel);
2185
listIndex = listNode.nextListIndex;
2186
}
2187
2188
instructionSortVector[instructionIndex].instructionIndex = instructionIndex;
2189
}
2190
2191
std::sort(instructionSortVector.begin(), instructionSortVector.end());
2192
2193
// Rebuild the instruction order vector with the sorted indices. If any of the instructions are pointers, store
2194
// them in a separate vector that will be used for another optimization pass.
2195
instructionOrder.clear();
2196
variableOrder.clear();
2197
for (InstructionSort &instructionSort : instructionSortVector) {
2198
instructionOrder.emplace_back(uint32_t(instructionSort.instructionIndex));
2199
2200
uint32_t wordIndex = instructions[instructionSort.instructionIndex].wordIndex;
2201
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
2202
if (opCode == SpvOpVariable) {
2203
variableOrder.emplace_back(uint32_t(instructionSort.instructionIndex));
2204
}
2205
}
2206
2207
return true;
2208
}
2209
2210
bool Shader::parse(const void *pData, size_t pSize, bool pInlineFunctions) {
2211
assert(pData != nullptr);
2212
assert((pSize % sizeof(uint32_t) == 0) && "Size of data must be aligned to the word size.");
2213
2214
clear();
2215
2216
if (!checkData(pData, pSize)) {
2217
return false;
2218
}
2219
2220
extSpirvWords = reinterpret_cast<const uint32_t *>(pData);
2221
extSpirvWordCount = pSize / sizeof(uint32_t);
2222
2223
if (pInlineFunctions && !inlineData(pData, pSize)) {
2224
clear();
2225
return false;
2226
}
2227
2228
const void *data = pInlineFunctions ? inlinedSpirvWords.data() : pData;
2229
const size_t size = pInlineFunctions ? (inlinedSpirvWords.size() * sizeof(uint32_t)) : pSize;
2230
if (!parseData(data, size)) {
2231
clear();
2232
return false;
2233
}
2234
2235
if (!process(data, size)) {
2236
clear();
2237
return false;
2238
}
2239
2240
if (!sort(data, size)) {
2241
clear();
2242
return false;
2243
}
2244
2245
return true;
2246
}
2247
2248
bool Shader::empty() const {
2249
return inlinedSpirvWords.empty() && ((extSpirvWords == nullptr) || (extSpirvWordCount == 0));
2250
}
2251
2252
// Optimizer
2253
2254
// Outcome of evaluating a result during optimization: not evaluated yet, a known 32-bit constant, or a
// value that is treated as variable.
struct Resolution {
	enum Type {
		Unknown,
		Constant,
		Variable
	};

	Type type = Type::Unknown;

	// Raw 32-bit payload of a constant. Both members share the same storage.
	struct {
		union {
			int32_t i32;
			uint32_t u32;
		};
	} value = {};

	// Builds a constant holding 1 for true and 0 for false.
	static Resolution fromBool(bool pValue) {
		return fromUint32(pValue ? 1 : 0);
	}

	// Builds a constant from a signed 32-bit value.
	static Resolution fromInt32(int32_t pValue) {
		Resolution signedConstant;
		signedConstant.value.i32 = pValue;
		signedConstant.type = Type::Constant;
		return signedConstant;
	}

	// Builds a constant from an unsigned 32-bit value.
	static Resolution fromUint32(uint32_t pValue) {
		Resolution unsignedConstant;
		unsignedConstant.value.u32 = pValue;
		unsignedConstant.type = Type::Constant;
		return unsignedConstant;
	}
};
2291
2292
struct OptimizerContext {
2293
const Shader &shader;
2294
std::vector<uint32_t> &instructionAdjacentListIndices;
2295
std::vector<uint32_t> &instructionInDegrees;
2296
std::vector<uint32_t> &instructionOutDegrees;
2297
std::vector<ListNode> &listNodes;
2298
std::vector<Resolution> &resolutions;
2299
std::vector<uint8_t> &optimizedData;
2300
Options options;
2301
2302
OptimizerContext() = delete;
2303
};
2304
2305
static void optimizerEliminateInstruction(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2306
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2307
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2308
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2309
for (uint32_t j = 0; j < wordCount; j++) {
2310
optimizedWords[wordIndex + j] = UINT32_MAX;
2311
}
2312
}
2313
2314
// Drains the stack of result IDs, reducing the out degree of the instruction behind each one. Instructions
// that end up with no remaining users and have no side effects are deleted, and their own operands are
// pushed to the stack so the reduction keeps propagating.
//
// rContext: Optimizer state. Its optimized data and out degrees are modified.
// rResultStack: Result IDs whose out degree must be reduced. Empty when the function returns.
static void optimizerReduceResultDegrees(OptimizerContext &rContext, std::vector<uint32_t> &rResultStack) {
	const uint32_t *optimizedWords = reinterpret_cast<const uint32_t *>(rContext.optimizedData.data());

	// Pushes every ID operand of the instruction into the result stack.
	auto pushOperands = [&](SpvOp pOpCode, uint32_t pWordIndex, uint32_t pWordCount) {
		uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
		bool operandWordSkipString;
		if (!SpvHasOperands(pOpCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
			return;
		}

		uint32_t operandWordIndex = operandWordStart;
		for (uint32_t j = 0; j < operandWordCount; j++) {
			if (checkOperandWordSkip(pWordIndex, optimizedWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
				continue;
			}

			if (operandWordIndex >= pWordCount) {
				break;
			}

			rResultStack.emplace_back(optimizedWords[pWordIndex + operandWordIndex]);
			operandWordIndex += operandWordStride;
		}
	};

	while (!rResultStack.empty()) {
		const uint32_t resultId = rResultStack.back();
		rResultStack.pop_back();

		const uint32_t instructionIndex = rContext.shader.results[resultId].instructionIndex;
		const uint32_t wordIndex = rContext.shader.instructions[instructionIndex].wordIndex;

		// Instruction's been deleted.
		if (optimizedWords[wordIndex] == UINT32_MAX) {
			continue;
		}

		// Consider it's possible for a result to have no outgoing connections on an unoptimized shader.
		uint32_t &outDegree = rContext.instructionOutDegrees[instructionIndex];
		if (outDegree > 0) {
			outDegree--;
		}

		// The instruction can only be deleted when nothing uses its result anymore. Instructions with side
		// effects are kept: function calls fall in this group as it's not easy to evaluate whether the
		// function has side effects or not.
		const SpvOp opCode = SpvOp(optimizedWords[wordIndex] & 0xFFFFU);
		if ((outDegree != 0) || SpvHasSideEffects(opCode)) {
			continue;
		}

		// Push any operands it uses into the stack as well to reduce their out degrees.
		const uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
		pushOperands(opCode, wordIndex, wordCount);

		// Function parameters are excluded from being deleted as they'd break the function type definitions.
		// For being able to delete them, the original function type would have to be modified and only as
		// long as no other functions are reusing the same type definition.
		if (opCode != SpvOpFunctionParameter) {
			optimizerEliminateInstruction(instructionIndex, rContext);
		}

		if (opCode != SpvOpFunction) {
			continue;
		}

		// When a function is deleted, we just delete any instructions we can find until finding the function end.
		const uint32_t instructionCount = uint32_t(rContext.shader.instructions.size());
		bool foundFunctionEnd = false;
		for (uint32_t i = instructionIndex; (i < instructionCount) && !foundFunctionEnd; i++) {
			const uint32_t bodyWordIndex = rContext.shader.instructions[i].wordIndex;
			if (optimizedWords[bodyWordIndex] == UINT32_MAX) {
				continue;
			}

			const SpvOp bodyOpCode = SpvOp(optimizedWords[bodyWordIndex] & 0xFFFFU);
			const uint32_t bodyWordCount = (optimizedWords[bodyWordIndex] >> 16U) & 0xFFFFU;
			foundFunctionEnd = (bodyOpCode == SpvOpFunctionEnd);

			pushOperands(bodyOpCode, bodyWordIndex, bodyWordCount);
			optimizerEliminateInstruction(i, rContext);
		}
	}
}
2388
2389
// Resets the optimizer's working state from the shader stored in the context.
//
// The resolutions are reset to their default state (one per shader result), the graph data is copied from
// the shader, and the optimized data becomes a byte copy of the shader's words: the inlined words when
// present, otherwise the external binary the shader was parsed from.
//
// Returns true. No failure path exists at the moment.
static bool optimizerPrepareData(OptimizerContext &rContext) {
	OptimizerContext &c = rContext;
	c.resolutions.clear();
	c.resolutions.resize(c.shader.results.size(), Resolution());

	// assign() is used instead of resize() and a raw memory copy so copying from an empty container, whose
	// data pointer may be null, remains well-defined.
	c.instructionAdjacentListIndices.assign(c.shader.instructionAdjacentListIndices.begin(), c.shader.instructionAdjacentListIndices.end());
	c.instructionInDegrees.assign(c.shader.instructionInDegrees.begin(), c.shader.instructionInDegrees.end());
	c.instructionOutDegrees.assign(c.shader.instructionOutDegrees.begin(), c.shader.instructionOutDegrees.end());
	c.listNodes.assign(c.shader.listNodes.begin(), c.shader.listNodes.end());

	// Pick the words the shader was actually parsed from.
	const void *sourceData = nullptr;
	size_t sourceWordCount = 0;
	if (c.shader.inlinedSpirvWords.empty()) {
		sourceData = c.shader.extSpirvWords;
		sourceWordCount = c.shader.extSpirvWordCount;
	}
	else {
		sourceData = c.shader.inlinedSpirvWords.data();
		sourceWordCount = c.shader.inlinedSpirvWords.size();
	}

	// An empty shader has no external words either, in which case the optimized data is just emptied.
	const uint8_t *sourceBytes = static_cast<const uint8_t *>(sourceData);
	if (sourceBytes == nullptr) {
		c.optimizedData.clear();
	}
	else {
		c.optimizedData.assign(sourceBytes, sourceBytes + sourceWordCount * sizeof(uint32_t));
	}

	return true;
}
2413
2414
static bool optimizerPatchSpecializationConstants(const SpecConstant *pNewSpecConstants, uint32_t pNewSpecConstantCount, OptimizerContext &rContext) {
2415
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2416
for (uint32_t i = 0; i < pNewSpecConstantCount; i++) {
2417
const SpecConstant &newSpecConstant = pNewSpecConstants[i];
2418
if (newSpecConstant.specId >= rContext.shader.specializations.size()) {
2419
continue;
2420
}
2421
2422
const Specialization &specialization = rContext.shader.specializations[newSpecConstant.specId];
2423
if (specialization.constantInstructionIndex == UINT32_MAX) {
2424
continue;
2425
}
2426
2427
uint32_t constantWordIndex = rContext.shader.instructions[specialization.constantInstructionIndex].wordIndex;
2428
SpvOp constantOpCode = SpvOp(optimizedWords[constantWordIndex] & 0xFFFFU);
2429
uint32_t constantWordCount = (optimizedWords[constantWordIndex] >> 16U) & 0xFFFFU;
2430
switch (constantOpCode) {
2431
case SpvOpSpecConstantTrue:
2432
case SpvOpSpecConstantFalse:
2433
optimizedWords[constantWordIndex] = (newSpecConstant.values[0] ? SpvOpConstantTrue : SpvOpConstantFalse) | (constantWordCount << 16U);
2434
break;
2435
case SpvOpSpecConstant:
2436
if (constantWordCount <= 3) {
2437
fprintf(stderr, "Optimization error. Specialization constant has less words than expected.\n");
2438
return false;
2439
}
2440
2441
if (newSpecConstant.values.size() != (constantWordCount - 3)) {
2442
fprintf(stderr, "Optimization error. Value count for specialization constant %u differs from the expected size.\n", newSpecConstant.specId);
2443
return false;
2444
}
2445
2446
optimizedWords[constantWordIndex] = SpvOpConstant | (constantWordCount << 16U);
2447
memcpy(&optimizedWords[constantWordIndex + 3], newSpecConstant.values.data(), sizeof(uint32_t) * (constantWordCount - 3));
2448
break;
2449
default:
2450
fprintf(stderr, "Optimization error. Can't patch opCode %u.\n", constantOpCode);
2451
return false;
2452
}
2453
2454
// Eliminate the decorator instruction as well.
2455
optimizerEliminateInstruction(specialization.decorationInstructionIndex, rContext);
2456
}
2457
2458
return true;
2459
}
2460
2461
static void optimizerEvaluateResult(uint32_t pResultId, OptimizerContext &rContext) {
2462
const uint32_t *optimizedWords = reinterpret_cast<const uint32_t *>(rContext.optimizedData.data());
2463
const Result &result = rContext.shader.results[pResultId];
2464
Resolution &resolution = rContext.resolutions[pResultId];
2465
uint32_t resultWordIndex = rContext.shader.instructions[result.instructionIndex].wordIndex;
2466
SpvOp opCode = SpvOp(optimizedWords[resultWordIndex] & 0xFFFFU);
2467
uint32_t wordCount = (optimizedWords[resultWordIndex] >> 16U) & 0xFFFFU;
2468
switch (opCode) {
2469
case SpvOpConstant: {
2470
// Parse the known type of constants. Any other types will be considered as variable.
2471
const Result &typeResult = rContext.shader.results[optimizedWords[resultWordIndex + 1]];
2472
uint32_t typeWordIndex = rContext.shader.instructions[typeResult.instructionIndex].wordIndex;
2473
SpvOp typeOpCode = SpvOp(optimizedWords[typeWordIndex] & 0xFFFFU);
2474
uint32_t typeWidthInBits = optimizedWords[typeWordIndex + 2];
2475
uint32_t typeSigned = optimizedWords[typeWordIndex + 3];
2476
if ((typeOpCode == SpvOpTypeInt) && (typeWidthInBits == 32)) {
2477
if (typeSigned) {
2478
resolution = Resolution::fromInt32(int32_t(optimizedWords[resultWordIndex + 3]));
2479
}
2480
else {
2481
resolution = Resolution::fromUint32(optimizedWords[resultWordIndex + 3]);
2482
}
2483
}
2484
else {
2485
resolution.type = Resolution::Type::Variable;
2486
}
2487
2488
break;
2489
}
2490
case SpvOpConstantTrue:
2491
resolution = Resolution::fromBool(true);
2492
break;
2493
case SpvOpConstantFalse:
2494
resolution = Resolution::fromBool(false);
2495
break;
2496
case SpvOpBitcast: {
2497
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2498
resolution = Resolution::fromUint32(operandResolution.value.u32);
2499
break;
2500
}
2501
case SpvOpIAdd: {
2502
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2503
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2504
resolution = Resolution::fromUint32(firstResolution.value.u32 + secondResolution.value.u32);
2505
break;
2506
}
2507
case SpvOpISub: {
2508
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2509
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2510
resolution = Resolution::fromUint32(firstResolution.value.u32 - secondResolution.value.u32);
2511
break;
2512
}
2513
case SpvOpIMul: {
2514
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2515
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2516
resolution = Resolution::fromUint32(firstResolution.value.u32 * secondResolution.value.u32);
2517
break;
2518
}
2519
case SpvOpUDiv: {
2520
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2521
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2522
resolution = Resolution::fromUint32(firstResolution.value.u32 / secondResolution.value.u32);
2523
break;
2524
}
2525
case SpvOpSDiv: {
2526
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2527
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2528
resolution = Resolution::fromUint32(firstResolution.value.i32 / secondResolution.value.i32);
2529
break;
2530
}
2531
case SpvOpLogicalEqual: {
2532
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2533
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2534
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) == (secondResolution.value.u32 != 0));
2535
break;
2536
}
2537
case SpvOpLogicalNotEqual: {
2538
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2539
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2540
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) != (secondResolution.value.u32 != 0));
2541
break;
2542
}
2543
case SpvOpLogicalOr: {
2544
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2545
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2546
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) || (secondResolution.value.u32 != 0));
2547
break;
2548
}
2549
case SpvOpLogicalAnd: {
2550
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2551
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2552
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) && (secondResolution.value.u32 != 0));
2553
break;
2554
}
2555
case SpvOpLogicalNot: {
2556
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2557
resolution = Resolution::fromBool(operandResolution.value.u32 == 0);
2558
break;
2559
}
2560
case SpvOpSelect: {
2561
const Resolution &conditionResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2562
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2563
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 5]];
2564
resolution = (conditionResolution.value.u32 != 0) ? firstResolution : secondResolution;
2565
break;
2566
}
2567
case SpvOpIEqual: {
2568
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2569
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2570
resolution = Resolution::fromBool(firstResolution.value.u32 == secondResolution.value.u32);
2571
break;
2572
}
2573
case SpvOpINotEqual: {
2574
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2575
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2576
resolution = Resolution::fromBool(firstResolution.value.u32 != secondResolution.value.u32);
2577
break;
2578
}
2579
case SpvOpUGreaterThan: {
2580
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2581
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2582
resolution = Resolution::fromBool(firstResolution.value.u32 > secondResolution.value.u32);
2583
break;
2584
}
2585
case SpvOpSGreaterThan: {
2586
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2587
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2588
resolution = Resolution::fromBool(firstResolution.value.i32 > secondResolution.value.i32);
2589
break;
2590
}
2591
case SpvOpUGreaterThanEqual: {
2592
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2593
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2594
resolution = Resolution::fromBool(firstResolution.value.u32 >= secondResolution.value.u32);
2595
break;
2596
}
2597
case SpvOpSGreaterThanEqual: {
2598
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2599
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2600
resolution = Resolution::fromBool(firstResolution.value.i32 >= secondResolution.value.i32);
2601
break;
2602
}
2603
case SpvOpULessThan: {
2604
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2605
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2606
resolution = Resolution::fromBool(firstResolution.value.u32 < secondResolution.value.u32);
2607
break;
2608
}
2609
case SpvOpSLessThan: {
2610
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2611
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2612
resolution = Resolution::fromBool(firstResolution.value.i32 < secondResolution.value.i32);
2613
break;
2614
}
2615
case SpvOpULessThanEqual: {
2616
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2617
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2618
resolution = Resolution::fromBool(firstResolution.value.u32 <= secondResolution.value.u32);
2619
break;
2620
}
2621
case SpvOpSLessThanEqual: {
2622
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2623
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2624
resolution = Resolution::fromBool(firstResolution.value.i32 <= secondResolution.value.i32);
2625
break;
2626
}
2627
case SpvOpShiftRightLogical: {
2628
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2629
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2630
resolution = Resolution::fromUint32(baseResolution.value.u32 >> shiftResolution.value.u32);
2631
break;
2632
}
2633
case SpvOpShiftRightArithmetic: {
2634
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2635
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2636
resolution = Resolution::fromInt32(baseResolution.value.i32 >> shiftResolution.value.i32);
2637
break;
2638
}
2639
case SpvOpShiftLeftLogical: {
2640
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2641
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2642
resolution = Resolution::fromUint32(baseResolution.value.u32 << shiftResolution.value.u32);
2643
break;
2644
}
2645
case SpvOpBitwiseOr: {
2646
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2647
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2648
resolution = Resolution::fromUint32(firstResolution.value.u32 | secondResolution.value.u32);
2649
break;
2650
}
2651
case SpvOpBitwiseAnd: {
2652
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2653
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2654
resolution = Resolution::fromUint32(firstResolution.value.u32 & secondResolution.value.u32);
2655
break;
2656
}
2657
case SpvOpBitwiseXor: {
2658
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2659
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2660
resolution = Resolution::fromUint32(firstResolution.value.u32 ^ secondResolution.value.u32);
2661
break;
2662
}
2663
case SpvOpNot: {
2664
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2665
resolution = Resolution::fromUint32(~operandResolution.value.u32);
2666
break;
2667
}
2668
case SpvOpPhi: {
2669
// Resolve as constant if Phi operator was compacted to only one option.
2670
if (wordCount == 5) {
2671
resolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2672
}
2673
else {
2674
resolution.type = Resolution::Type::Variable;
2675
}
2676
2677
break;
2678
}
2679
default:
2680
// It's not known how to evaluate the instruction, consider the result a variable.
2681
resolution.type = Resolution::Type::Variable;
2682
break;
2683
}
2684
}
2685
2686
static void optimizerReduceLabelDegree(uint32_t pFirstLabelId, OptimizerContext &rContext) {
2687
thread_local std::vector<uint32_t> labelStack;
2688
thread_local std::vector<uint32_t> resultStack;
2689
thread_local std::vector<uint32_t> degreeReductions;
2690
labelStack.emplace_back(pFirstLabelId);
2691
resultStack.clear();
2692
degreeReductions.clear();
2693
2694
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2695
while (!labelStack.empty()) {
2696
uint32_t labelId = labelStack.back();
2697
labelStack.pop_back();
2698
2699
uint32_t instructionIndex = rContext.shader.results[labelId].instructionIndex;
2700
if (rContext.instructionInDegrees[instructionIndex] == 0) {
2701
continue;
2702
}
2703
2704
rContext.instructionInDegrees[instructionIndex]--;
2705
2706
// If a label's degree becomes 0, eliminate all the instructions of the block.
2707
// Eliminate as many instructions as possible until finding the terminator of the block.
2708
// When finding the terminator, look at the labels it has and push them to the stack to
2709
// reduce their degrees as well.
2710
if (rContext.instructionInDegrees[instructionIndex] == 0) {
2711
bool foundTerminator = false;
2712
uint32_t instructionCount = rContext.shader.instructions.size();
2713
for (uint32_t i = instructionIndex; (i < instructionCount) && !foundTerminator; i++) {
2714
uint32_t wordIndex = rContext.shader.instructions[i].wordIndex;
2715
if (optimizedWords[wordIndex] == UINT32_MAX) {
2716
continue;
2717
}
2718
2719
// If the instruction has labels it can reference, we push the labels to reduce their degrees as well.
2720
SpvOp opCode = SpvOp(optimizedWords[wordIndex] & 0xFFFFU);
2721
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2722
uint32_t labelWordStart, labelWordCount, labelWordStride;
2723
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, false)) {
2724
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
2725
uint32_t terminatorLabelId = optimizedWords[wordIndex + labelWordStart + j * labelWordStride];
2726
labelStack.emplace_back(terminatorLabelId);
2727
}
2728
}
2729
2730
// If the instruction has operands, decrease their degree.
2731
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
2732
bool operandWordSkipString;
2733
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
2734
uint32_t operandWordIndex = operandWordStart;
2735
for (uint32_t j = 0; j < operandWordCount; j++) {
2736
if (checkOperandWordSkip(wordIndex, optimizedWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
2737
continue;
2738
}
2739
2740
if (operandWordIndex >= wordCount) {
2741
break;
2742
}
2743
2744
uint32_t operandId = optimizedWords[wordIndex + operandWordIndex];
2745
resultStack.emplace_back(operandId);
2746
operandWordIndex += operandWordStride;
2747
}
2748
}
2749
2750
foundTerminator = SpvOpIsTerminator(opCode);
2751
optimizerEliminateInstruction(i, rContext);
2752
}
2753
}
2754
}
2755
2756
optimizerReduceResultDegrees(rContext, resultStack);
2757
}
2758
2759
static void optimizerEvaluateTerminator(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2760
// For each type of supported terminator, check if the operands can be resolved into constants.
2761
// If they can be resolved, eliminate any other branches that don't pass the condition.
2762
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2763
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2764
SpvOp opCode = SpvOp(optimizedWords[wordIndex] & 0xFFFFU);
2765
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2766
uint32_t defaultLabelId = UINT32_MAX;
2767
2768
// Both instructions share that the second word is the operator they must use to resolve the condition.
2769
// Operator can't be anything but a constant to be able to resolve a terminator.
2770
const uint32_t operatorId = optimizedWords[wordIndex + 1];
2771
const Resolution &operatorResolution = rContext.resolutions[operatorId];
2772
if (operatorResolution.type != Resolution::Type::Constant) {
2773
return;
2774
}
2775
2776
if (opCode == SpvOpBranchConditional) {
2777
// Branch conditional only needs to choose either label depending on whether the result is true or false.
2778
if (operatorResolution.value.u32) {
2779
defaultLabelId = optimizedWords[wordIndex + 2];
2780
optimizerReduceLabelDegree(optimizedWords[wordIndex + 3], rContext);
2781
}
2782
else {
2783
defaultLabelId = optimizedWords[wordIndex + 3];
2784
optimizerReduceLabelDegree(optimizedWords[wordIndex + 2], rContext);
2785
}
2786
2787
// If there's a selection merge before this branch, we place the unconditional branch in its place.
2788
const uint32_t mergeWordCount = 3;
2789
uint32_t mergeWordIndex = wordIndex - mergeWordCount;
2790
SpvOp mergeOpCode = SpvOp(optimizedWords[mergeWordIndex] & 0xFFFFU);
2791
2792
uint32_t patchWordIndex;
2793
if (mergeOpCode == SpvOpSelectionMerge) {
2794
optimizerReduceLabelDegree(optimizedWords[mergeWordIndex + 1], rContext);
2795
patchWordIndex = mergeWordIndex;
2796
}
2797
else {
2798
patchWordIndex = wordIndex;
2799
}
2800
2801
// Make the final label the new default case and reduce the word count.
2802
optimizedWords[patchWordIndex] = SpvOpBranch | (2U << 16U);
2803
optimizedWords[patchWordIndex + 1] = defaultLabelId;
2804
2805
// Eliminate any remaining words on the block.
2806
for (uint32_t i = patchWordIndex + 2; i < (wordIndex + wordCount); i++) {
2807
optimizedWords[i] = UINT32_MAX;
2808
}
2809
}
2810
else if (opCode == SpvOpSwitch) {
2811
// Switch must compare the integer result of the operator to all the possible labels.
2812
// If the label is not as possible result, then reduce its block's degree.
2813
for (uint32_t i = 3; i < wordCount; i += 2) {
2814
if (operatorResolution.value.u32 == optimizedWords[wordIndex + i]) {
2815
defaultLabelId = optimizedWords[wordIndex + i + 1];
2816
}
2817
else {
2818
optimizerReduceLabelDegree(optimizedWords[wordIndex + i + 1], rContext);
2819
}
2820
}
2821
2822
// If none are chosen, the default label is selected. Otherwise, reduce the block's degree
2823
// for the default label.
2824
if (defaultLabelId == UINT32_MAX) {
2825
defaultLabelId = optimizedWords[wordIndex + 2];
2826
}
2827
else {
2828
optimizerReduceLabelDegree(optimizedWords[wordIndex + 2], rContext);
2829
}
2830
2831
// Make the final label the new default case and reduce the word count.
2832
optimizedWords[wordIndex] = SpvOpSwitch | (3U << 16U);
2833
optimizedWords[wordIndex + 1] = rContext.shader.defaultSwitchOpConstantInt;
2834
optimizedWords[wordIndex + 2] = defaultLabelId;
2835
2836
// Increase the degree of the default constant that was chosen so it's not considered as dead code.
2837
uint32_t defaultConstantInstructionIndex = rContext.shader.results[rContext.shader.defaultSwitchOpConstantInt].instructionIndex;
2838
rContext.instructionOutDegrees[defaultConstantInstructionIndex]++;
2839
2840
// Eliminate any remaining words on the block.
2841
for (uint32_t i = wordIndex + 3; i < (wordIndex + wordCount); i++) {
2842
optimizedWords[i] = UINT32_MAX;
2843
}
2844
}
2845
2846
// The condition operator can be discarded.
2847
thread_local std::vector<uint32_t> resultStack;
2848
resultStack.clear();
2849
resultStack.emplace_back(operatorId);
2850
optimizerReduceResultDegrees(rContext, resultStack);
2851
}
2852
2853
static bool optimizerCompactPhi(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2854
// Do a backwards search first to find out what label this instruction belongs to.
2855
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2856
uint32_t searchInstructionIndex = pInstructionIndex;
2857
uint32_t instructionLabelId = UINT32_MAX;
2858
while (searchInstructionIndex > 0) {
2859
uint32_t searchWordIndex = rContext.shader.instructions[searchInstructionIndex].wordIndex;
2860
SpvOp searchOpCode = SpvOp(optimizedWords[searchWordIndex] & 0xFFFFU);
2861
if (searchOpCode == SpvOpLabel) {
2862
instructionLabelId = optimizedWords[searchWordIndex + 1];
2863
break;
2864
}
2865
2866
searchInstructionIndex--;
2867
}
2868
2869
if (instructionLabelId == UINT32_MAX) {
2870
fprintf(stderr, "Unable to find a label before OpPhi.\n");
2871
return false;
2872
}
2873
2874
thread_local std::vector<uint32_t> resultStack;
2875
resultStack.clear();
2876
2877
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2878
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2879
uint32_t newWordCount = 3;
2880
uint32_t instructionCount = rContext.shader.instructions.size();
2881
for (uint32_t i = 3; i < wordCount; i += 2) {
2882
uint32_t labelId = optimizedWords[wordIndex + i + 1];
2883
uint32_t labelInstructionIndex = rContext.shader.results[labelId].instructionIndex;
2884
uint32_t labelWordIndex = rContext.shader.instructions[labelInstructionIndex].wordIndex;
2885
2886
// Label's been eliminated. Skip it.
2887
if (optimizedWords[labelWordIndex] == UINT32_MAX) {
2888
resultStack.emplace_back(optimizedWords[wordIndex + i]);
2889
continue;
2890
}
2891
2892
// While the label may not have been eliminated, verify its terminator is still pointing to this block.
2893
bool foundBranchToThisBlock = false;
2894
for (uint32_t j = labelInstructionIndex; j < instructionCount; j++) {
2895
uint32_t searchWordIndex = rContext.shader.instructions[j].wordIndex;
2896
SpvOp searchOpCode = SpvOp(optimizedWords[searchWordIndex] & 0xFFFFU);
2897
uint32_t searchWordCount = (optimizedWords[searchWordIndex] >> 16U) & 0xFFFFU;
2898
if (SpvOpIsTerminator(searchOpCode)) {
2899
uint32_t labelWordStart, labelWordCount, labelWordStride;
2900
if (SpvHasLabels(searchOpCode, labelWordStart, labelWordCount, labelWordStride, false)) {
2901
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < searchWordCount); j++) {
2902
uint32_t searchLabelId = optimizedWords[searchWordIndex + labelWordStart + j * labelWordStride];
2903
if (searchLabelId == instructionLabelId) {
2904
foundBranchToThisBlock = true;
2905
break;
2906
}
2907
}
2908
}
2909
2910
break;
2911
}
2912
}
2913
2914
// The preceding block did not have any reference to this block. Skip it.
2915
if (!foundBranchToThisBlock) {
2916
resultStack.emplace_back(optimizedWords[wordIndex + i]);
2917
continue;
2918
}
2919
2920
// Copy the words.
2921
optimizedWords[wordIndex + newWordCount + 0] = optimizedWords[wordIndex + i + 0];
2922
optimizedWords[wordIndex + newWordCount + 1] = optimizedWords[wordIndex + i + 1];
2923
newWordCount += 2;
2924
}
2925
2926
// Patch in the new word count.
2927
assert((optimizedWords[wordIndex] != UINT32_MAX) && "The instruction shouldn't be getting deleted from reducing the degree of the operands.");
2928
optimizedWords[wordIndex] = SpvOpPhi | (newWordCount << 16U);
2929
2930
// Delete any of the remaining words.
2931
for (uint32_t i = newWordCount; i < wordCount; i++) {
2932
optimizedWords[wordIndex + i] = UINT32_MAX;
2933
}
2934
2935
optimizerReduceResultDegrees(rContext, resultStack);
2936
2937
return true;
2938
}
2939
2940
// Walks the instructions following the shader's instruction order and, for each one that is still alive:
// - Queues results that have no users so their degrees are reduced at the end of the pass.
// - Compacts OpPhi instructions and evaluates results whose operands are all resolved as non-variable.
// - Evaluates OpBranchConditional and OpSwitch terminators.
//
// Does nothing if dead code removal is disabled. Returns false if an OpPhi could not be compacted or
// if an operand was found to be unresolved.
static bool optimizerRunEvaluationPass(OptimizerContext &rContext) {
    if (!rContext.options.removeDeadCode) {
        return true;
    }

    thread_local std::vector<uint32_t> resultStack;
    resultStack.clear();

    uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
    const uint32_t orderCount = uint32_t(rContext.shader.instructionOrder.size());
    for (uint32_t orderIndex = 0; orderIndex < orderCount; orderIndex++) {
        const uint32_t instructionIndex = rContext.shader.instructionOrder[orderIndex];
        const uint32_t wordIndex = rContext.shader.instructions[instructionIndex].wordIndex;
        if (words[wordIndex] == UINT32_MAX) {
            // Instruction has been deleted.
            continue;
        }

        const SpvOp opCode = SpvOp(words[wordIndex] & 0xFFFFU);
        bool hasResult, hasType;
        SpvHasResultAndType(opCode, &hasResult, &hasType);

        // Out of the instructions without a result, only the conditional terminators are evaluated.
        if (!hasResult) {
            if ((opCode == SpvOpBranchConditional) || (opCode == SpvOpSwitch)) {
                optimizerEvaluateTerminator(instructionIndex, rContext);
            }

            continue;
        }

        // Labels and function calls are never queued here even if nothing uses their result.
        const uint32_t resultId = words[wordIndex + (hasType ? 2 : 1)];
        const bool exemptFromQueue = (opCode == SpvOpLabel) || (opCode == SpvOpFunctionCall);
        if (!exemptFromQueue && (rContext.instructionOutDegrees[instructionIndex] == 0)) {
            resultStack.emplace_back(resultId);
            continue;
        }

        // Compacting an OpPhi can shrink it, so the word count must be read again afterwards.
        uint32_t liveWordCount = (words[wordIndex] >> 16U) & 0xFFFFU;
        if (opCode == SpvOpPhi) {
            if (!optimizerCompactPhi(instructionIndex, rContext)) {
                return false;
            }

            liveWordCount = (words[wordIndex] >> 16U) & 0xFFFFU;
        }

        // Check if any of the operands is a variable.
        bool foundVariableOperand = false;
        uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
        bool operandWordSkipString;
        if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
            uint32_t operandWordIndex = operandWordStart;
            for (uint32_t j = 0; j < operandWordCount; j++) {
                if (checkOperandWordSkip(wordIndex, words, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
                    continue;
                }

                if (operandWordIndex >= liveWordCount) {
                    break;
                }

                const uint32_t operandId = words[wordIndex + operandWordIndex];
                assert((operandId != UINT32_MAX) && "An operand that's been deleted shouldn't be getting evaluated.");

                // It shouldn't be possible for an operand to not be solved, but OpPhi can do so because previous blocks might've been deleted.
                const Resolution::Type operandType = rContext.resolutions[operandId].type;
                if ((opCode != SpvOpPhi) && (operandType == Resolution::Type::Unknown)) {
                    fprintf(stderr, "Error in resolution of the operations. Operand %u was not solved.\n", operandId);
                    return false;
                }

                if (operandType == Resolution::Type::Variable) {
                    foundVariableOperand = true;
                    break;
                }

                operandWordIndex += operandWordStride;
            }
        }

        // A single variable operand is enough to make the result a variable as well.
        if (foundVariableOperand) {
            rContext.resolutions[resultId].type = Resolution::Type::Variable;
        }
        else {
            optimizerEvaluateResult(resultId, rContext);
        }
    }

    optimizerReduceResultDegrees(rContext, resultStack);

    return true;
}
3031
3032
static bool optimizerDoesInstructionDominate(const Shader &pShader, const Instruction &pInstructionA, const Instruction &pInstructionB) {
3033
// If on the same block, the instruction will only dominate the other one if it precedes it.
3034
if (pInstructionA.blockIndex == pInstructionB.blockIndex) {
3035
return pInstructionA.wordIndex < pInstructionB.wordIndex;
3036
}
3037
// If the blocks are different, compare the indices of the pre-order and post-order traversal
3038
// to determine whether it dominates the other block.
3039
else {
3040
const uint32_t aPreIndex = pShader.blockPreOrderIndices[pInstructionA.blockIndex];
3041
const uint32_t bPreIndex = pShader.blockPreOrderIndices[pInstructionB.blockIndex];
3042
const uint32_t aPostIndex = pShader.blockPostOrderIndices[pInstructionA.blockIndex];
3043
const uint32_t bPostIndex = pShader.blockPostOrderIndices[pInstructionB.blockIndex];
3044
return (aPreIndex < bPreIndex) && (aPostIndex > bPostIndex);
3045
}
3046
}
3047
3048
// Processes the variables with Function storage class in reverse order and, for each one:
// 1. Gathers the stores and loads that reach the variable, either directly or through access chains.
//    The search is abandoned if any other kind of instruction uses the variable or its access chains.
// 2. If there's exactly one store and it writes to the variable directly, forwards the stored value to
//    the users of every direct load that the store dominates.
// 3. If every direct load was forwarded and no access chain is loaded from, eliminates the stores.
//
// Does nothing if dead code removal is disabled. Always returns true.
static bool optimizerRemoveUnusedVariables(OptimizerContext &rContext) {
    if (!rContext.options.removeDeadCode) {
        return true;
    }

    uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
    const int32_t variableCount = int32_t(rContext.shader.variableOrder.size());
    for (int32_t v = variableCount - 1; v >= 0; v--) {
        const uint32_t variableInstructionIndex = rContext.shader.variableOrder[v];
        const Instruction &variableInstruction = rContext.shader.instructions[variableInstructionIndex];
        const uint32_t variableId = words[variableInstruction.wordIndex + 2];
        if (variableId == UINT32_MAX) {
            // This variable has already been deleted.
            continue;
        }

        // Only evaluate variables local to the function.
        const SpvStorageClass storageClass = SpvStorageClass(words[variableInstruction.wordIndex + 3]);
        if (storageClass != SpvStorageClassFunction) {
            continue;
        }

        thread_local std::vector<uint32_t> resultStack;
        thread_local std::vector<uint32_t> accessStack;
        thread_local std::vector<uint32_t> storeInstructionIndices;
        thread_local std::vector<uint32_t> partialLoadInstructionIndices;
        thread_local std::vector<uint32_t> fullLoadInstructionIndices;
        bool everyStoreIsFull = true;
        resultStack.clear();
        accessStack.clear();
        storeInstructionIndices.clear();
        partialLoadInstructionIndices.clear();
        fullLoadInstructionIndices.clear();

        // Start at the variable and follow every access chain that derives from it.
        accessStack.emplace_back(variableInstructionIndex);
        while (!accessStack.empty()) {
            const uint32_t accessInstructionIndex = accessStack.back();
            accessStack.pop_back();

            if (rContext.instructionOutDegrees[accessInstructionIndex] == 0) {
                resultStack.emplace_back(variableId);
                continue;
            }

            uint32_t listIndex = rContext.instructionAdjacentListIndices[accessInstructionIndex];
            while (listIndex != UINT32_MAX) {
                const uint32_t userInstructionIndex = rContext.listNodes[listIndex].instructionIndex;
                const uint32_t userWordIndex = rContext.shader.instructions[userInstructionIndex].wordIndex;
                listIndex = rContext.listNodes[listIndex].nextListIndex;

                // Only check the instruction if it hasn't been deleted yet.
                if (words[userWordIndex] == UINT32_MAX) {
                    continue;
                }

                switch (SpvOp(words[userWordIndex] & 0xFFFFU)) {
                case SpvOpAccessChain:
                    accessStack.emplace_back(userInstructionIndex);
                    break;
                case SpvOpStore:
                    // A store is only full if it writes to the variable itself.
                    storeInstructionIndices.emplace_back(userInstructionIndex);
                    if (words[userWordIndex + 1] != variableId) {
                        everyStoreIsFull = false;
                    }

                    break;
                case SpvOpLoad:
                    // A load is only full if it reads from the variable itself.
                    if (words[userWordIndex + 3] == variableId) {
                        fullLoadInstructionIndices.emplace_back(userInstructionIndex);
                    }
                    else {
                        partialLoadInstructionIndices.emplace_back(userInstructionIndex);
                    }

                    break;
                default:
                    // The whole search process is stopped if anything in the access chain is not recognized.
                    accessStack.clear();
                    storeInstructionIndices.clear();
                    fullLoadInstructionIndices.clear();
                    partialLoadInstructionIndices.clear();
                    listIndex = UINT32_MAX;
                    break;
                }
            }
        }

        // Single store load elimination. Any variables that are only stored to once can eliminate any loads
        // and remap the results of the adjacent instructions. However, a strict requirement is that the block
        // that holds the store must dominate the block that holds the load as per SPIR-V rules.
        size_t forwardedLoadCount = 0;
        const bool hasSingleFullStore = !fullLoadInstructionIndices.empty() && (storeInstructionIndices.size() == 1) && everyStoreIsFull;
        if (hasSingleFullStore) {
            const uint32_t singleStoreInstructionIndex = storeInstructionIndices.front();
            const Instruction &storeInstruction = rContext.shader.instructions[singleStoreInstructionIndex];
            if (words[storeInstruction.wordIndex] != UINT32_MAX) {
                const uint32_t storedValueId = words[storeInstruction.wordIndex + 2];
                const uint32_t storedValueInstructionIndex = rContext.shader.results[storedValueId].instructionIndex;
                for (uint32_t loadInstructionIndex : fullLoadInstructionIndices) {
                    const Instruction &loadInstruction = rContext.shader.instructions[loadInstructionIndex];
                    const uint32_t loadWordIndex = loadInstruction.wordIndex;
                    if (words[loadWordIndex] == UINT32_MAX) {
                        // Instruction has been deleted already.
                        continue;
                    }

                    if (!optimizerDoesInstructionDominate(rContext.shader, storeInstruction, loadInstruction)) {
                        // Store's block must dominate the load's block for the elimination to be possible.
                        continue;
                    }

                    // Replace the result of the load with the stored value on every user of the load.
                    const uint32_t loadResultId = words[loadWordIndex + 2];
                    for (uint32_t listIndex = rContext.instructionAdjacentListIndices[loadInstructionIndex]; listIndex != UINT32_MAX; listIndex = rContext.listNodes[listIndex].nextListIndex) {
                        const uint32_t userInstructionIndex = rContext.listNodes[listIndex].instructionIndex;
                        const uint32_t userWordIndex = rContext.shader.instructions[userInstructionIndex].wordIndex;
                        if (words[userWordIndex] == UINT32_MAX) {
                            continue;
                        }

                        const SpvOp userOpCode = SpvOp(words[userWordIndex] & 0xFFFFU);
                        const uint32_t userWordCount = (words[userWordIndex] >> 16U) & 0xFFFFU;
                        uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
                        bool operandWordSkipString;
                        if (!SpvHasOperands(userOpCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
                            continue;
                        }

                        uint32_t operandWordIndex = operandWordStart;
                        for (uint32_t j = 0; j < operandWordCount; j++) {
                            if (checkOperandWordSkip(userWordIndex, words, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
                                continue;
                            }

                            if (operandWordIndex >= userWordCount) {
                                break;
                            }

                            uint32_t &operandId = words[userWordIndex + operandWordIndex];
                            if (operandId == loadResultId) {
                                // The user is moved from the load over to the instruction behind the stored value.
                                operandId = storedValueId;
                                resultStack.emplace_back(loadResultId);
                                rContext.instructionAdjacentListIndices[storedValueInstructionIndex] = addToList(userInstructionIndex, rContext.instructionAdjacentListIndices[storedValueInstructionIndex], rContext.listNodes);
                                rContext.instructionOutDegrees[storedValueInstructionIndex]++;
                            }

                            operandWordIndex += operandWordStride;
                        }
                    }

                    forwardedLoadCount++;
                }
            }
        }

        // Unused store elimination. Any variables which have no loads but have stores can be eliminated.
        const bool noLoadsRemain = (fullLoadInstructionIndices.size() == forwardedLoadCount) && partialLoadInstructionIndices.empty();
        if (noLoadsRemain) {
            for (uint32_t storeInstructionIndex : storeInstructionIndices) {
                const uint32_t storeWordIndex = rContext.shader.instructions[storeInstructionIndex].wordIndex;
                if (words[storeWordIndex] == UINT32_MAX) {
                    // Instruction has been deleted already.
                    continue;
                }

                // Both operands of the store lose one use.
                resultStack.emplace_back(words[storeWordIndex + 1]);
                resultStack.emplace_back(words[storeWordIndex + 2]);
                optimizerEliminateInstruction(storeInstructionIndex, rContext);
            }
        }

        optimizerReduceResultDegrees(rContext, resultStack);
    }

    return true;
}
3214
3215
// Eliminates every decoration whose target result has been deleted.
//
// Does nothing if dead code removal is disabled. Always returns true.
static bool optimizerRemoveUnusedDecorations(OptimizerContext &rContext) {
    if (!rContext.options.removeDeadCode) {
        return true;
    }

    uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
    for (const Decoration &decoration : rContext.shader.decorations) {
        // The second word of the decoration is the id of the result it targets.
        const uint32_t decorationWordIndex = rContext.shader.instructions[decoration.instructionIndex].wordIndex;
        const uint32_t targetId = words[decorationWordIndex + 1];
        if (targetId == UINT32_MAX) {
            // This decoration has already been deleted.
            continue;
        }

        // The decoration must go away if the instruction that defines the target is gone.
        const uint32_t targetInstructionIndex = rContext.shader.results[targetId].instructionIndex;
        const uint32_t targetWordIndex = rContext.shader.instructions[targetInstructionIndex].wordIndex;
        const bool targetWasDeleted = (words[targetWordIndex] == UINT32_MAX);
        if (targetWasDeleted) {
            optimizerEliminateInstruction(decoration.instructionIndex, rContext);
        }
    }

    return true;
}
3240
3241
// Runs the compaction step on every OpPhi of the shader that hasn't been deleted.
//
// Does nothing if dead code removal is disabled. Returns false as soon as one compaction fails.
static bool optimizerCompactPhis(OptimizerContext &rContext) {
    if (!rContext.options.removeDeadCode) {
        return true;
    }

    uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
    for (const Phi &phi : rContext.shader.phis) {
        // Operations that have already been deleted are skipped.
        const uint32_t phiWordIndex = rContext.shader.instructions[phi.instructionIndex].wordIndex;
        const bool phiIsAlive = (words[phiWordIndex] != UINT32_MAX);
        if (phiIsAlive && !optimizerCompactPhi(phi.instructionIndex, rContext)) {
            return false;
        }
    }

    return true;
}
3261
3262
// Packs the optimized words in place by moving every instruction that is still alive towards the
// start of the buffer, right after the header, and shrinks the buffer to the resulting size.
//
// Deleted instructions are dropped. Ignored instructions are dropped as well if dead code removal is
// enabled. Always returns true.
static bool optimizerCompactData(OptimizerContext &rContext) {
    uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());

    // The header stays where it is, so writing starts right after it.
    const uint32_t headerWordCount = 5;
    uint32_t writeWordIndex = headerWordCount;

    // Write out all the words for all the instructions and skip any that were marked as deleted.
    for (const Instruction &instruction : rContext.shader.instructions) {
        const uint32_t readWordIndex = instruction.wordIndex;
        const uint32_t headerWord = words[readWordIndex];
        if (headerWord == UINT32_MAX) {
            // Instruction has been deleted.
            continue;
        }

        // Check if the instruction should be ignored.
        const SpvOp opCode = SpvOp(headerWord & 0xFFFFU);
        if (rContext.options.removeDeadCode && SpvIsIgnored(opCode)) {
            continue;
        }

        // Source and destination can overlap, so the words are moved instead of copied.
        const uint32_t wordCount = (headerWord >> 16U) & 0xFFFFU;
        std::memmove(words + writeWordIndex, words + readWordIndex, size_t(wordCount) * sizeof(uint32_t));
        writeWordIndex += wordCount;
    }

    rContext.optimizedData.resize(writeWordIndex * sizeof(uint32_t));

    return true;
}
3299
3300
bool Optimizer::run(const Shader &pShader, const SpecConstant *pNewSpecConstants, uint32_t pNewSpecConstantCount, std::vector<uint8_t> &pOptimizedData, Options pOptions) {
3301
thread_local std::vector<uint32_t> instructionAdjacentListIndices;
3302
thread_local std::vector<uint32_t> instructionInDegrees;
3303
thread_local std::vector<uint32_t> instructionOutDegrees;
3304
thread_local std::vector<ListNode> listNodes;
3305
thread_local std::vector<Resolution> resolutions;
3306
OptimizerContext context = { pShader, instructionAdjacentListIndices, instructionInDegrees, instructionOutDegrees, listNodes, resolutions, pOptimizedData, pOptions };
3307
if (!optimizerPrepareData(context)) {
3308
return false;
3309
}
3310
3311
if (!optimizerPatchSpecializationConstants(pNewSpecConstants, pNewSpecConstantCount, context)) {
3312
return false;
3313
}
3314
3315
if (!optimizerRunEvaluationPass(context)) {
3316
return false;
3317
}
3318
3319
if (!optimizerRemoveUnusedVariables(context)) {
3320
return false;
3321
}
3322
3323
if (!optimizerRemoveUnusedDecorations(context)) {
3324
return false;
3325
}
3326
3327
// FIXME: For some reason, it seems that based on the order of the resolution, OpPhis can be compacted
3328
// before all their preceding blocks have been evaluated in time whether they should be deleted or not.
3329
// This pass merely re-runs the compaction step as a safeguard to remove any stale references. There's
3330
// potential for further optimization if this is fixed properly.
3331
if (!optimizerCompactPhis(context)) {
3332
return false;
3333
}
3334
3335
if (!optimizerCompactData(context)) {
3336
return false;
3337
}
3338
3339
return true;
3340
}
3341
}; //namespace respv
3342
3343