/*****************************************************************************
 * cabac.c: cabac bitstream writing
 *****************************************************************************
 * Copyright (C) 2003-2016 x264 project
 *
 * Authors: Laurent Aimar <[email protected]>
 *          Loren Merritt <[email protected]>
 *          Fiona Glaser <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at [email protected].
 *****************************************************************************/

#include "common/common.h"
#include "macroblock.h"

#ifndef RDO_SKIP_BS
#define RDO_SKIP_BS 0
#endif
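/* When RDO_SKIP_BS is set to 1 (this file is re-included by the RD cost code
 * for that purpose), the functions below only estimate the bit cost of each
 * syntax element (see the paths guarded by RDO_SKIP_BS) instead of emitting
 * a real CABAC bitstream. */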

static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
                                             int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
{
    if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
    {
        x264_cabac_encode_decision_noup( cb, ctx0, 0 );
    }
#if !RDO_SKIP_BS
    else if( i_mb_type == I_PCM )
    {
        x264_cabac_encode_decision_noup( cb, ctx0, 1 );
        x264_cabac_encode_flush( h, cb );
    }
#endif
    else
    {
        int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];

        x264_cabac_encode_decision_noup( cb, ctx0, 1 );
        x264_cabac_encode_terminal( cb );

        x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
        if( h->mb.i_cbp_chroma == 0 )
            x264_cabac_encode_decision_noup( cb, ctx2, 0 );
        else
        {
            x264_cabac_encode_decision( cb, ctx2, 1 );
            x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
        }
        x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
        x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
    }
}

#if !RDO_SKIP_BS
static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
{
    int ctx = 0;
    ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
    ctx += (h->mb.i_mb_top_mbpair_xy >= 0
            && h->mb.slice_table[h->mb.i_mb_top_mbpair_xy] == h->sh.i_first_mb
            && h->mb.field[h->mb.i_mb_top_mbpair_xy]);

    x264_cabac_encode_decision_noup( cb, 70 + ctx, MB_INTERLACED );
    h->mb.field_decoding_flag = MB_INTERLACED;
}
#endif

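/* intra4x4_pred_mode: one flag for "use the predicted (most probable) mode";
 * if the flag is 0, a 3-bit code follows for the remaining mode, with the
 * predicted mode skipped over (hence the i_mode-- below). */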
static void x264_cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
{
    if( i_pred == i_mode )
        x264_cabac_encode_decision( cb, 68, 1 );
    else
    {
        x264_cabac_encode_decision( cb, 68, 0 );
        if( i_mode > i_pred )
            i_mode--;
        x264_cabac_encode_decision( cb, 69, (i_mode     )&0x01 );
        x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 );
        x264_cabac_encode_decision( cb, 69, (i_mode >> 2)      );
    }
}

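/* intra_chroma_pred_mode: truncated unary with at most 3 bins; the context of
 * the first bin depends on how many neighbours use a non-zero (non-DC) chroma
 * prediction mode. */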
static void x264_cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
{
    int i_mode = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode];
    int ctx = 0;

    /* No need to test for I4x4 or I_16x16 as cache_save handles that */
    if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
        ctx++;
    if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
        ctx++;

    x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 );
    if( i_mode > 0 )
    {
        x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 );
        if( i_mode > 1 )
            x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 );
    }
}

static void x264_cabac_cbp_luma( x264_t *h, x264_cabac_t *cb )
{
    int cbp = h->mb.i_cbp_luma;
    int cbp_l = h->mb.cache.i_cbp_left;
    int cbp_t = h->mb.cache.i_cbp_top;
    x264_cabac_encode_decision     ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 );
    x264_cabac_encode_decision     ( cb, 76 - ((cbp   >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 );
    x264_cabac_encode_decision     ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp   << 1) & 2), (cbp >> 2) & 1 );
    x264_cabac_encode_decision_noup( cb, 76 - ((cbp   >> 2) & 1) - ((cbp   >> 0) & 2), (cbp >> 3) & 1 );
}

static void x264_cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb )
{
    int cbp_a = h->mb.cache.i_cbp_left & 0x30;
    int cbp_b = h->mb.cache.i_cbp_top & 0x30;
    int ctx = 0;

    if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
    if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx += 2;
    if( h->mb.i_cbp_chroma == 0 )
        x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
    else
    {
        x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );

        ctx = 4;
        if( cbp_a == 0x20 ) ctx++;
        if( cbp_b == 0x20 ) ctx += 2;
        x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma >> 1 );
    }
}

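/* mb_qp_delta: the signed delta is mapped to an unsigned value
 * (+1 -> 1, -1 -> 2, +2 -> 3, -2 -> 4, ...) and coded as a unary bin string;
 * the context of the first bin depends on whether the previous macroblock had
 * a non-zero delta. */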
static void x264_cabac_qp_delta( x264_t *h, x264_cabac_t *cb )
{
    int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
    int ctx;

    /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely
     * flat background area. Don't do this if it would raise the quantizer, since that could
     * cause unexpected deblocking artifacts. */
    if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] && h->mb.i_qp > h->mb.i_last_qp )
    {
#if !RDO_SKIP_BS
        h->mb.i_qp = h->mb.i_last_qp;
#endif
        i_dqp = 0;
    }

    ctx = h->mb.i_last_dqp && (h->mb.type[h->mb.i_mb_prev_xy] == I_16x16 || (h->mb.cbp[h->mb.i_mb_prev_xy]&0x3f));

    if( i_dqp != 0 )
    {
        /* Faster than (i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp-1)).
         * If you so much as sneeze on these lines, gcc will compile this suboptimally. */
        i_dqp *= 2;
        int val = 1 - i_dqp;
        if( val < 0 ) val = i_dqp;
        val--;
        /* dqp is interpreted modulo (QP_MAX_SPEC+1) */
        if( val >= QP_MAX_SPEC && val != QP_MAX_SPEC+1 )
            val = 2*QP_MAX_SPEC+1 - val;
        do
        {
            x264_cabac_encode_decision( cb, 60 + ctx, 1 );
            ctx = 2+(ctx>>1);
        } while( --val );
    }
    x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
}

#if !RDO_SKIP_BS
void x264_cabac_mb_skip( x264_t *h, int b_skip )
{
    int ctx = h->mb.cache.i_neighbour_skip + 11;
    if( h->sh.i_type != SLICE_TYPE_P )
        ctx += 13;
    x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
}
#endif

static inline void x264_cabac_subpartition_p( x264_cabac_t *cb, int i_sub )
{
    if( i_sub == D_L0_8x8 )
    {
        x264_cabac_encode_decision( cb, 21, 1 );
        return;
    }
    x264_cabac_encode_decision( cb, 21, 0 );
    if( i_sub == D_L0_8x4 )
        x264_cabac_encode_decision( cb, 22, 0 );
    else
    {
        x264_cabac_encode_decision( cb, 22, 1 );
        x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 );
    }
}

static ALWAYS_INLINE void x264_cabac_subpartition_b( x264_cabac_t *cb, int i_sub )
{
    if( i_sub == D_DIRECT_8x8 )
    {
        x264_cabac_encode_decision( cb, 36, 0 );
        return;
    }
    x264_cabac_encode_decision( cb, 36, 1 );
    if( i_sub == D_BI_8x8 )
    {
        x264_cabac_encode_decision( cb, 37, 1 );
        x264_cabac_encode_decision( cb, 38, 0 );
        x264_cabac_encode_decision( cb, 39, 0 );
        x264_cabac_encode_decision( cb, 39, 0 );
        return;
    }
    x264_cabac_encode_decision( cb, 37, 0 );
    x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
}

static ALWAYS_INLINE void x264_cabac_transform_size( x264_t *h, x264_cabac_t *cb )
{
    int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
    x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
}

static ALWAYS_INLINE void x264_cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe )
{
    const int i8 = x264_scan8[idx];
    const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
    const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
    int ctx = 0;

    if( i_refa > 0 && (!bframe || !h->mb.cache.skip[i8 - 1]) )
        ctx++;
    if( i_refb > 0 && (!bframe || !h->mb.cache.skip[i8 - 8]) )
        ctx += 2;

    for( int i_ref = h->mb.cache.ref[i_list][i8]; i_ref > 0; i_ref-- )
    {
        x264_cabac_encode_decision( cb, 54 + ctx, 1 );
        ctx = (ctx>>2)+4;
    }
    x264_cabac_encode_decision( cb, 54 + ctx, 0 );
}

static NOINLINE void x264_cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx )
{
    x264_cabac_ref_internal( h, cb, 0, idx, 0 );
}
static NOINLINE void x264_cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
{
    x264_cabac_ref_internal( h, cb, i_list, idx, 1 );
}

static ALWAYS_INLINE int x264_cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
{
    int ctxbase = l ? 47 : 40;

    if( mvd == 0 )
    {
        x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
        return 0;
    }

    int i_abs = abs( mvd );
    x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
#if RDO_SKIP_BS
    if( i_abs <= 3 )
    {
        for( int i = 1; i < i_abs; i++ )
            x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
        x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
        x264_cabac_encode_bypass( cb, mvd >> 31 );
    }
    else
    {
        x264_cabac_encode_decision( cb, ctxbase + 3, 1 );
        x264_cabac_encode_decision( cb, ctxbase + 4, 1 );
        x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
        if( i_abs < 9 )
        {
            cb->f8_bits_encoded += x264_cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
            cb->state[ctxbase+6] = x264_cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
        }
        else
        {
            cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
            cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]];
            x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
        }
    }
#else
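    /* Context offsets for the unary prefix of |mvd|: the first three prefix
     * bins get their own contexts, all later prefix bins share the last one;
     * values of 9 or more fall through to the Exp-Golomb (k=3) bypass suffix. */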
    static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 };

    if( i_abs < 9 )
    {
        for( int i = 1; i < i_abs; i++ )
            x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
        x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 );
    }
    else
    {
        for( int i = 1; i < 9; i++ )
            x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
        x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
    }
    x264_cabac_encode_bypass( cb, mvd >> 31 );
#endif
    /* Since we don't need to keep track of MVDs larger than 66, just cap the value.
     * This lets us store MVDs as 8-bit values instead of 16-bit. */
    return X264_MIN( i_abs, 66 );
}

static NOINLINE uint16_t x264_cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
{
    ALIGNED_4( int16_t mvp[2] );
    int mdx, mdy;

    /* Calculate mvd */
    x264_mb_predict_mv( h, i_list, idx, width, mvp );
    mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
    mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
    uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
                                       h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);

    /* encode */
    mdx = x264_cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
    mdy = x264_cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );

    return pack8to16(mdx,mdy);
}

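/* This macro intentionally shadows the function above: it encodes the mvd for
 * a partition and also stores the returned (capped) |mvd| pair in the mvd
 * cache, which later blocks use to pick their mvd contexts. */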
#define x264_cabac_mvd(h,cb,i_list,idx,width,height)\
do\
{\
    uint16_t mvd = x264_cabac_mvd(h,cb,i_list,idx,width);\
    x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
} while(0)

static inline void x264_cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
{
    switch( h->mb.i_sub_partition[i] )
    {
        case D_L0_8x8:
            x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );
            break;
        case D_L0_8x4:
            x264_cabac_mvd( h, cb, 0, 4*i+0, 2, 1 );
            x264_cabac_mvd( h, cb, 0, 4*i+2, 2, 1 );
            break;
        case D_L0_4x8:
            x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 2 );
            x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 2 );
            break;
        case D_L0_4x4:
            x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 1 );
            x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 1 );
            x264_cabac_mvd( h, cb, 0, 4*i+2, 1, 1 );
            x264_cabac_mvd( h, cb, 0, 4*i+3, 1, 1 );
            break;
        default:
            assert(0);
    }
}

static ALWAYS_INLINE void x264_cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma )
{
    if( slice_type == SLICE_TYPE_I )
    {
        int ctx = 0;
        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
            ctx++;
        if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
            ctx++;

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( slice_type == SLICE_TYPE_P )
    {
        /* prefix */
        x264_cabac_encode_decision_noup( cb, 14, 1 );

        /* suffix */
        x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
    }
    else if( slice_type == SLICE_TYPE_B )
    {
        /* prefix */
        x264_cabac_encode_decision_noup( cb, 27+3, 1 );
        x264_cabac_encode_decision_noup( cb, 27+4, 1 );
        x264_cabac_encode_decision( cb, 27+5, 1 );
        x264_cabac_encode_decision( cb, 27+5, 0 );
        x264_cabac_encode_decision( cb, 27+5, 1 );

        /* suffix */
        x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
    }

    if( i_mb_type == I_PCM )
        return;

    if( i_mb_type != I_16x16 )
    {
        if( h->pps->b_transform_8x8_mode )
            x264_cabac_transform_size( h, cb );

        int di = h->mb.b_transform_8x8 ? 4 : 1;
        for( int i = 0; i < 16; i += di )
        {
            const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
            const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
            x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
        }
    }

    if( chroma )
        x264_cabac_intra_chroma_pred_mode( h, cb );
}

static ALWAYS_INLINE void x264_cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
{
    if( i_mb_type == P_L0 )
    {
        x264_cabac_encode_decision_noup( cb, 14, 0 );
        if( h->mb.i_partition == D_16x16 )
        {
            x264_cabac_encode_decision_noup( cb, 15, 0 );
            x264_cabac_encode_decision_noup( cb, 16, 0 );
            if( h->mb.pic.i_fref[0] > 1 )
                x264_cabac_ref_p( h, cb, 0 );
            x264_cabac_mvd( h, cb, 0, 0, 4, 4 );
        }
        else if( h->mb.i_partition == D_16x8 )
        {
            x264_cabac_encode_decision_noup( cb, 15, 1 );
            x264_cabac_encode_decision_noup( cb, 17, 1 );
            if( h->mb.pic.i_fref[0] > 1 )
            {
                x264_cabac_ref_p( h, cb, 0 );
                x264_cabac_ref_p( h, cb, 8 );
            }
            x264_cabac_mvd( h, cb, 0, 0, 4, 2 );
            x264_cabac_mvd( h, cb, 0, 8, 4, 2 );
        }
        else //if( h->mb.i_partition == D_8x16 )
        {
            x264_cabac_encode_decision_noup( cb, 15, 1 );
            x264_cabac_encode_decision_noup( cb, 17, 0 );
            if( h->mb.pic.i_fref[0] > 1 )
            {
                x264_cabac_ref_p( h, cb, 0 );
                x264_cabac_ref_p( h, cb, 4 );
            }
            x264_cabac_mvd( h, cb, 0, 0, 2, 4 );
            x264_cabac_mvd( h, cb, 0, 4, 2, 4 );
        }
    }
    else if( i_mb_type == P_8x8 )
    {
        x264_cabac_encode_decision_noup( cb, 14, 0 );
        x264_cabac_encode_decision_noup( cb, 15, 0 );
        x264_cabac_encode_decision_noup( cb, 16, 1 );

        /* sub mb type */
        for( int i = 0; i < 4; i++ )
            x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i] );

        /* ref 0 */
        if( h->mb.pic.i_fref[0] > 1 )
        {
            x264_cabac_ref_p( h, cb, 0 );
            x264_cabac_ref_p( h, cb, 4 );
            x264_cabac_ref_p( h, cb, 8 );
            x264_cabac_ref_p( h, cb, 12 );
        }

        for( int i = 0; i < 4; i++ )
            x264_cabac_8x8_mvd( h, cb, i );
    }
    else /* intra */
        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma );
}

static ALWAYS_INLINE void x264_cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
{
    int ctx = 0;
    if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
        ctx++;
    if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
        ctx++;

    if( i_mb_type == B_DIRECT )
    {
        x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
        return;
    }
    x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );

    if( i_mb_type == B_8x8 )
    {
        x264_cabac_encode_decision_noup( cb, 27+3, 1 );
        x264_cabac_encode_decision_noup( cb, 27+4, 1 );
        x264_cabac_encode_decision( cb, 27+5, 1 );
        x264_cabac_encode_decision( cb, 27+5, 1 );
        x264_cabac_encode_decision_noup( cb, 27+5, 1 );

        /* sub mb type */
        for( int i = 0; i < 4; i++ )
            x264_cabac_subpartition_b( cb, h->mb.i_sub_partition[i] );

        /* ref */
        if( h->mb.pic.i_fref[0] > 1 )
            for( int i = 0; i < 4; i++ )
                if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
                    x264_cabac_ref_b( h, cb, 0, 4*i );

        if( h->mb.pic.i_fref[1] > 1 )
            for( int i = 0; i < 4; i++ )
                if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
                    x264_cabac_ref_b( h, cb, 1, 4*i );

        for( int i = 0; i < 4; i++ )
            if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
                x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );

        for( int i = 0; i < 4; i++ )
            if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
                x264_cabac_mvd( h, cb, 1, 4*i, 2, 2 );
    }
    else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
    {
        /* All B modes */
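        /* Each entry packs the mb_type bins consumed LSB-first by the loop
         * below, with a 1 bit above them acting as a terminator; the zero
         * entries correspond to type/partition combinations that never occur. */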
        static const uint8_t i_mb_bits[9*3] =
        {
            0x31, 0x29, 0x4, /* L0 L0 */
            0x35, 0x2d, 0,   /* L0 L1 */
            0x43, 0x63, 0,   /* L0 BI */
            0x3d, 0x2f, 0,   /* L1 L0 */
            0x39, 0x25, 0x6, /* L1 L1 */
            0x53, 0x73, 0,   /* L1 BI */
            0x4b, 0x6b, 0,   /* BI L0 */
            0x5b, 0x7b, 0,   /* BI L1 */
            0x47, 0x67, 0x21 /* BI BI */
        };

        const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
        int bits = i_mb_bits[idx];

        x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
        x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
        if( bits != 1 )
        {
            x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
            x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
            x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
            if( bits != 1 )
                x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
        }

        const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
        if( h->mb.pic.i_fref[0] > 1 )
        {
            if( b_list[0][0] )
                x264_cabac_ref_b( h, cb, 0, 0 );
            if( b_list[0][1] && h->mb.i_partition != D_16x16 )
                x264_cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
        }
        if( h->mb.pic.i_fref[1] > 1 )
        {
            if( b_list[1][0] )
                x264_cabac_ref_b( h, cb, 1, 0 );
            if( b_list[1][1] && h->mb.i_partition != D_16x16 )
                x264_cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
        }
        for( int i_list = 0; i_list < 2; i_list++ )
        {
            if( h->mb.i_partition == D_16x16 )
            {
                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 4 );
            }
            else if( h->mb.i_partition == D_16x8 )
            {
                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 2 );
                if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 8, 4, 2 );
            }
            else //if( h->mb.i_partition == D_8x16 )
            {
                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 2, 4 );
                if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 4, 2, 4 );
            }
        }
    }
    else /* intra */
        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
}

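/* coded_block_flag context: a per-category base index plus 2*top + left, where
 * top/left are the "neighbouring block has non-zero coefficients" flags
 * (unavailable neighbours count as coded for intra blocks, uncoded otherwise). */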
static int ALWAYS_INLINE x264_cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
{
    static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};

    if( b_dc )
    {
        i_idx -= LUMA_DC;
        if( i_cat == DCT_CHROMA_DC )
        {
            int i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1 : b_intra;
            int i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1 : b_intra;
            return base_ctx[i_cat] + 2*i_nzb + i_nza;
        }
        else
        {
            int i_nza = (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1;
            int i_nzb = (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1;
            return base_ctx[i_cat] + 2*i_nzb + i_nza;
        }
    }
    else
    {
        int i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
        int i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
        if( x264_constant_p(b_intra) && !b_intra )
            return base_ctx[i_cat] + ((2*i_nzb + i_nza)&0x7f);
        else
        {
            i_nza &= 0x7f + (b_intra << 7);
            i_nzb &= 0x7f + (b_intra << 7);
            return base_ctx[i_cat] + 2*!!i_nzb + !!i_nza;
        }
    }
}

#if !RDO_SKIP_BS
extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][64];
extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
extern const uint16_t x264_significant_coeff_flag_offset[2][16];
extern const uint16_t x264_last_coeff_flag_offset[2][16];
extern const uint16_t x264_coeff_abs_level_m1_offset[16];
extern const uint8_t x264_count_cat_m1[14];
#else
/* Padded to [64] for easier addressing */
const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] =
{{
    0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
    4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
    7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
   12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
},{
    0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
    6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
    9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
    9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
}};
const uint8_t x264_last_coeff_flag_offset_8x8[63] =
{
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
const uint16_t x264_significant_coeff_flag_offset[2][16] =
{
    { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 },
    { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 }
};
const uint16_t x264_last_coeff_flag_offset[2][16] =
{
    { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 },
    { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 }
};
const uint16_t x264_coeff_abs_level_m1_offset[16] =
{
    227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
};
const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63};
#endif

// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
//           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
/* map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
/* 4:2:2 chroma dc uses a slightly different state machine for some reason; also note that
 * 4:2:0 chroma dc doesn't use the last state, so it has identical output with both arrays. */
static const uint8_t coeff_abs_levelgt1_ctx_chroma_dc[8] = { 5, 5, 5, 5, 6, 7, 8, 8 };

static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after coding a level=1 */
    { 1, 2, 3, 3, 4, 5, 6, 7 },
/* update node ctx after coding a level>1 */
    { 4, 4, 4, 4, 5, 6, 7, 7 }
};

#if !RDO_SKIP_BS
static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
{
    int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
    int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
    int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
    int coeff_idx = -1, node_ctx = 0;
    int last = h->quantf.coeff_last[ctx_block_cat]( l );
    const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
    dctcoef coeffs[64];

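/* Significance map: write significant_coeff_flag for each scan position and,
 * when it is 1, last_significant_coeff_flag. If the scan reaches the final
 * position (count_m1), that coefficient must be the last non-zero one, so no
 * flags are coded for it. Non-zero values are collected into coeffs[] for the
 * level pass below. */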
#define WRITE_SIGMAP( sig_off, last_off )\
{\
    int i = 0;\
    while( 1 )\
    {\
        if( l[i] )\
        {\
            coeffs[++coeff_idx] = l[i];\
            x264_cabac_encode_decision( cb, ctx_sig + sig_off, 1 );\
            if( i == last )\
            {\
                x264_cabac_encode_decision( cb, ctx_last + last_off, 1 );\
                break;\
            }\
            else\
                x264_cabac_encode_decision( cb, ctx_last + last_off, 0 );\
        }\
        else\
            x264_cabac_encode_decision( cb, ctx_sig + sig_off, 0 );\
        if( ++i == count_m1 )\
        {\
            coeffs[++coeff_idx] = l[i];\
            break;\
        }\
    }\
}

    if( chroma422dc )
    {
        int count_m1 = 7;
        WRITE_SIGMAP( x264_coeff_flag_offset_chroma_422_dc[i], x264_coeff_flag_offset_chroma_422_dc[i] )
    }
    else
    {
        int count_m1 = x264_count_cat_m1[ctx_block_cat];
        if( count_m1 == 63 )
        {
            const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
            WRITE_SIGMAP( sig_offset[i], x264_last_coeff_flag_offset_8x8[i] )
        }
        else
            WRITE_SIGMAP( i, i )
    }

    do
    {
        /* write coeff_abs - 1 */
        int coeff = coeffs[coeff_idx];
        int abs_coeff = abs(coeff);
        int coeff_sign = coeff >> 31;
        int ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;

        if( abs_coeff > 1 )
        {
            x264_cabac_encode_decision( cb, ctx, 1 );
            ctx = levelgt1_ctx[node_ctx] + ctx_level;
            for( int i = X264_MIN( abs_coeff, 15 ) - 2; i > 0; i-- )
                x264_cabac_encode_decision( cb, ctx, 1 );
            if( abs_coeff < 15 )
                x264_cabac_encode_decision( cb, ctx, 0 );
            else
                x264_cabac_encode_ue_bypass( cb, 0, abs_coeff - 15 );

            node_ctx = coeff_abs_level_transition[1][node_ctx];
        }
        else
        {
            x264_cabac_encode_decision( cb, ctx, 0 );
            node_ctx = coeff_abs_level_transition[0][node_ctx];
        }

        x264_cabac_encode_bypass( cb, coeff_sign );
    } while( --coeff_idx >= 0 );
}

void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
}

static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
#if ARCH_X86_64 && HAVE_MMX
    h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
    x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
#endif
}
static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
    /* Template a version specifically for chroma 4:2:2 DC in order to avoid
     * slowing down everything else due to the added complexity. */
    x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 );
}
#define x264_cabac_block_residual_8x8( h, cb, cat, l ) x264_cabac_block_residual( h, cb, cat, l )
#else

/* Faster RDO by merging sigmap and level coding. Note that for 8x8dct and chroma 4:2:2 dc this is
 * slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there
 * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
{
    const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
    int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
    int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
    int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
    int last = h->quantf.coeff_last[ctx_block_cat]( l );
    int coeff_abs = abs(l[last]);
    int ctx = coeff_abs_level1_ctx[0] + ctx_level;
    int node_ctx;
    const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;

    if( last != (b_8x8 ? 63 : chroma422dc ? 7 : x264_count_cat_m1[ctx_block_cat]) )
    {
        x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
                                    chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
        x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[last] :
                                    chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
    }

    if( coeff_abs > 1 )
    {
        x264_cabac_encode_decision( cb, ctx, 1 );
        ctx = levelgt1_ctx[0] + ctx_level;
        if( coeff_abs < 15 )
        {
            cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
            cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
        }
        else
        {
            cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
            cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
            x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
        }
        node_ctx = coeff_abs_level_transition[1][0];
    }
    else
    {
        x264_cabac_encode_decision( cb, ctx, 0 );
        node_ctx = coeff_abs_level_transition[0][0];
        x264_cabac_encode_bypass( cb, 0 ); // sign
    }

    for( int i = last-1 ; i >= 0; i-- )
    {
        if( l[i] )
        {
            coeff_abs = abs(l[i]);
            x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 1 );
            x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[i] :
                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
            ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;

            if( coeff_abs > 1 )
            {
                x264_cabac_encode_decision( cb, ctx, 1 );
                ctx = levelgt1_ctx[node_ctx] + ctx_level;
                if( coeff_abs < 15 )
                {
                    cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
                    cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
                }
                else
                {
                    cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
                    cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
                    x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
                }
                node_ctx = coeff_abs_level_transition[1][node_ctx];
            }
            else
            {
                x264_cabac_encode_decision( cb, ctx, 0 );
                node_ctx = coeff_abs_level_transition[0][node_ctx];
                x264_cabac_encode_bypass( cb, 0 );
            }
        }
        else
            x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
    }
}

void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 );
}
void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
}

static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
#if ARCH_X86_64 && HAVE_MMX
    h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
    x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
#endif
}
static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
#if ARCH_X86_64 && HAVE_MMX
    h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
    x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
#endif
}

static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
    x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
}
#endif

#define x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\
do\
{\
    int ctxidxinc = x264_cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\
    if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
    {\
        x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
        x264_cabac_block_residual##name( h, cb, ctx_block_cat, l );\
    }\
    else\
        x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
} while(0)

#define x264_cabac_block_residual_dc_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 1, )

#define x264_cabac_block_residual_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, )

#define x264_cabac_block_residual_8x8_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, _8x8 )

#define x264_cabac_block_residual_422_dc_cbf( h, cb, ch, b_intra )\
    x264_cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(ch), h->dct.chroma_dc[ch], b_intra, 1, _422_dc )

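/* Writes a complete macroblock: the header (mb_type, prediction modes,
 * references and mvds), then the coded block pattern, the 8x8 transform flag,
 * mb_qp_delta, and finally the residual blocks. I_PCM macroblocks are written
 * as raw samples and skip residual coding entirely. */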
static ALWAYS_INLINE void x264_macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
{
    const int i_mb_type = h->mb.i_type;

#if !RDO_SKIP_BS
    const int i_mb_pos_start = x264_cabac_pos( cb );
    int i_mb_pos_tex;

    if( SLICE_MBAFF &&
        (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
    {
        x264_cabac_field_decoding_flag( h, cb );
    }
#endif

    if( h->sh.i_type == SLICE_TYPE_P )
        x264_cabac_mb_header_p( h, cb, i_mb_type, chroma );
    else if( h->sh.i_type == SLICE_TYPE_B )
        x264_cabac_mb_header_b( h, cb, i_mb_type, chroma );
    else //if( h->sh.i_type == SLICE_TYPE_I )
        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma );

#if !RDO_SKIP_BS
    i_mb_pos_tex = x264_cabac_pos( cb );
    h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;

    if( i_mb_type == I_PCM )
    {
        bs_t s;
        bs_init( &s, cb->p, cb->p_end - cb->p );

        for( int p = 0; p < plane_count; p++ )
            for( int i = 0; i < 256; i++ )
                bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
        if( chroma )
            for( int ch = 1; ch < 3; ch++ )
                for( int i = 0; i < 16>>CHROMA_V_SHIFT; i++ )
                    for( int j = 0; j < 8; j++ )
                        bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );

        bs_flush( &s );
        cb->p = s.p;
        x264_cabac_encode_init_core( cb );

        h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
        return;
    }
#endif

    if( i_mb_type != I_16x16 )
    {
        x264_cabac_cbp_luma( h, cb );
        if( chroma )
            x264_cabac_cbp_chroma( h, cb );
    }

    if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
    {
        x264_cabac_transform_size( h, cb );
    }

    if( h->mb.i_cbp_luma || (chroma && h->mb.i_cbp_chroma) || i_mb_type == I_16x16 )
    {
        const int b_intra = IS_INTRA( i_mb_type );
        x264_cabac_qp_delta( h, cb );

        /* write residual */
        if( i_mb_type == I_16x16 )
        {
            /* DC Luma */
            for( int p = 0; p < plane_count; p++ )
            {
                x264_cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );

                /* AC Luma */
                if( h->mb.i_cbp_luma )
                    for( int i = p*16; i < p*16+16; i++ )
                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
            }
        }
        else if( h->mb.b_transform_8x8 )
        {
            if( plane_count == 3 )
            {
                ALIGNED_4( uint8_t nnzbak[3][8] );

/* Stupid nnz munging in the case that neighbors don't have
 * 8x8 transform enabled. */
#define BACKUP( dst, src, res )\
    dst = src;\
    src = res;

#define RESTORE( dst, src, res )\
    src = dst;

#define MUNGE_8x8_NNZ( MUNGE )\
if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[0]] )\
{\
    MUNGE( nnzbak[0][0], h->mb.cache.non_zero_count[x264_scan8[16*0+ 0] - 1], 0x80 )\
    MUNGE( nnzbak[0][1], h->mb.cache.non_zero_count[x264_scan8[16*0+ 2] - 1], 0x80 )\
    MUNGE( nnzbak[1][0], h->mb.cache.non_zero_count[x264_scan8[16*1+ 0] - 1], 0x80 )\
    MUNGE( nnzbak[1][1], h->mb.cache.non_zero_count[x264_scan8[16*1+ 2] - 1], 0x80 )\
    MUNGE( nnzbak[2][0], h->mb.cache.non_zero_count[x264_scan8[16*2+ 0] - 1], 0x80 )\
    MUNGE( nnzbak[2][1], h->mb.cache.non_zero_count[x264_scan8[16*2+ 2] - 1], 0x80 )\
}\
if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[1]] )\
{\
    MUNGE( nnzbak[0][2], h->mb.cache.non_zero_count[x264_scan8[16*0+ 8] - 1], 0x80 )\
    MUNGE( nnzbak[0][3], h->mb.cache.non_zero_count[x264_scan8[16*0+10] - 1], 0x80 )\
    MUNGE( nnzbak[1][2], h->mb.cache.non_zero_count[x264_scan8[16*1+ 8] - 1], 0x80 )\
    MUNGE( nnzbak[1][3], h->mb.cache.non_zero_count[x264_scan8[16*1+10] - 1], 0x80 )\
    MUNGE( nnzbak[2][2], h->mb.cache.non_zero_count[x264_scan8[16*2+ 8] - 1], 0x80 )\
    MUNGE( nnzbak[2][3], h->mb.cache.non_zero_count[x264_scan8[16*2+10] - 1], 0x80 )\
}\
if( (h->mb.i_neighbour & MB_TOP) && !h->mb.mb_transform_size[h->mb.i_mb_top_xy] )\
{\
    MUNGE( M32( &nnzbak[0][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*0] - 8] ), 0x80808080U )\
    MUNGE( M32( &nnzbak[1][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*1] - 8] ), 0x80808080U )\
    MUNGE( M32( &nnzbak[2][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*2] - 8] ), 0x80808080U )\
}

                MUNGE_8x8_NNZ( BACKUP )

                for( int p = 0; p < 3; p++ )
                    FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
                        x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );

                MUNGE_8x8_NNZ( RESTORE )
            }
            else
            {
                FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
                    x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
            }
        }
        else
        {
            for( int p = 0; p < plane_count; p++ )
                FOREACH_BIT( i8x8, 0, h->mb.i_cbp_luma )
                    for( int i = 0; i < 4; i++ )
                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra );
        }

        if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */
        {
            if( CHROMA_FORMAT == CHROMA_422 )
            {
                x264_cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra );
                x264_cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra );
            }
            else
            {
                x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
                x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
            }

            if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
            {
                int step = 8 << CHROMA_V_SHIFT;
                for( int i = 16; i < 3*16; i += step )
                    for( int j = i; j < i+4; j++ )
                        x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra );
            }
        }
    }

#if !RDO_SKIP_BS
    h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
#endif
}

void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
{
    if( CHROMA444 )
        x264_macroblock_write_cabac_internal( h, cb, 3, 0 );
    else
        x264_macroblock_write_cabac_internal( h, cb, 1, 1 );
}

#if RDO_SKIP_BS
/*****************************************************************************
 * RD only; doesn't generate a valid bitstream
 * doesn't write cbp or chroma dc (I don't know how much this matters)
 * doesn't write ref (never varies between calls, so no point in doing so)
 * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
 * works on all partition sizes except 16x16
 *****************************************************************************/
static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
{
    const int i_mb_type = h->mb.i_type;
    int b_8x16 = h->mb.i_partition == D_8x16;
    int plane_count = CHROMA444 ? 3 : 1;

    if( i_mb_type == P_8x8 )
    {
        x264_cabac_8x8_mvd( h, cb, i8 );
        x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] );
    }
    else if( i_mb_type == P_L0 )
        x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
    else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
    {
        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
    }
    else //if( i_mb_type == B_8x8 )
    {
        if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
            x264_cabac_mvd( h, cb, 0, 4*i8, 2, 2 );
        if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
            x264_cabac_mvd( h, cb, 1, 4*i8, 2, 2 );
    }

    for( int j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
    {
        if( h->mb.i_cbp_luma & (1 << i8) )
        {
            if( h->mb.b_transform_8x8 )
            {
                if( CHROMA444 )
                    for( int p = 0; p < 3; p++ )
                        x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
                else
                    x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
            }
            else
                for( int p = 0; p < plane_count; p++ )
                    for( int i4 = 0; i4 < 4; i4++ )
                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
        }

        if( h->mb.i_cbp_chroma )
        {
            if( CHROMA_FORMAT == CHROMA_422 )
            {
                int offset = (5*i8) & 0x09;
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 );
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 );
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 );
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 );
            }
            else
            {
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
            }
        }

        i8 += x264_pixel_size[i_pixel].h >> 3;
    }
}

static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
{
    int b_8x4 = i_pixel == PIXEL_8x4;
    int plane_count = CHROMA444 ? 3 : 1;
    if( i_pixel == PIXEL_4x4 )
        x264_cabac_mvd( h, cb, 0, i4, 1, 1 );
    else
        x264_cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
    for( int p = 0; p < plane_count; p++ )
    {
        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
        if( i_pixel != PIXEL_4x4 )
            x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
    }
}

static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
{
    const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
    i_mode = x264_mb_pred_mode4x4_fix( i_mode );
    x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
    x264_cabac_cbp_luma( h, cb );
    if( h->mb.i_cbp_luma & (1 << i8) )
    {
        if( CHROMA444 )
            for( int p = 0; p < 3; p++ )
                x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
        else
            x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
    }
}

static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
{
    const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
    int plane_count = CHROMA444 ? 3 : 1;
    i_mode = x264_mb_pred_mode4x4_fix( i_mode );
    x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
    for( int p = 0; p < plane_count; p++ )
        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
}

static void x264_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
{
    x264_cabac_intra_chroma_pred_mode( h, cb );
    x264_cabac_cbp_chroma( h, cb );
    if( h->mb.i_cbp_chroma )
    {
        if( CHROMA_FORMAT == CHROMA_422 )
        {
            x264_cabac_block_residual_422_dc_cbf( h, cb, 0, 1 );
            x264_cabac_block_residual_422_dc_cbf( h, cb, 1, 1 );
        }
        else
        {
            x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
            x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
        }

        if( h->mb.i_cbp_chroma == 2 )
        {
            int step = 8 << CHROMA_V_SHIFT;
            for( int i = 16; i < 3*16; i += step )
                for( int j = i; j < i+4; j++ )
                    x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 );
        }
    }
}
#endif