;*****************************************************************************
;* bitstream-a.asm: x86 bitstream functions
;*****************************************************************************
;* Copyright (C) 2010-2016 x264 project
;*
;* Authors: Fiona Glaser <fiona@x264.com>
;*          Henrik Gramner <henrik@gramner.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
;*
;* This program is also available under a commercial proprietary license.
;* For more information, contact us at licensing@x264.com.
;*****************************************************************************

%include "x86inc.asm"
%include "x86util.asm"

SECTION .text

;-----------------------------------------------------------------------------
; uint8_t *x264_nal_escape( uint8_t *dst, uint8_t *src, uint8_t *end )
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; NAL_LOOP loop_label, load_instr
;
; SIMD main loop for nal_escape: copies 2*mmsize bytes of src per iteration
; while scanning for two adjacent zero bytes (the pattern that may require an
; escape byte).  Register roles, set up by NAL_ESCAPE below:
;   r1     = negative offset from the end of src (counts up towards 0)
;   r2     = end-of-src pointer,   so [r1+r2] addresses the source
;   r0     = projected end of dst, so [r0+r1] addresses the destination
;   m0     = all-zero vector used as the pcmpeqb reference
;   m1, m2 = pre-loaded source data (high and low halves of the chunk)
;   %1     = name of the loop entry label
;   %2     = load instruction for src (mova when aligned, movu otherwise)
%macro NAL_LOOP 2
%%escape:
    ; Detect false positive to avoid unnecessary escape loop: the "+1" carry-in
    ; in the lea below assumes the byte preceding the chunk was zero.  If
    ; k4 == 1 (only the carry-in pair matched) but the previously written dst
    ; byte is actually nonzero, the xor clears k3 and we continue copying.
    xor      r3d, r3d
    cmp byte [r0+r1-1], 0
    setnz    r3b
    xor      k3, k4
    jnz .escape
    jmp %%continue
ALIGN 16
%1:
    ; Store the previous chunk and test it for zero bytes, while the %2 loads
    ; below fetch the chunk after it (software pipelining).
    mova [r0+r1+mmsize], m1
    pcmpeqb   m1, m0
    mova [r0+r1], m2
    pcmpeqb   m2, m0
    pmovmskb r3d, m1
    %2   m1, [r1+r2+3*mmsize]
    pmovmskb r4d, m2
    %2   m2, [r1+r2+2*mmsize]
    shl  k3, mmsize
    or   k3, k4                  ; k3 = zero-byte bitmask of the 2*mmsize chunk
    lea  k4, [2*r3+1]            ; +1 carries in the previous byte's state
    and  k4, k3                  ; nonzero iff two adjacent zero bytes found
    jnz %%escape
%%continue:
    add r1, 2*mmsize
    jl  %1                       ; loop while the negative src offset is < 0
%endmacro
;-----------------------------------------------------------------------------
; uint8_t *x264_nal_escape( uint8_t *dst, uint8_t *src, uint8_t *end )
;
; Copies [src,end) to dst, inserting an escape byte (0x03) whenever a byte
; <= 3 follows two consecutive zero bytes.  Returns a pointer to the new end
; of dst.  Scalar code handles the start of the buffer and every escape;
; the NAL_LOOP SIMD loop handles bulk copying once dst is aligned.
%macro NAL_ESCAPE 0
; For mmsize == 32 (AVX2) the combined zero-byte bitmask spans 2*32 = 64 bits
; (see "shl k3, mmsize" in NAL_LOOP), so k3/k4 must be full 64-bit registers;
; the 32-bit halves suffice for smaller vector widths.
%if mmsize == 32
    %xdefine k3 r3
    %xdefine k4 r4
%else
    %xdefine k3 r3d
    %xdefine k4 r4d
%endif

cglobal nal_escape, 3,5
    ; On entry: r0 = dst, r1 = src, r2 = end.
    ; r3d is used as a sliding window of the most recently copied bytes.
    movzx r3d, byte [r1]
    sub   r1, r2                 ; r1 = offset of current src pointer from end of src
    pxor  m0, m0                 ; zero vector for the pcmpeqb comparisons
    mov  [r0], r3b
    sub   r0, r1                 ; r0 = projected end of dst, assuming no more escapes
    or    r3d, 0xffffff00        ; ignore data before src

    ; Start off by jumping into the escape loop in case there's an escape at the start.
    ; And do a few more in scalar until dst is aligned.
    jmp .escape_loop

%if mmsize == 16
    NAL_LOOP .loop_aligned, mova
    jmp .ret
%endif
    NAL_LOOP .loop_unaligned, movu
.ret:
    movifnidn rax, r0            ; return the updated end-of-dst pointer
    RET

.escape:
    ; Skip bytes that are known to be valid
    and   k4, k3
    tzcnt k4, k4                 ; bit index of the first adjacent-zero pair
    xor   r3d, r3d               ; the last two bytes are known to be zero
    add   r1, r4
.escape_loop:
    inc   r1
    jge   .ret                   ; negative offset reached zero -> end of src
    movzx r4d, byte [r1+r2]
    shl   r3d, 8
    or    r3d, r4d
    test  r3d, 0xfffffc          ; if the last two bytes are 0 and the current byte is <=3
    jz    .add_escape_byte
.escaped:
    lea   r4d, [r0+r1]
    mov   [r0+r1], r3b
    test  r4d, mmsize-1          ; Do SIMD when dst is aligned
    jnz   .escape_loop
    movu  m1, [r1+r2+mmsize]
    movu  m2, [r1+r2]
%if mmsize == 16
    ; Prefer the fully-aligned loop when src happens to be aligned as well
    lea   r4d, [r1+r2]
    test  r4d, mmsize-1
    jz    .loop_aligned
%endif
    jmp   .loop_unaligned

.add_escape_byte:
    mov byte [r0+r1], 3          ; emit the 0x03 escape byte
    inc   r0                     ; dst is now one byte longer than src
    or    r3d, 0x0300            ; account for the escape byte in the window
    jmp .escaped
%endmacro
; Instantiate the escape function once per SIMD instruction set.
INIT_MMX mmx2
NAL_ESCAPE
INIT_XMM sse2
NAL_ESCAPE
%if ARCH_X86_64
; The AVX2 version is x86-64 only: its 64-bit zero-byte bitmask requires the
; full 64-bit k3/k4 registers defined in NAL_ESCAPE.
INIT_YMM avx2
NAL_ESCAPE
%endif