FFmpeg 2.6.9: libavcodec/x86/mpegvideoencdsp_init.c
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/mpegvideoencdsp.h"

int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_sum16_mmxext(uint8_t *pix, int line_size);
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
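
/*
 * Editor's illustration (not part of the original file): a plain-C sketch of
 * what the assembly pix_sum16 / pix_norm1 routines above are assumed to
 * compute for one 16x16 block -- the sum of the pixels and the sum of their
 * squares, respectively. The names and exact semantics here are assumptions
 * added for clarity only.
 */
static inline int pix_sum16_ref(const uint8_t *pix, int line_size)
{
    int sum = 0;
    for (int y = 0; y < 16; y++, pix += line_size)
        for (int x = 0; x < 16; x++)
            sum += pix[x];              /* plain sum over the 16x16 block */
    return sum;
}

static inline int pix_norm1_ref(const uint8_t *pix, int line_size)
{
    int sum = 0;
    for (int y = 0; y < 16; y++, pix += line_size)
        for (int x = 0; x < 16; x++)
            sum += pix[x] * pix[x];     /* sum of squared pixel values */
    return sum;
}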

#if HAVE_INLINE_ASM

#define PHADDD(a, t)                        \
    "movq  " #a ", " #t "           \n\t"   \
    "psrlq    $32, " #a "           \n\t"   \
    "paddd " #t ", " #a "           \n\t"

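/*
 * Editor's illustration (not part of the original file): PHADDD is a
 * horizontal add of the two 32-bit lanes of one MMX register, using t as a
 * scratch register. A scalar sketch of the same reduction:
 */
static inline uint32_t phaddd_ref(uint64_t a)
{
    uint64_t t = a;                     /* movq  a, t   */
    a >>= 32;                           /* psrlq $32, a */
    return (uint32_t)a + (uint32_t)t;   /* paddd t, a -> low lane holds hi32 + lo32 */
}
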
/*
 * pmulhw:   dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
 * pmulhrw:  dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
 */
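
/*
 * Editor's illustration (not part of the original file): the same three
 * high-multiply variants written out per 16-bit lane in scalar C, following
 * the bit ranges given in the comment above.
 */
static inline int16_t pmulhw_ref(int16_t src, int16_t dst)
{
    return (int16_t)(((int32_t)src * dst) >> 16);           /* bits 16..31 */
}
static inline int16_t pmulhrw_ref(int16_t src, int16_t dst)
{
    return (int16_t)(((int32_t)src * dst + 0x8000) >> 16);  /* rounded, bits 16..31 */
}
static inline int16_t pmulhrsw_ref(int16_t src, int16_t dst)
{
    return (int16_t)(((int32_t)src * dst + 0x4000) >> 15);  /* rounded, bits 15..30 */
}
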
#define PMULHRW(x, y, s, o)                 \
    "pmulhw " #s ", " #x "          \n\t"   \
    "pmulhw " #s ", " #y "          \n\t"   \
    "paddw  " #o ", " #x "          \n\t"   \
    "paddw  " #o ", " #y "          \n\t"   \
    "psraw      $1, " #x "          \n\t"   \
    "psraw      $1, " #y "          \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
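
/*
 * Editor's note (illustration, not original code): the block above is the
 * first of three instantiations of mpegvideoenc_qns_template.c. Each pass
 * redefines DEF, SET_RND, SCALE_OFFSET and PMULHRW and re-includes the
 * template, so one shared C body is emitted as the _mmx, _3dnow and _ssse3
 * variants of try_8x8basis() / add_8x8basis() wired up further below. The
 * token-pasting idiom in miniature (qns_demo is a hypothetical name):
 */
#define QNS_DEMO_DEF(x) x ## _mmx
int QNS_DEMO_DEF(qns_demo)(void);   /* declares int qns_demo_mmx(void); */
#undef QNS_DEMO_DEF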

#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o)                 \
    "pmulhrw " #s ", " #x "         \n\t"   \
    "pmulhrw " #s ", " #y "         \n\t"

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

#if HAVE_SSSE3_INLINE
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
#define SCALE_OFFSET -1

#define PHADDD(a, t)                        \
    "pshufw $0x0E, " #a ", " #t "   \n\t"   \
    /* faster than phaddd on core2 */       \
    "paddd  " #t ", " #a "          \n\t"

#define PMULHRW(x, y, s, o)                 \
    "pmulhrsw " #s ", " #x "        \n\t"   \
    "pmulhrsw " #s ", " #y "        \n\t"

#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD

#endif /* HAVE_SSSE3_INLINE */

/* Draw the edges of width 'w' of an image of size width, height;
 * this MMX version can only handle w == 4, w == 8 or w == 16. */
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
                           int w, int h, int sides)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    /* left and right */
    ptr = buf;
    if (w == 8) {
        __asm__ volatile (
            "1:                             \n\t"
            "movd (%0), %%mm0               \n\t"
            "punpcklbw %%mm0, %%mm0         \n\t"
            "punpcklwd %%mm0, %%mm0         \n\t"
            "punpckldq %%mm0, %%mm0         \n\t"
            "movq %%mm0, -8(%0)             \n\t"
            "movq -8(%0, %2), %%mm1         \n\t"
            "punpckhbw %%mm1, %%mm1         \n\t"
            "punpckhwd %%mm1, %%mm1         \n\t"
            "punpckhdq %%mm1, %%mm1         \n\t"
            "movq %%mm1, (%0, %2)           \n\t"
            "add %1, %0                     \n\t"
            "cmp %3, %0                     \n\t"
            "jb 1b                          \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
              "r" (ptr + wrap * height));
    } else if (w == 16) {
        __asm__ volatile (
            "1:                             \n\t"
            "movd (%0), %%mm0               \n\t"
            "punpcklbw %%mm0, %%mm0         \n\t"
            "punpcklwd %%mm0, %%mm0         \n\t"
            "punpckldq %%mm0, %%mm0         \n\t"
            "movq %%mm0, -8(%0)             \n\t"
            "movq %%mm0, -16(%0)            \n\t"
            "movq -8(%0, %2), %%mm1         \n\t"
            "punpckhbw %%mm1, %%mm1         \n\t"
            "punpckhwd %%mm1, %%mm1         \n\t"
            "punpckhdq %%mm1, %%mm1         \n\t"
            "movq %%mm1, (%0, %2)           \n\t"
            "movq %%mm1, 8(%0, %2)          \n\t"
            "add %1, %0                     \n\t"
            "cmp %3, %0                     \n\t"
            "jb 1b                          \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width), "r" (ptr + wrap * height)
            );
    } else {
        av_assert1(w == 4);
        __asm__ volatile (
            "1:                             \n\t"
            "movd (%0), %%mm0               \n\t"
            "punpcklbw %%mm0, %%mm0         \n\t"
            "punpcklwd %%mm0, %%mm0         \n\t"
            "movd %%mm0, -4(%0)             \n\t"
            "movd -4(%0, %2), %%mm1         \n\t"
            "punpcklbw %%mm1, %%mm1         \n\t"
            "punpckhwd %%mm1, %%mm1         \n\t"
            "punpckhdq %%mm1, %%mm1         \n\t"
            "movd %%mm1, (%0, %2)           \n\t"
            "add %1, %0                     \n\t"
            "cmp %3, %0                     \n\t"
            "jb 1b                          \n\t"
            : "+r" (ptr)
            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
              "r" (ptr + wrap * height));
    }

    /* top and bottom (and hopefully also the corners) */
    if (sides & EDGE_TOP) {
        for (i = 0; i < h; i += 4) {
            ptr = buf - (i + 1) * wrap - w;
            __asm__ volatile (
                "1:                             \n\t"
                "movq (%1, %0), %%mm0           \n\t"
                "movq %%mm0, (%0)               \n\t"
                "movq %%mm0, (%0, %2)           \n\t"
                "movq %%mm0, (%0, %2, 2)        \n\t"
                "movq %%mm0, (%0, %3)           \n\t"
                "add $8, %0                     \n\t"
                "cmp %4, %0                     \n\t"
                "jb 1b                          \n\t"
                : "+r" (ptr)
                : "r" ((x86_reg) buf - (x86_reg) ptr - w),
                  "r" ((x86_reg) -wrap), "r" ((x86_reg) -wrap * 3),
                  "r" (ptr + width + 2 * w));
        }
    }

    if (sides & EDGE_BOTTOM) {
        for (i = 0; i < h; i += 4) {
            ptr = last_line + (i + 1) * wrap - w;
            __asm__ volatile (
                "1:                             \n\t"
                "movq (%1, %0), %%mm0           \n\t"
                "movq %%mm0, (%0)               \n\t"
                "movq %%mm0, (%0, %2)           \n\t"
                "movq %%mm0, (%0, %2, 2)        \n\t"
                "movq %%mm0, (%0, %3)           \n\t"
                "add $8, %0                     \n\t"
                "cmp %4, %0                     \n\t"
                "jb 1b                          \n\t"
                : "+r" (ptr)
                : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
                  "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
                  "r" (ptr + width + 2 * w));
        }
    }
}
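
/*
 * Editor's illustration (not part of the original file): a plain-C sketch of
 * the edge extension that draw_edges_mmx implements with MMX. Each row's
 * first and last pixel is replicated sideways by w pixels; the (already
 * widened) first and last rows are then replicated upward/downward by h rows
 * when EDGE_TOP / EDGE_BOTTOM are requested. Semantics are inferred from the
 * assembly above; treat this as an approximation, not a verbatim reference.
 */
static inline void draw_edges_ref(uint8_t *buf, int wrap, int width, int height,
                                  int w, int h, int sides)
{
    uint8_t *last_line = buf + (height - 1) * wrap;
    int x, y, i;

    /* left and right: replicate each row's border pixel w times */
    for (y = 0; y < height; y++) {
        uint8_t *row = buf + y * wrap;
        for (x = 1; x <= w; x++) {
            row[-x]            = row[0];
            row[width - 1 + x] = row[width - 1];
        }
    }

    /* top and bottom: replicate the widened first/last row h times */
    if (sides & EDGE_TOP)
        for (i = 1; i <= h; i++)
            for (x = -w; x < width + w; x++)
                buf[x - i * wrap] = buf[x];
    if (sides & EDGE_BOTTOM)
        for (i = 1; i <= h; i++)
            for (x = -w; x < width + w; x++)
                last_line[x + i * wrap] = last_line[x];
}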

#endif /* HAVE_INLINE_ASM */

av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
                                         AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        c->pix_sum   = ff_pix_sum16_mmx;
        c->pix_norm1 = ff_pix_norm1_mmx;
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->pix_sum = ff_pix_sum16_mmxext;
    }
#endif

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->pix_sum   = ff_pix_sum16_sse2;
        c->pix_norm1 = ff_pix_norm1_sse2;
    }

    if (EXTERNAL_XOP(cpu_flags)) {
        c->pix_sum = ff_pix_sum16_xop;
    }

#if HAVE_INLINE_ASM

    if (INLINE_MMX(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->try_8x8basis = try_8x8basis_mmx;
        }
        c->add_8x8basis = add_8x8basis_mmx;

        if (avctx->bits_per_raw_sample <= 8) {
            c->draw_edges = draw_edges_mmx;
        }
    }

    if (INLINE_AMD3DNOW(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->try_8x8basis = try_8x8basis_3dnow;
        }
        c->add_8x8basis = add_8x8basis_3dnow;
    }

#if HAVE_SSSE3_INLINE
    if (INLINE_SSSE3(cpu_flags)) {
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->try_8x8basis = try_8x8basis_ssse3;
        }
        c->add_8x8basis = add_8x8basis_ssse3;
    }
#endif /* HAVE_SSSE3_INLINE */

#endif /* HAVE_INLINE_ASM */
}
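
/*
 * Editor's illustration (not part of this file): how the x86 initializer
 * above is expected to be reached. The generic initializer in
 * libavcodec/mpegvideoencdsp.c installs the C fallbacks first and then lets
 * each architecture override the function pointers it can accelerate. The
 * wrapper below uses a hypothetical name and is only a sketch of that
 * dispatch, not the actual FFmpeg code.
 */
static av_cold void mpegvideoencdsp_init_sketch(MpegvideoEncDSPContext *c,
                                                AVCodecContext *avctx)
{
    /* C implementations would be assigned here first (omitted) */
    if (ARCH_X86)
        ff_mpegvideoencdsp_init_x86(c, avctx);
}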