Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
GSoC
GSoC2018
macOS
vlc
Commits
f22f4ef5
Commit
f22f4ef5
authored
Dec 18, 2002
by
Sam Hocevar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* fixed several format string inconsistencies and deprecated C constructions.
parent
f9e12228
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
577 additions
and
572 deletions
+577
-572
modules/access/ftp.c
modules/access/ftp.c
+2
-2
modules/access/mms/mms.c
modules/access/mms/mms.c
+7
-3
modules/audio_output/oss.c
modules/audio_output/oss.c
+2
-2
modules/codec/adpcm.c
modules/codec/adpcm.c
+2
-1
modules/codec/ffmpeg/audio.c
modules/codec/ffmpeg/audio.c
+2
-2
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
+265
-265
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
+252
-255
modules/codec/mpeg_video/parser.c
modules/codec/mpeg_video/parser.c
+6
-6
modules/demux/asf/libasf.c
modules/demux/asf/libasf.c
+4
-3
modules/demux/demuxdump.c
modules/demux/demuxdump.c
+2
-2
modules/demux/mp4/libmp4.c
modules/demux/mp4/libmp4.c
+4
-4
modules/demux/ogg.c
modules/demux/ogg.c
+2
-2
modules/demux/wav/wav.c
modules/demux/wav/wav.c
+2
-2
modules/gui/gtk/display.c
modules/gui/gtk/display.c
+2
-2
modules/mux/mpeg/ps.c
modules/mux/mpeg/ps.c
+3
-3
src/audio_output/mixer.c
src/audio_output/mixer.c
+2
-2
src/input/input.c
src/input/input.c
+3
-3
src/input/input_ext-intf.c
src/input/input_ext-intf.c
+4
-4
src/misc/threads.c
src/misc/threads.c
+6
-5
src/misc/variables.c
src/misc/variables.c
+2
-2
src/video_output/video_output.c
src/video_output/video_output.c
+3
-2
No files found.
modules/access/ftp.c
View file @
f22f4ef5
...
...
@@ -2,7 +2,7 @@
* ftp.c:
*****************************************************************************
* Copyright (C) 2001, 2002 VideoLAN
* $Id: ftp.c,v 1.
2
2002/12/1
6
1
5
:17:
12 fenrir
Exp $
* $Id: ftp.c,v 1.
3
2002/12/1
8
1
4
:17:
09 sam
Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
...
...
@@ -350,7 +350,7 @@ static int Open( vlc_object_t *p_this )
}
#endif
msg_Dbg
(
p_input
,
"file size:
%d"
,
p_access
->
i_filesize
);
msg_Dbg
(
p_input
,
"file size:
"
I64Fd
,
p_access
->
i_filesize
);
FREE
(
psz_arg
);
if
(
ftp_StartStream
(
p_input
,
0
)
<
0
)
...
...
modules/access/mms/mms.c
View file @
f22f4ef5
...
...
@@ -2,7 +2,7 @@
* mms.c: MMS access plug-in
*****************************************************************************
* Copyright (C) 2001, 2002 VideoLAN
* $Id: mms.c,v 1.1
3
2002/12/1
2
1
5
:1
0:58 gbazin
Exp $
* $Id: mms.c,v 1.1
4
2002/12/1
8
1
4
:1
7:09 sam
Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
...
...
@@ -1559,11 +1559,15 @@ static int mms_ParseCommand( input_thread_t *p_input,
GET32
(
0
),
GET32
(
4
),
GET32
(
8
),
/* 12: protocol type "MMS " */
GET32
(
16
),
GET32
(
20
),
/* 24: unknown (0) */
/* 28: unknown (0) */
GET32
(
32
),
GET32
(
36
),
GET32
(
40
)
);
GET32
(
36
)
/* 40: switches */
/* 44: extra */
);
p_access
->
i_command
=
GET32
(
36
)
&
0xffff
;
...
...
modules/audio_output/oss.c
View file @
f22f4ef5
...
...
@@ -2,7 +2,7 @@
* oss.c : OSS /dev/dsp module for vlc
*****************************************************************************
* Copyright (C) 2000-2002 VideoLAN
* $Id: oss.c,v 1.3
8
2002/12/1
0
1
8:22:01 gbazin
Exp $
* $Id: oss.c,v 1.3
9
2002/12/1
8
1
4:17:09 sam
Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Samuel Hocevar <sam@zoy.org>
...
...
@@ -376,7 +376,7 @@ static int Open( vlc_object_t *p_this )
if
(
ioctl
(
p_sys
->
i_fd
,
SNDCTL_DSP_CHANNELS
,
&
i_nb_channels
)
<
0
||
i_nb_channels
!=
aout_FormatNbChannels
(
&
p_aout
->
output
.
output
)
)
{
msg_Err
(
p_aout
,
"cannot set number of audio channels (%
x
)"
,
msg_Err
(
p_aout
,
"cannot set number of audio channels (%
s
)"
,
aout_FormatPrintChannels
(
&
p_aout
->
output
.
output
)
);
close
(
p_sys
->
i_fd
);
free
(
p_sys
);
...
...
modules/codec/adpcm.c
View file @
f22f4ef5
...
...
@@ -2,7 +2,7 @@
* adpcm.c : adpcm variant audio decoder
*****************************************************************************
* Copyright (C) 2001, 2002 VideoLAN
* $Id: adpcm.c,v 1.
1
2002/12/
03 17:00:16 fenrir
Exp $
* $Id: adpcm.c,v 1.
2
2002/12/
18 14:17:10 sam
Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
...
...
@@ -378,6 +378,7 @@ static void DecodeThread( adec_thread_t *p_adec )
DecodeAdpcmMs
(
p_adec
,
p_aout_buffer
);
break
;
default:
break
;
}
...
...
modules/codec/ffmpeg/audio.c
View file @
f22f4ef5
...
...
@@ -2,7 +2,7 @@
* audio.c: audio decoder using ffmpeg library
*****************************************************************************
* Copyright (C) 1999-2001 VideoLAN
* $Id: audio.c,v 1.
7
2002/12/
06
14:
22:55 fenrir
Exp $
* $Id: audio.c,v 1.
8
2002/12/
18
14:
17:10 sam
Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
...
...
@@ -238,7 +238,7 @@ void E_( DecodeThread_Audio )( adec_thread_t *p_adec )
p_adec
->
p_context
->
channels
>
5
)
{
msg_Warn
(
p_adec
->
p_fifo
,
"invalid channels count"
,
"invalid channels count
%d
"
,
p_adec
->
p_context
->
channels
);
}
...
...
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
View file @
f22f4ef5
...
...
@@ -2,15 +2,15 @@
* postprocessing_mmx.c: Post Processing library in MMX
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_mmx.c,v 1.
2
2002/12/
06 16:34:05
sam Exp $
* $Id: postprocessing_mmx.c,v 1.
3
2002/12/
18 14:17:10
sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
...
...
@@ -21,7 +21,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use u
8, u32
.... */
#include <vlc/vlc.h>
/* only use u
int8_t, uint32_t
.... */
#include "postprocessing.h"
#include "postprocessing_common.h"
...
...
@@ -42,8 +42,8 @@
/* Use same things as in idct but how it work ? */
#define UNUSED_LONGLONG( foo ) \
static const unsigned long long foo __asm__ (#foo) __attribute__((unused))
/* to calculate isDC_mode for mmx */
/* to calculate isDC_mode for mmx */
UNUSED_LONGLONG
(
mmx_127_thr1
)
=
(
(
127ULL
-
PP_THR1
)
<<
56
)
|
(
(
127ULL
-
PP_THR1
)
<<
48
)
|
(
(
127ULL
-
PP_THR1
)
<<
40
)
|
...
...
@@ -67,38 +67,38 @@ UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;
#if 0
/* find min bytes from r ans set it in r, t is destroyed */
#define MMXEXT_GET_PMIN( r, t ) \
"movq " #r ", " #t " \n\
psrlq $8, " #t " \n\
pminub " #t ", " #r " \n\
pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n\
pminub " #t ", " #r " \n\
pshufw $0xfe, " #r ", " #t " \n\
pminub " #t ", " #r " \n"
"movq " #r ", " #t " \n
"
\
"
psrlq $8, " #t " \n
"
\
"
pminub " #t ", " #r " \n
"
\
"
pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n
"
\
"
pminub " #t ", " #r " \n
"
\
"
pshufw $0xfe, " #r ", " #t " \n
"
\
"
pminub " #t ", " #r " \n"
/* find mzx bytes from r ans set it in r, t is destroyed */
#define MMXEXT_GET_PMAX( r, t ) \
"movq " #r ", " #t " \n\
psrlq $8, " #t " \n\
pmaxub " #t ", " #r " \n\
pshufw $0xf5, " #r ", " #t " \n\
pmaxub " #t ", " #r " \n\
pshufw $0xfe, " #r ", " #t " \n\
pmaxub " #t ", " #r " \n"
"movq " #r ", " #t " \n
"
\
"
psrlq $8, " #t " \n
"
\
"
pmaxub " #t ", " #r " \n
"
\
"
pshufw $0xf5, " #r ", " #t " \n
"
\
"
pmaxub " #t ", " #r " \n
"
\
"
pshufw $0xfe, " #r ", " #t " \n
"
\
"
pmaxub " #t ", " #r " \n"
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
"movq " #s ", " #t " \n\
pminub " #t ", " #m " \n\
pmaxub " #t ", " #M " \n"
"movq " #s ", " #t " \n
"
\
"
pminub " #t ", " #m " \n
"
\
"
pmaxub " #t ", " #M " \n"
/* Some tips for MMX
* |a-b| :
d1 = a - b with unsigned saturate
d2 = b - a with ...
|a-b| = d1 | d2
d2 = b - a with ...
|a-b| = d1 | d2
*/
...
...
@@ -112,45 +112,45 @@ UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;
* so need to be fast ...
*
****************************************************************************/
static
inline
int
pp_deblock_isDC_mode
(
u
8
*
p_v
)
static
inline
int
pp_deblock_isDC_mode
(
u
int8_t
*
p_v
)
{
unsigned
int
i_eq_cnt
;
/* algo :
x = v[i] - v[i+1] without signed saturation
( XXX see if there is'nt problem, but can't be with signed
sat because pixel will be saturate :(
x = v[i] - v[i+1] without signed saturation
( XXX see if there is'nt problem, but can't be with signed
sat because pixel will be saturate :(
so x within [-128, 127] and we have to test if it fit in [-M, M]
we add 127-M with wrap around -> good value fit in [ 127-2*M, 127]
and if x >= 127 - 2 * M ie x > 127 -2*M - 1 value is good
*/
__asm__
__volatile__
(
"
\n
\
#* Do (v0-v1) to (v7-v8)
\n
\
movq (%1), %%mm1 # load v0->v7
\n
\
movq 1(%1), %%mm2 # load v1->v8
\n
\
psubb %%mm2, %%mm1 # v[i]-v[i+1]
\n
\
paddb mmx_127_thr1, %%mm1 # + 127-THR1 with wrap
\n
\
pcmpgtb mmx_127_2xthr1_1, %%mm1 # > 127 -2*thr1 - 1
\n
"
"movq %%mm1, %%mm0 #
\n
\
psrlw $8, %%mm1 #
\n
\
paddb %%mm1, %%mm0 #
\n
\
#
\n
\
movq %%mm0, %%mm1 # Now sum to create eq_cnt
\n
\
psrld $16, %%mm0 #
\n
\
paddb %%mm0, %%mm1 #
\n
\
#
\n
\
movq %%mm1, %%mm0 #
\n
\
psrlq $32, %%mm1 #
\n
\
paddb %%mm1, %%mm0
\n
"
"movd %%mm0, %0 #
\n
\
negl %0
\n
\
andl $255, %0"
__asm__
__volatile__
(
"
#* Do (v0-v1) to (v7-v8)
\n
"
"
movq (%1), %%mm1 # load v0->v7
\n
"
"
movq 1(%1), %%mm2 # load v1->v8
\n
"
"
psubb %%mm2, %%mm1 # v[i]-v[i+1]
\n
"
"
paddb mmx_127_thr1, %%mm1 # + 127-THR1 with wrap
\n
"
"
pcmpgtb mmx_127_2xthr1_1, %%mm1 # > 127 -2*thr1 - 1
\n
"
"movq %%mm1, %%mm0 #
\n
"
"
psrlw $8, %%mm1 #
\n
"
"
paddb %%mm1, %%mm0 #
\n
"
"
#
\n
"
"
movq %%mm0, %%mm1 # Now sum to create eq_cnt
\n
"
"
psrld $16, %%mm0 #
\n
"
"
paddb %%mm0, %%mm1 #
\n
"
"
#
\n
"
"
movq %%mm1, %%mm0 #
\n
"
"
psrlq $32, %%mm1 #
\n
"
"
paddb %%mm1, %%mm0
\n
"
"movd %%mm0, %0 #
\n
"
"
negl %0
\n
"
"
andl $255, %0"
:
"=r"
(
i_eq_cnt
)
:
"r"
(
p_v
)
);
/* last test, hey, 9 don't fit in MMX */
if
((
(
p_v
[
8
]
-
p_v
[
9
]
+
PP_THR1
)
&
0xffff
)
<=
PP_2xTHR1
)
{
if
((
(
p_v
[
8
]
-
p_v
[
9
]
+
PP_THR1
)
&
0xffff
)
<=
PP_2xTHR1
)
{
i_eq_cnt
++
;
}
...
...
@@ -160,8 +160,8 @@ static inline int pp_deblock_isDC_mode( u8 *p_v )
for( i =0; i < 9; i++ )
{
if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
{
if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
{
i_eq_cnt++;
}
}
...
...
@@ -170,25 +170,25 @@ static inline int pp_deblock_isDC_mode( u8 *p_v )
return
(
(
i_eq_cnt
>=
PP_THR2
)
?
1
:
0
);
}
static
inline
int
pp_deblock_isMinMaxOk
(
u
8
*
p_v
,
int
i_QP
)
static
inline
int
pp_deblock_isMinMaxOk
(
u
int8_t
*
p_v
,
int
i_QP
)
{
int
i_range
;
#if 0
__asm__ __volatile__ (
__asm__ __volatile__ (
"movq 1(%1), %%mm0 # 8 bytes \n"
"movq %%mm0, %%mm1 \n"
MMXEXT_GET_PMIN( %%mm0, %%mm7 )
MMXEXT_GET_PMAX( %%mm1, %%mm7 )
"psubd %%mm0, %%mm1 # max - min \n
\
movd %%mm1, %0 \n
\
andl $255, %0" : "=r"(i_range) : "r"(p_v) );
"psubd %%mm0, %%mm1 # max - min \n
"
"
movd %%mm1, %0 \n
"
"
andl $255, %0" : "=r"(i_range) : "r"(p_v) );
#endif
int
i_max
,
i_min
;
int
i
;
i_min
=
i_max
=
p_v
[
1
];
i_min
=
i_max
=
p_v
[
1
];
for
(
i
=
2
;
i
<
9
;
i
++
)
{
if
(
i_max
<
p_v
[
i
]
)
i_max
=
p_v
[
i
];
...
...
@@ -200,7 +200,7 @@ static inline int pp_deblock_isMinMaxOk( u8 *p_v, int i_QP )
}
static
inline
void
pp_deblock_DefaultMode
(
u
8
i_v
[
10
],
int
i_stride
,
static
inline
void
pp_deblock_DefaultMode
(
u
int8_t
i_v
[
10
],
int
i_stride
,
int
i_QP
)
{
int
d
,
i_delta
;
...
...
@@ -210,16 +210,16 @@ static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
/* d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP ) */
/* First calculate a3x0 */
__asm__
__volatile__
(
"
\n
\
pxor %%mm7, %%mm7 # mm7 = 0
\n
\
movq mmx_m2_5_m5_2, %%mm6 # mm6 =(2,-5,5,-2)
\n
\
movd 3(%1), %%mm0
\n
\
punpcklbw %%mm7,%%mm0
\n
\
pmaddwd %%mm6, %%mm0
\n
"
"movq %%mm0, %%mm1
\n
\
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm0
\n
\
movd %%mm0, %0"
:
"=r"
(
a3x0
)
:
"r"
(
i_v
)
);
__asm__
__volatile__
(
"
pxor %%mm7, %%mm7 # mm7 = 0
\n
"
"
movq mmx_m2_5_m5_2, %%mm6 # mm6 =(2,-5,5,-2)
\n
"
"
movd 3(%1), %%mm0
\n
"
"
punpcklbw %%mm7,%%mm0
\n
"
"
pmaddwd %%mm6, %%mm0
\n
"
"movq %%mm0, %%mm1
\n
"
"
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm0
\n
"
"
movd %%mm0, %0"
:
"=r"
(
a3x0
)
:
"r"
(
i_v
)
);
#if 0
a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 *( i_v[5] - i_v[4] );
#endif
...
...
@@ -228,7 +228,7 @@ static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
{
b_neg
=
1
;
a3x0
=
-
a3x0
;
}
}
else
{
b_neg
=
0
;
...
...
@@ -237,24 +237,24 @@ static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
if
(
(
a3x0
<
8
*
i_QP
)
&&
(
a3x0
!=
0
)
)
/* |a3x0| < 8*i_QP */
{
/* calculate a3x1 et a3x2 */
__asm__
__volatile__
(
"
\n
\
# mm7 = 0
\n
\
# mm6 = ( 2, -5, 5, -2 )
\n
\
movd 1(%2), %%mm0
\n
\
movd 5(%2), %%mm2
\n
\
punpcklbw %%mm7,%%mm0
\n
\
punpcklbw %%mm7,%%mm2
\n
\
pmaddwd %%mm6, %%mm0
\n
\
pmaddwd %%mm6, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
\
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm0 # mm0 = a3x1
\n
\
movd %%mm0, %0
\n
"
"movq %%mm2, %%mm1
\n
\
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm2 # mm2 = a3x2
\n
\
movd %%mm2, %1
\n
\
"
:
"=r"
(
a3x1
),
"=r"
(
a3x2
)
:
"r"
(
i_v
)
);
__asm__
__volatile__
(
"
# mm7 = 0
\n
"
"
# mm6 = ( 2, -5, 5, -2 )
\n
"
"
movd 1(%2), %%mm0
\n
"
"
movd 5(%2), %%mm2
\n
"
"
punpcklbw %%mm7,%%mm0
\n
"
"
punpcklbw %%mm7,%%mm2
\n
"
"
pmaddwd %%mm6, %%mm0
\n
"
"
pmaddwd %%mm6, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm0 # mm0 = a3x1
\n
"
"
movd %%mm0, %0
\n
"
"movq %%mm2, %%mm1
\n
"
"
psrlq $32, %%mm1
\n
"
"paddd %%mm1, %%mm2 # mm2 = a3x2
\n
"
"
movd %%mm2, %1
\n
"
:
"=r"
(
a3x1
),
"=r"
(
a3x2
)
:
"r"
(
i_v
)
);
#if 0
a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
...
...
@@ -264,7 +264,7 @@ static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
if
(
a3x2
<
0
)
a3x2
=
-
a3x2
;
/* abs( a3x2 ) */
a3x0_
=
PP_MIN3
(
a3x0
,
a3x1
,
a3x2
);
d
=
5
*
(
a3x0
-
a3x0_
)
/
8
;
/* always > 0 */
i_delta
=
(
i_v
[
4
]
-
i_v
[
5
]
)
/
2
;
...
...
@@ -293,7 +293,7 @@ static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
static
inline
void
pp_deblock_DCMode
(
u
8
*
p_v
,
/* = int i_v[10] */
static
inline
void
pp_deblock_DCMode
(
u
int8_t
*
p_v
,
/* = int i_v[10] */
int
i_QP
)
{
int
i_p0
,
i_p9
;
...
...
@@ -302,20 +302,20 @@ static inline void pp_deblock_DCMode( u8 *p_v, /* = int i_v[10] */
i_p9
=
PP_ABS
(
p_v
[
8
]
-
p_v
[
9
]
)
<
i_QP
?
p_v
[
9
]
:
p_v
[
8
];
/* mm0 = 8 pix unmodified
-We will process first 4 pixel
-We will process first 4 pixel
mm0 = 8 pix unmodified
mm1 = for the first part of the 4 first pix
(v1) -> (p0) -> ... ( word )
(v2) (v1)
(v3) (v2)
(v4) (v3)
= for the commoin part between first and last pix
(v2) -> (v3) -> ... ( word )
(v3) (v4)
(v4) (v5)
(v5) (v6)
= for the last part of the 4 last pix
(v5) -> (v6) -> ... ( word )
(v6) (v7)
...
...
@@ -323,110 +323,110 @@ static inline void pp_deblock_DCMode( u8 *p_v, /* = int i_v[10] */
(v8) (p9)
mm2 = acu for first new pix
mm3 = acu for last pix
mm3 = acu for last pix
mm4 = unused
mm5 = p0
mm5 = p0
mm6 = p9 << 48
mm7 = 0 */
__asm__
__volatile__
(
"pxor %%mm7, %%mm7
\n
\
movq 1(%0), %%mm0 # get 8 pix
\n
\
# unpack into mm1
\n
\
movq %%mm0, %%mm1
\n
\
punpcklbw %%mm7, %%mm1
\n
\
# get p_0 and i_p9
\n
\
movd %1, %%mm5
\n
\
movd %2, %%mm6
\n
\
psllq $48, %%mm6
\n
\n
\
movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !!
\n
\
movq %%mm1, %%mm2
\n
\
psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4]
\n
\
\n
\
psllq $16, %%mm1
\n
\
por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3)
\n
\
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm2
\n
\
\n
\
psllq $16, %%mm1
\n
\
por %%mm5, %%mm1 # mm1 =( p0, p0, v1, v2)
\n
\
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm2
\n
\
\n
\
psllq $16, %%mm1
\n
\
por %%mm5, %%mm1 # mm1 =( p0, p0, p0, v1)
\n
\
\n
\
paddw %%mm1, %%mm2
\n
\
\n
\
psllq $16, %%mm1
\n
\
por %%mm5, %%mm1 # mm1 =( p0, p0, p0, p0)
\n
\
\n
\
paddw %%mm1, %%mm2
\n
\
# Now last part a little borring
\n
\
# last part for mm2, beginig for mm3
movq %%mm0, %%mm1
\n
\
psrlq $8, %%mm1
\n
\
punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 )
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
movq %%mm0, %%mm1
\n
\
psrlq $16, %%mm1
\n
\
punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 )
\n
\
psllw $1, %%mm1
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
movq %%mm0, %%mm1
\n
\
psrlq $24, %%mm1
\n
\
punpcklbw %%mm7, %%mm1 # mm1 =( v4, v5, v6, v7)
\n
\
paddw %%mm1, %%mm2
\n
\
paddw %%mm1, %%mm3
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
movq %%mm0, %%mm1
\n
\
psrlq $32, %%mm1
\n
\
punpcklbw %%mm7, %%mm1 # mm1 =( v5, v6, v7, v8)
\n
\
paddw %%mm1, %%mm2
\n
\
psllw $2, %%mm1
paddw %%mm1, %%mm3
\n
\
# Now last part for last 4 pix
\n
\
#
\n
\
movq %%mm0, %%mm1
\n
\
punpckhbw %%mm7, %%mm1 # mm1 = ( v5, v6, v7, v8)
\n
\
\n
\
psrlq $16, %%mm1
\n
\
por %%mm6, %%mm1 # mm1 =( v6, v7, v8, p9 )
\n
\
\n
\
paddw %%mm1, %%mm3
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
psrlq $16, %%mm1
\n
\
por %%mm6, %%mm1 # mm1 =( v7, v8, p9, p9)
\n
\
\n
\
paddw %%mm1, %%mm3
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
psrlq $16, %%mm1
\n
\
por %%mm6, %%mm1 # mm1 =( v8, p9, p9, p9 )
\n
\
\n
\
paddw %%mm1, %%mm3
\n
\
\n
\
psrlq $16, %%mm1
\n
\
por %%mm6, %%mm1 # mm1 =( p9, p9, p9, p9 )
\n
\
\n
\
paddw %%mm1, %%mm3
\n
\
psrlw $4, %%mm2
\n
\
psrlw $4, %%mm3
\n
\
packuswb %%mm3, %%mm2
\n
\
movq %%mm2, 1(%0)
\n
\
"
:
:
"r"
(
p_v
),
"r"
(
i_p0
),
"r"
(
i_p9
)
:
"memory"
);
"pxor %%mm7, %%mm7
\n
"
"
movq 1(%0), %%mm0 # get 8 pix
\n
"
"
# unpack into mm1
\n
"
"
movq %%mm0, %%mm1
\n
"
"
punpcklbw %%mm7, %%mm1
\n
"
"
# get p_0 and i_p9
\n
"
"
movd %1, %%mm5
\n
"
"
movd %2, %%mm6
\n
"
"
psllq $48, %%mm6
\n
"
"
\n
"
"
movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !!
\n
"
"
movq %%mm1, %%mm2
\n
"
"
psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4]
\n
"
"
\n
"
"
psllq $16, %%mm1
\n
"
"
por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3)
\n
"
"
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
\n
"
"
psllq $16, %%mm1
\n
"
"
por %%mm5, %%mm1 # mm1 =( p0, p0, v1, v2)
\n
"
"
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
\n
"
"
psllq $16, %%mm1
\n
"
"
por %%mm5, %%mm1 # mm1 =( p0, p0, p0, v1)
\n
"
"
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
\n
"
"
psllq $16, %%mm1
\n
"
"
por %%mm5, %%mm1 # mm1 =( p0, p0, p0, p0)
\n
"
"
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
# Now last part a little borring
\n
"
"
# last part for mm2, beginig for mm3
\n
"
"
movq %%mm0, %%mm1
\n
"
"
psrlq $8, %%mm1
\n
"
"
punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 )
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
paddw %%mm1, %%mm2
\n
"
"
paddw %%mm1, %%mm3
\n
"
"
\n
"
"
movq %%mm0, %%mm1
\n
"
"
psrlq $16, %%mm1
\n
"
"
punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 )
\n
"