Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
Steve Lhomme
VLC
Commits
5f232025
Commit
5f232025
authored
May 16, 2001
by
Renaud Dartus
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* Add 3D Now! imdct
* Remove kludge for ac3 on MacOS X
parent
1ac785a2
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1180 additions
and
197 deletions
+1180
-197
Makefile
Makefile
+1
-6
configure
configure
+1
-1
configure.in
configure.in
+1
-1
plugins/imdct/Makefile
plugins/imdct/Makefile
+16
-2
plugins/imdct/ac3_imdct_3dn.c
plugins/imdct/ac3_imdct_3dn.c
+559
-0
plugins/imdct/ac3_imdct_c.c
plugins/imdct/ac3_imdct_c.c
+2
-60
plugins/imdct/ac3_imdct_common.c
plugins/imdct/ac3_imdct_common.c
+3
-62
plugins/imdct/ac3_imdct_common.h
plugins/imdct/ac3_imdct_common.h
+1
-2
plugins/imdct/ac3_imdct_sse.c
plugins/imdct/ac3_imdct_sse.c
+6
-52
plugins/imdct/ac3_retables.h
plugins/imdct/ac3_retables.h
+83
-0
plugins/imdct/ac3_srfft_3dn.c
plugins/imdct/ac3_srfft_3dn.c
+344
-0
plugins/imdct/ac3_srfft_sse.c
plugins/imdct/ac3_srfft_sse.c
+9
-9
plugins/imdct/imdct3dn.c
plugins/imdct/imdct3dn.c
+152
-0
src/ac3_decoder/ac3_imdct.c
src/ac3_decoder/ac3_imdct.c
+2
-2
No files found.
Makefile
View file @
5f232025
...
...
@@ -26,7 +26,7 @@ PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \
dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk
\
downmix/downmix downmix/downmixsse downmix/downmix3dn
\
idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext
\
imdct/imdct imdct/imdctsse
\
imdct/imdct
imdct/imdct3dn
imdct/imdctsse
\
macosx/macosx mga/mga
\
motion/motion motion/motionmmx motion/motionmmxext
\
mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl
\
...
...
@@ -317,12 +317,7 @@ endif
$(C_OBJ)
:
%.o: Makefile.opts Makefile.dep Makefile
$(C_OBJ)
:
%.o: .dep/%.d
$(C_OBJ)
:
%.o: %.c
ifneq
(,$(findstring darwin,$(SYS)))
#this is uglier of all
@if
test
"src/ac3_decoder/ac3_imdct.c"
=
"$<"
;
then
$(CC)
`echo
$(CFLAGS)
|
sed
-e
's/-O3/-O/'
`
-c
-o
$@
$<;
echo
"(CC) `echo $(CFLAGS) | sed -e 's/-O3/-O/'` -c -o $@ $<"
;
else
$(CC)
$(CFLAGS)
-c
-o
$@
$<;
echo
"$(CC) $(CFLAGS) -c -o $@ $<"
;
fi
else
$(CC)
$(CFLAGS)
-c
-o
$@
$<
endif
$(CPP_OBJ)
:
%.o: Makefile.opts Makefile.dep Makefile
$(CPP_OBJ)
:
%.o: .dep/%.dpp
...
...
configure
View file @
5f232025
...
...
@@ -3285,7 +3285,7 @@ int main() {
EOF
if
{
(
eval echo
configure:3287:
\"
$ac_compile
\"
)
1>&5
;
(
eval
$ac_compile
)
2>&5
;
}
;
then
rm
-rf
conftest
*
ACCEL_PLUGINS
=
"
${
ACCEL_PLUGINS
}
idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
ACCEL_PLUGINS
=
"
${
ACCEL_PLUGINS
}
idctmmxext motionmmxext
imdct3dn
imdctsse downmix3dn downmixsse"
echo
"
$ac_t
""yes"
1>&6
else
echo
"configure: failed program was:"
>
&5
...
...
configure.in
View file @
5f232025
...
...
@@ -162,7 +162,7 @@ AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],,
AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly])
AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],,
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext
imdct3dn
imdctsse downmix3dn downmixsse"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
dnl
...
...
plugins/imdct/Makefile
View file @
5f232025
...
...
@@ -9,15 +9,18 @@
PLUGIN_IMDCT
=
imdct.o ac3_imdct_c.o ac3_srfft_c.o
PLUGIN_IMDCTSSE
=
imdctsse.o ac3_imdct_sse.o ac3_srfft_sse.o
PLUGIN_IMDCT3DN
=
imdct3dn.o ac3_imdct_3dn.o ac3_srfft_3dn.o
PLUGIN_IMDCTCOMMON
=
ac3_imdct_common.o
BUILTIN_IMDCT
=
$(PLUGIN_IMDCT:%.o=BUILTIN_IMDCT_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT_%.o)
BUILTIN_IMDCTSSE
=
$(PLUGIN_IMDCTSSE:%.o=BUILTIN_IMDCTSSE_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCTSSE_%.o)
BUILTIN_IMDCT3DN
=
$(PLUGIN_IMDCT3DN:%.o=BUILTIN_IMDCT3DN_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT3DN_%.o)
PLUGIN_C
=
$(PLUGIN_IMDCT)
$(PLUGIN_IMDCTSSE)
$(PLUGIN_IMDCTCOMMON)
ALL_OBJ
=
$(PLUGIN_C)
$(BUILTIN_IMDCT)
$(BUILTIN_IMDCTSSE)
PLUGIN_C
=
$(PLUGIN_IMDCT)
$(PLUGIN_IMDCTSSE)
$(PLUGIN_IMDCT3DN)
$(PLUGIN_IMDCTCOMMON)
ALL_OBJ
=
$(PLUGIN_C)
$(BUILTIN_IMDCT)
$(BUILTIN_IMDCTSSE)
$(BUILTIN_IMDCT3DN)
#
# Virtual targets
...
...
@@ -33,6 +36,10 @@ $(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: .dep/%.d
$(BUILTIN_IMDCTSSE)
:
BUILTIN_IMDCTSSE_%.o: %.c
$(CC)
$(CFLAGS)
-DBUILTIN
-DMODULE_NAME
=
imdctsse
-c
-o
$@
$<
$(BUILTIN_IMDCT3DN)
:
BUILTIN_IMDCT3DN_%.o: .dep/%.d
$(BUILTIN_IMDCT3DN)
:
BUILTIN_IMDCT3DN_%.o: %.c
$(CC)
$(CFLAGS)
-DBUILTIN
-DMODULE_NAME
=
imdct3dn
-c
-o
$@
$<
#
# Real targets
#
...
...
@@ -51,3 +58,10 @@ $(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: %.c
ar r
$@
$^
$(RANLIB)
$@
../../lib/imdct3dn.so
:
$(PLUGIN_IMDCT3DN) $(PLUGIN_IMDCTCOMMON)
$(CC)
$(PCFLAGS)
-o
$@
$^
$(PLCFLAGS)
../../lib/imdct3dn.a
:
$(BUILTIN_IMDCT3DN)
ar r
$@
$^
$(RANLIB)
$@
plugins/imdct/ac3_imdct_3dn.c
0 → 100644
View file @
5f232025
/*****************************************************************************
* ac3_imdct_3dn.c: accelerated 3D Now! ac3 DCT
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct_3dn.c,v 1.1 2001/05/16 14:51:29 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#define MODULE_NAME imdct3dn
#include "modules_inner.h"
/*****************************************************************************
* Preamble
*****************************************************************************/
#include "defs.h"
#include <math.h>
#include <stdio.h>
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "ac3_imdct.h"
#include "ac3_imdct_common.h"
#include "ac3_retables.h"
/* FFT entry points; only declared here, their definitions are not in this
 * file. */
void _M( fft_64p )  ( complex_t *x );
void _M( fft_128p ) ( complex_t *a );

/* Local 3D Now! stages of the 512-point IMDCT, defined further down. */
static void imdct512_pre_ifft_twiddle_3dn( const int *pmt, complex_t *buf,
                                           float *data, float *xcos_sin_sse );
static void imdct512_post_ifft_twiddle_3dn( complex_t *buf,
                                            float *xcos_sin_sse );
static void imdct512_window_delay_3dn( complex_t *buf, float *data_ptr,
                                       float *window_prt, float *delay_prt );
static void imdct512_window_delay_nol_3dn( complex_t *buf, float *data_ptr,
                                           float *window_prt,
                                           float *delay_prt );
/*****************************************************************************
 * imdct_init: fill the interleaved twiddle table used by the 3D Now! IMDCT
 *****************************************************************************
 * For every i in [0, 128) four floats are stored in p_imdct->xcos_sin_sse:
 *     [4i]   =  cos_i * scale
 *     [4i+1] = -sin_i * scale
 *     [4i+2] = -sin_i * scale
 *     [4i+3] = -cos_i * scale
 * with angle_i = 2 * pi * (8i + 1) / (8 * N).
 * The two twiddle routines of this file read the table as two 64-bit pairs
 * per index.
 *****************************************************************************/
void _M( imdct_init ) ( imdct_t * p_imdct )
{
    int i;
    float scale = 181.019;      /* presumably 128 * sqrt(2) -- TODO confirm */

    for( i = 0; i < 128; i++ )
    {
        float xcos_i = cos( 2.0f * M_PI * (8 * i + 1) / (8 * N) ) * scale;
        float xsin_i = sin( 2.0f * M_PI * (8 * i + 1) / (8 * N) ) * scale;

        p_imdct->xcos_sin_sse[i * 4]     =  xcos_i;
        p_imdct->xcos_sin_sse[i * 4 + 1] = -xsin_i;
        p_imdct->xcos_sin_sse[i * 4 + 2] = -xsin_i;
        p_imdct->xcos_sin_sse[i * 4 + 3] = -xcos_i;
    }
}
/*****************************************************************************
 * imdct_do_512: 512-point IMDCT with overlap
 *****************************************************************************
 * Runs the four stages in order: pre-IFFT twiddle (using the pm128 index
 * table), 128-point complex FFT, post-IFFT twiddle, then the window stage
 * that adds the previous delay into data[] and writes the new delay[].
 *****************************************************************************/
void _M( imdct_do_512 ) ( imdct_t * p_imdct, float data[], float delay[] )
{
    complex_t *p_work    = p_imdct->buf;
    float     *p_twiddle = p_imdct->xcos_sin_sse;

    imdct512_pre_ifft_twiddle_3dn( pm128, p_work, data, p_twiddle );
    _M( fft_128p ) ( p_work );
    imdct512_post_ifft_twiddle_3dn( p_work, p_twiddle );
    imdct512_window_delay_3dn( p_work, data, window, delay );
}
/*****************************************************************************
 * imdct_do_512_nol: 512-point IMDCT, "nol" flavour of the window stage
 *****************************************************************************
 * Same pipeline as imdct_do_512 (pre-IFFT twiddle, 128-point FFT, post-IFFT
 * twiddle) but finishes with imdct512_window_delay_nol_3dn instead of
 * imdct512_window_delay_3dn.
 *****************************************************************************/
void _M( imdct_do_512_nol ) ( imdct_t * p_imdct, float data[], float delay[] )
{
    complex_t *p_work    = p_imdct->buf;
    float     *p_twiddle = p_imdct->xcos_sin_sse;

    imdct512_pre_ifft_twiddle_3dn( pm128, p_work, data, p_twiddle );
    _M( fft_128p ) ( p_work );
    imdct512_post_ifft_twiddle_3dn( p_work, p_twiddle );
    imdct512_window_delay_nol_3dn( p_work, data, window, delay );
}
/*****************************************************************************
 * imdct512_pre_ifft_twiddle_3dn: pre-IFFT twiddle (3D Now!)
 *****************************************************************************
 * For each of the 128 indices j read from pmt[], in order:
 *     buf[n] (low,  high) = data[2j]     * ( -s_j, -c_j )
 *                         + data[255-2j] * (  c_j, -s_j )
 * where the four factors come from xcos_sin_sse[4j .. 4j+3].
 *
 *  pmt           table of 128 indices
 *  buf           output, 128 pairs of floats, written sequentially
 *  data          input, floats 0..255 are read
 *  xcos_sin_sse  twiddle table, four floats per index
 *
 * The arguments are handed to the asm through operand constraints.  The
 * previous version rebuilt a stack frame inside the asm and fetched the
 * arguments from 8(%ebp)..20(%ebp), which is only valid when the compiler
 * emits no prologue and never inlines this function.  The loop label is a
 * numeric local label so the body may be emitted more than once.
 *
 * i386 only: 32-bit operand sizes are used on the pointers, as before.
 * femms resets the MMX/x87 state before returning.
 *****************************************************************************/
static void imdct512_pre_ifft_twiddle_3dn( const int *pmt, complex_t *buf,
                                           float *data, float *xcos_sin_sse )
{
    const int *pmt_end = pmt + 128;     /* 128 iterations, one per index */
    int i_idx;                          /* scratch: j, 2j, then -2j */

    __asm__ __volatile__ (
    "1:\n"
    "movl (%0), %2\n"               /* j = *pmt */
    "movd (%3, %2, 8), %%mm1\n"     /* 2j */
    "punpckldq %%mm1, %%mm1\n"      /* 2j | 2j */
    "shll $1, %2\n"
    "movq (%4, %2, 8), %%mm0\n"     /* -s_j | c_j */
    "movq 8(%4, %2, 8), %%mm2\n"    /* -c_j | -s_j */
    "negl %2\n"
    "movd 1020(%3, %2, 4), %%mm4\n" /* 255-2j */
    "punpckldq %%mm4, %%mm4\n"      /* 255-2j | 255-2j */
    "addl $4, %0\n"
    "pfmul %%mm4, %%mm0\n"          /* 255-2j * -s_j | 255-2j * c_j */
    "pfmul %%mm1, %%mm2\n"          /* 2j * -c_j | 2j * -s_j */
    "addl $8, %1\n"
    "pfadd %%mm2, %%mm0\n"          /* 2j * -c_j + 255-2j * -s_j |
                                     * 2j * -s_j + 255-2j * c_j */
    "movq %%mm0, -8(%1)\n"
    "cmpl %5, %0\n"                 /* all 128 indices consumed ? */
    "jne 1b\n"
    "femms\n"
    : "+r" (pmt), "+r" (buf), "=&r" (i_idx)
    : "r" (data), "r" (xcos_sin_sse), "rm" (pmt_end)
    : "memory", "cc" );
}
/*****************************************************************************
 * imdct512_post_ifft_twiddle_3dn: post-IFFT twiddle (3D Now!)
 *****************************************************************************
 * Rewrites the 128 (re, im) pairs of buf in place, two pairs per iteration
 * (64 iterations):
 *     low  float = -c * re - s * im
 *     high float = -s * re + c * im
 * with the factors read from xcos_sin_sse, four floats per pair.
 *
 * Both pointers and the loop counter are read-write operands: the previous
 * version advanced %ecx while declaring it input-only, and saved/restored
 * %ebx by hand for the counter.  A "memory" clobber is declared because buf
 * is written, and the loop label is a numeric local label so the body may be
 * emitted more than once.
 *
 * i386 only: 32-bit operand sizes are used on the pointers, as before.
 * femms resets the MMX/x87 state before returning.
 *****************************************************************************/
static void imdct512_post_ifft_twiddle_3dn( complex_t *buf,
                                            float *xcos_sin_sse )
{
    int i_loop = 64;                /* two complex values per iteration */

    __asm__ __volatile__ (
    "1:\n"
    "movq (%0), %%mm0\n"            /* im0 | re0 */
    "movq %%mm0, %%mm1\n"           /* im0 | re0 */
    "punpckldq %%mm0, %%mm0\n"      /* re0 | re0 */
    "punpckhdq %%mm1, %%mm1\n"      /* im0 | im0 */
    "movq (%1), %%mm2\n"            /* -s | c */
    "movq 8(%1), %%mm3\n"           /* -c | -s */
    "movq %%mm3, %%mm4\n"
    "punpckhdq %%mm2, %%mm3\n"      /* -s | -c */
    "punpckldq %%mm2, %%mm4\n"      /* c | -s */
    "movq 8(%0), %%mm2\n"           /* im1 | re1 */
    "movq %%mm2, %%mm5\n"           /* im1 | re1 */
    "punpckldq %%mm2, %%mm2\n"      /* re1 | re1 */
    "punpckhdq %%mm5, %%mm5\n"      /* im1 | im1 */
    "pfmul %%mm3, %%mm0\n"          /* -s * re0 | -c * re0 */
    "pfmul %%mm4, %%mm1\n"          /* c * im0 | -s * im0 */
    "movq 16(%1), %%mm6\n"          /* -s1 | c1 */
    "movq 24(%1), %%mm7\n"          /* -c1 | -s1 */
    "movq %%mm7, %%mm4\n"
    "punpckhdq %%mm6, %%mm7\n"      /* -s1 | -c1 */
    "punpckldq %%mm6, %%mm4\n"      /* c1 | -s1 */
    "pfmul %%mm7, %%mm2\n"          /* -s1*re1 | -c1*re1 */
    "pfmul %%mm4, %%mm5\n"          /* c1*im1 | -s1*im1 */
    "pfadd %%mm1, %%mm0\n"          /* -s * re0 + c * im0 |
                                     * -c * re0 - s * im0 */
    "pfadd %%mm5, %%mm2\n"          /* -s1 * re1 + c1 * im1 |
                                     * -c1 * re1 - s1 * im1 */
    "movq %%mm0, (%0)\n"
    "movq %%mm2, 8(%0)\n"
    "addl $32, %1\n"                /* next two twiddle quads */
    "addl $16, %0\n"                /* next two complex values */
    "decl %2\n"
    "jnz 1b\n"
    "femms\n"
    : "+r" (buf), "+r" (xcos_sin_sse), "+r" (i_loop)
    :
    : "memory", "cc" );
}
/*****************************************************************************
 * imdct512_window_delay_3dn: windowing and overlap with the delay line
 *****************************************************************************
 * Four loops of 32 iterations, four floats per iteration:
 *   1. data[0..127]    = window * ( -im of buf[64+k], re of buf[63-k] )
 *                        + delay[0..127]
 *   2. data[128..255]  = window * ( -re of buf[k], im of buf[127-k] )
 *                        + delay[128..255]
 *   3. delay[0..127]   = (window walked backwards)
 *                        * ( -re of buf[64+k], im of buf[63-k] )
 *   4. delay[128..255] = (window walked backwards)
 *                        * ( im of buf[k], -re of buf[127-k] )
 *
 *  buf         128 (re, im) float pairs, read only
 *  data_ptr    256 output floats
 *  window_prt  256 window floats: read forwards by loops 1-2, then backwards
 *              from the end by loops 3-4
 *  delay_prt   256 floats, read by loops 1-2 and overwritten by loops 3-4
 *
 * The four pointers are handed to the asm through a small argument block
 * whose address arrives in %eax.  The previous version fetched them from
 * 8(%ebp)..20(%ebp) after rebuilding a frame inside the asm, which is only
 * valid when the compiler emits no prologue and never inlines this function.
 * All general registers used (including %ebp) are saved and restored by the
 * asm itself.  Loop labels are numeric local labels so the body may be
 * emitted more than once.
 *
 * i386 only: the argument block is indexed with 4-byte pointer slots.
 * femms resets the MMX/x87 state before returning.
 *****************************************************************************/
static void imdct512_window_delay_3dn( complex_t *buf, float *data_ptr,
                                       float *window_prt, float *delay_prt )
{
    void *pp_args[4];

    pp_args[0] = buf;               /*  0(%ebp) */
    pp_args[1] = data_ptr;          /*  4(%ebp) */
    pp_args[2] = window_prt;        /*  8(%ebp) */
    pp_args[3] = delay_prt;         /* 12(%ebp) */

    __asm__ __volatile__ (
    "pushl %%ebp\n"
    "movl %%eax, %%ebp\n"           /* ebp -> argument block */

    "pushl %%eax\n"
    "pushl %%ebx\n"
    "pushl %%ecx\n"
    "pushl %%edx\n"
    "pushl %%esi\n"
    "pushl %%edi\n"

    "movl 12(%%ebp), %%ebx\n"       /* delay */
    "movl 8(%%ebp), %%edx\n"        /* window */

    "movl 0(%%ebp), %%eax\n"        /* buf */
    "movl $32, %%ecx\n"             /* loop count */
    "leal 516(%%eax), %%esi\n"      /* buf[64].im */
    "leal 504(%%eax), %%edi\n"      /* buf[63].re */
    "movl 4(%%ebp), %%eax\n"        /* data */

    /* loop 1: first 128 output samples */
    "1:\n"
    "movd (%%esi), %%mm0\n"         /* im0 */
    "movd 8(%%esi), %%mm2\n"        /* im1 */
    "movd (%%edi), %%mm1\n"         /* re0 */
    "movd -8(%%edi), %%mm3\n"       /* re1 */

    "pxor %%mm4, %%mm4\n"
    "pxor %%mm5, %%mm5\n"
    "pfsub %%mm0, %%mm4\n"          /* -im0 */
    "pfsub %%mm2, %%mm5\n"          /* -im1 */

    "punpckldq %%mm1, %%mm4\n"      /* re0 | -im0 */
    "punpckldq %%mm3, %%mm5\n"      /* re1 | -im1 */

    "movq (%%edx), %%mm0\n"         /* w1 | w0 */
    "movq 8(%%edx), %%mm1\n"        /* w3 | w2 */
    "movq (%%ebx), %%mm2\n"         /* d1 | d0 */
    "movq 8(%%ebx), %%mm3\n"        /* d3 | d2 */

    "pfmul %%mm4, %%mm0\n"          /* w1*re0 | -w0*im0 */
    "pfmul %%mm5, %%mm1\n"          /* w3*re1 | -w2*im1 */

    "pfadd %%mm2, %%mm0\n"          /* w1*re0+d1 | -w0*im0+d0 */
    "pfadd %%mm3, %%mm1\n"          /* w3*re1+d3 | -w2*im1+d2 */

    "addl $16, %%edx\n"
    "movq %%mm0, (%%eax)\n"
    "movq %%mm1, 8(%%eax)\n"
    "addl $16, %%ebx\n"
    "addl $16, %%esi\n"
    "addl $16, %%eax\n"
    "addl $-16, %%edi\n"
    "decl %%ecx\n"
    "jnz 1b\n"

    "movl 0(%%ebp), %%esi\n"        /* buf[0].re */
    "leal 1020(%%esi), %%edi\n"     /* buf[127].im */
    "movl $32, %%ecx\n"             /* loop count */

    /* loop 2: second 128 output samples */
    "2:\n"
    "movd (%%esi), %%mm0\n"         /* buf[i].re */
    "movd 8(%%esi), %%mm2\n"        /* re1 */
    "movd (%%edi), %%mm1\n"         /* buf[127-i].im */
    "movd -8(%%edi), %%mm3\n"       /* im1 */

    "pxor %%mm4, %%mm4\n"
    "pxor %%mm5, %%mm5\n"
    "pfsub %%mm0, %%mm4\n"          /* -re0 */
    "pfsub %%mm2, %%mm5\n"          /* -re1 */

    "punpckldq %%mm1, %%mm4\n"      /* im0 | -re0 */
    "punpckldq %%mm3, %%mm5\n"      /* im1 | -re1 */

    "movq (%%edx), %%mm0\n"         /* w1 | w0 */
    "movq 8(%%edx), %%mm1\n"        /* w3 | w2 */
    "movq (%%ebx), %%mm2\n"         /* d1 | d0 */
    "movq 8(%%ebx), %%mm3\n"        /* d3 | d2 */

    "addl $16, %%esi\n"

    "pfmul %%mm4, %%mm0\n"          /* w1*im0 | -w0*re0 */
    "pfmul %%mm5, %%mm1\n"          /* w3*im1 | -w2*re1 */

    "pfadd %%mm2, %%mm0\n"          /* w1*im0+d1 | -w0*re0+d0 */
    "pfadd %%mm3, %%mm1\n"          /* w3*im1+d3 | -w2*re1+d2 */

    "addl $-16, %%edi\n"

    "movq %%mm0, (%%eax)\n"
    "movq %%mm1, 8(%%eax)\n"
    "addl $16, %%edx\n"
    "addl $16, %%eax\n"
    "addl $16, %%ebx\n"
    "decl %%ecx\n"
    "jnz 2b\n"

    "movl 0(%%ebp), %%eax\n"
    "leal 512(%%eax), %%esi\n"      /* buf[64].re */
    "leal 508(%%eax), %%edi\n"      /* buf[63].im */
    "movl $32, %%ecx\n"             /* loop count */
    "movl 12(%%ebp), %%eax\n"       /* delay */

    /* loop 3: first 128 delay values, window walked backwards */
    "3:\n"
    "movd (%%esi), %%mm0\n"         /* re0 */
    "movd 8(%%esi), %%mm2\n"        /* re1 */
    "movd (%%edi), %%mm1\n"         /* im0 */
    "movd -8(%%edi), %%mm3\n"       /* im1 */

    "pxor %%mm4, %%mm4\n"
    "pxor %%mm5, %%mm5\n"
    "pfsub %%mm0, %%mm4\n"          /* -re0 */
    "pfsub %%mm2, %%mm5\n"          /* -re1 */

    "punpckldq %%mm1, %%mm4\n"      /* im0 | -re0 */
    "punpckldq %%mm3, %%mm5\n"      /* im1 | -re1 */

    "movq -16(%%edx), %%mm1\n"      /* w3 | w2 */
    "movq -8(%%edx), %%mm0\n"       /* w1 | w0 */

    "addl $-16, %%edx\n"

    "pfmul %%mm4, %%mm0\n"          /* w1*im0 | -w0*re0 */
    "pfmul %%mm5, %%mm1\n"          /* w3*im1 | -w2*re1 */

    "movq %%mm0, (%%eax)\n"
    "movq %%mm1, 8(%%eax)\n"
    "addl $16, %%esi\n"
    "addl $-16, %%edi\n"
    "addl $16, %%eax\n"
    "decl %%ecx\n"
    "jnz 3b\n"

    "movl 0(%%ebp), %%ebx\n"
    "leal 4(%%ebx), %%esi\n"        /* buf[0].im */
    "leal 1016(%%ebx), %%edi\n"     /* buf[127].re */
    "movl $32, %%ecx\n"             /* loop count */

    /* loop 4: second 128 delay values, window walked backwards.
     * NOTE(review): unlike loop 3, the first sample pair is multiplied by
     * the window pair at -16 and the second by the pair at -8.  This is
     * kept exactly as it was -- confirm the ordering is intended. */
    "4:\n"
    "movd (%%esi), %%mm0\n"         /* im0 */
    "movd 8(%%esi), %%mm2\n"        /* im1 */
    "movd (%%edi), %%mm1\n"         /* re0 */
    "movd -8(%%edi), %%mm3\n"       /* re1 */

    "pxor %%mm4, %%mm4\n"
    "pxor %%mm5, %%mm5\n"
    "pfsub %%mm1, %%mm4\n"          /* -re0 */
    "pfsub %%mm3, %%mm5\n"          /* -re1 */

    "punpckldq %%mm4, %%mm0\n"      /* -re0 | im0 */
    "punpckldq %%mm5, %%mm2\n"      /* -re1 | im1 */

    "movq -16(%%edx), %%mm1\n"      /* w3 | w2 */
    "movq -8(%%edx), %%mm3\n"       /* w1 | w0 */

    "addl $-16, %%edx\n"

    "pfmul %%mm0, %%mm1\n"          /* -w1*re0 | w0*im0 */
    "pfmul %%mm2, %%mm3\n"          /* -w3*re1 | w2*im1 */

    "movq %%mm1, (%%eax)\n"
    "movq %%mm3, 8(%%eax)\n"
    "addl $16, %%esi\n"
    "addl $-16, %%edi\n"
    "addl $16, %%eax\n"
    "decl %%ecx\n"
    "jnz 4b\n"

    "popl %%edi\n"
    "popl %%esi\n"
    "popl %%edx\n"
    "popl %%ecx\n"
    "popl %%ebx\n"
    "popl %%eax\n"

    "popl %%ebp\n"                  /* ebp was not a frame pointer here, so
                                     * plain pop instead of leave */
    "femms\n"
    :
    : "a" (pp_args)
    : "memory", "cc" );
}
static
void
imdct512_window_delay_nol_3dn
(
complex_t
*
buf
,
float
*
data_ptr
,
float
*
window_prt
,
float
*
delay_prt
)
{
__asm__
__volatile__
(
"pushl %%ebp
\n
"
"movl %%esp, %%ebp
\n
"
"pushl %%eax
\n
"
"pushl %%ebx
\n
"
"pushl %%ecx
\n
"
"pushl %%edx
\n
"
"pushl %%esi
\n
"
"pushl %%edi
\n
"
"movl 20(%%ebp), %%ebx
\n
"
/* delay */
"movl 16(%%ebp), %%edx
\n
"
/* window */
"movl 8(%%ebp), %%eax
\n
"
/* buf */
"movl $32, %%ecx
\n
"
/* loop count */
"leal 516(%%eax), %%esi
\n
"
/* buf[64].im */
"leal 504(%%eax), %%edi
\n
"
/* buf[63].re */
"movl 12(%%ebp), %%eax
\n
"
/* data */
".first_128_samples2:
\n
"
"movd (%%esi), %%mm0
\n
"
/* im0 */
"movd 8(%%esi), %%mm2
\n
"
/* im1 */
"movd (%%edi), %%mm1
\n
"
/* re0 */
"movd -8(%%edi), %%mm3
\n
"
/* re1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -im0 */
"pfsub %%mm2, %%mm5
\n
"
/* -im1 */
"punpckldq %%mm1, %%mm4
\n
"
/* re0 | -im0 */
"punpckldq %%mm3, %%mm5
\n
"
/* re1 | -im1 */
"movq (%%edx), %%mm0
\n
"
/* w1 | w0 */
"movq 8(%%edx), %%mm1
\n
"
/* w3 | w2 */
"pfmul %%mm4, %%mm0
\n
"
/* w1*re0 | -w0*im0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*re1 | -w2*im1 */
"addl $16, %%edx
\n
"
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%ebx
\n
"
"addl $16, %%esi
\n
"
"addl $16, %%eax
\n
"
"addl $-16, %%edi
\n
"
"decl %%ecx
\n
"
"jnz .first_128_samples2
\n
"
"movl 8(%%ebp), %%esi
\n
"
/* buf[0].re */
"leal 1020(%%esi), %%edi
\n
"
/* buf[127].im */
"movl $32, %%ecx
\n
"
/* loop count */
".second_128_samples2:
\n
"
"movd (%%esi), %%mm0
\n
"
/* buf[i].re */
"movd 8(%%esi), %%mm2
\n
"
/* re1 */
"movd (%%edi), %%mm1
\n
"
/* buf[127-i].im */
"movd -8(%%edi), %%mm3
\n
"
/* im1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -re0 */
"pfsub %%mm2, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm1, %%mm4
\n
"
/* im0 | -re0 */
"punpckldq %%mm3, %%mm5
\n
"
/* im1 | -re1 */
"movq (%%edx), %%mm0
\n
"
/* w1 | w0 */
"movq 8(%%edx), %%mm1
\n
"
/* w3 | w2 */
"addl $16, %%esi
\n
"
"pfmul %%mm4, %%mm0
\n
"
/* w1*im0 | -w0*re0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*im1 | -w2*re1 */
"addl $-16, %%edi
\n
"
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%edx
\n
"
"addl $16, %%eax
\n
"
"addl $16, %%ebx
\n
"
"decl %%ecx
\n
"
"jnz .second_128_samples2
\n
"
"movl 8(%%ebp), %%eax
\n
"
"leal 512(%%eax), %%esi
\n
"
/* buf[64].re */
"leal 508(%%eax), %%edi
\n
"
/* buf[63].im */
"movl $32, %%ecx
\n
"
/* loop count */
"movl 20(%%ebp), %%eax
\n
"
/* delay */
".first_128_delays:
\n
"
"movd (%%esi), %%mm0
\n
"
/* re0 */
"movd 8(%%esi), %%mm2
\n
"
/* re1 */
"movd (%%edi), %%mm1
\n
"
/* im0 */
"movd -8(%%edi), %%mm3
\n
"
/* im1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -re0 */
"pfsub %%mm2, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm1, %%mm4
\n
"
/* im0 | -re0 */
"punpckldq %%mm3, %%mm5
\n
"
/* im1 | -re1 */
"movq -16(%%edx), %%mm1
\n
"
/* w3 | w2 */
"movq -8(%%edx), %%mm0
\n
"
/* w1 | w0 */
"addl $-16, %%edx
\n
"
"pfmul %%mm4, %%mm0
\n
"
/* w1*im0 | -w0*re0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*im1 | -w2*re1 */
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%esi
\n
"
"addl $-16, %%edi
\n
"
"addl $16, %%eax
\n
"
"decl %%ecx
\n
"
"jnz .first_128_delays
\n
"
"movl 8(%%ebp), %%ebx
\n
"
"leal 4(%%ebx), %%esi
\n
"
/* buf[0].im */
"leal 1016(%%ebx), %%edi
\n
"
/* buf[127].re */
"movl $32, %%ecx
\n
"
/* loop count */
".second_128_delays:
\n
"
"movd (%%esi), %%mm0
\n
"
/* im0 */
"movd 8(%%esi), %%mm2
\n
"
/* im1 */
"movd (%%edi), %%mm1
\n
"
/* re0 */
"movd -8(%%edi), %%mm3
\n
"
/* re1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm1, %%mm4
\n
"
/* -re0 */
"pfsub %%mm3, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm4, %%mm0
\n
"
/* -re0 | im0 */
"punpckldq %%mm5, %%mm2
\n
"
/* -re1 | im1 */
"movq -16(%%edx), %%mm1
\n
"
/* w3 | w2 */
"movq -8(%%edx), %%mm3
\n
"
/* w1 | w0 */
"addl $-16, %%edx
\n
"
"pfmul %%mm0, %%mm1
\n
"
/* -w1*re0 | w0*im0 */
"pfmul %%mm2, %%mm3
\n
"
/* -w3*re1 | w2*im1 */
"movq %%mm1, (%%eax)
\n
"
"movq %%mm3, 8(%%eax)
\n
"
"addl $16, %%esi
\n
"