Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
VLC
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
15
Merge Requests
15
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Steve Lhomme
VLC
Commits
5f232025
Commit
5f232025
authored
May 16, 2001
by
Renaud Dartus
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* Add 3D Now! imdct
* Remove kludge for ac3 on MacOS X
parent
1ac785a2
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1180 additions
and
197 deletions
+1180
-197
Makefile
Makefile
+1
-6
configure
configure
+1
-1
configure.in
configure.in
+1
-1
plugins/imdct/Makefile
plugins/imdct/Makefile
+16
-2
plugins/imdct/ac3_imdct_3dn.c
plugins/imdct/ac3_imdct_3dn.c
+559
-0
plugins/imdct/ac3_imdct_c.c
plugins/imdct/ac3_imdct_c.c
+2
-60
plugins/imdct/ac3_imdct_common.c
plugins/imdct/ac3_imdct_common.c
+3
-62
plugins/imdct/ac3_imdct_common.h
plugins/imdct/ac3_imdct_common.h
+1
-2
plugins/imdct/ac3_imdct_sse.c
plugins/imdct/ac3_imdct_sse.c
+6
-52
plugins/imdct/ac3_retables.h
plugins/imdct/ac3_retables.h
+83
-0
plugins/imdct/ac3_srfft_3dn.c
plugins/imdct/ac3_srfft_3dn.c
+344
-0
plugins/imdct/ac3_srfft_sse.c
plugins/imdct/ac3_srfft_sse.c
+9
-9
plugins/imdct/imdct3dn.c
plugins/imdct/imdct3dn.c
+152
-0
src/ac3_decoder/ac3_imdct.c
src/ac3_decoder/ac3_imdct.c
+2
-2
No files found.
Makefile
View file @
5f232025
...
...
@@ -26,7 +26,7 @@ PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \
dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk
\
downmix/downmix downmix/downmixsse downmix/downmix3dn
\
idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext
\
imdct/imdct imdct/imdctsse
\
imdct/imdct imdct/imdct
3dn imdct/imdct
sse
\
macosx/macosx mga/mga
\
motion/motion motion/motionmmx motion/motionmmxext
\
mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl
\
...
...
@@ -317,12 +317,7 @@ endif
$(C_OBJ)
:
%.o: Makefile.opts Makefile.dep Makefile
$(C_OBJ)
:
%.o: .dep/%.d
$(C_OBJ)
:
%.o: %.c
ifneq
(,$(findstring darwin,$(SYS)))
#this is uglier of all
@if
test
"src/ac3_decoder/ac3_imdct.c"
=
"$<"
;
then
$(CC)
`echo
$(CFLAGS)
|
sed
-e
's/-O3/-O/'
`
-c
-o
$@
$<;
echo
"(CC) `echo $(CFLAGS) | sed -e 's/-O3/-O/'` -c -o $@ $<"
;
else
$(CC)
$(CFLAGS)
-c
-o
$@
$<;
echo
"$(CC) $(CFLAGS) -c -o $@ $<"
;
fi
else
$(CC)
$(CFLAGS)
-c
-o
$@
$<
endif
$(CPP_OBJ)
:
%.o: Makefile.opts Makefile.dep Makefile
$(CPP_OBJ)
:
%.o: .dep/%.dpp
...
...
configure
View file @
5f232025
...
...
@@ -3285,7 +3285,7 @@ int main() {
EOF
if
{
(
eval echo
configure:3287:
\"
$ac_compile
\"
)
1>&5
;
(
eval
$ac_compile
)
2>&5
;
}
;
then
rm
-rf
conftest
*
ACCEL_PLUGINS
=
"
${
ACCEL_PLUGINS
}
idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
ACCEL_PLUGINS
=
"
${
ACCEL_PLUGINS
}
idctmmxext motionmmxext imdct
3dn imdct
sse downmix3dn downmixsse"
echo
"
$ac_t
""yes"
1>&6
else
echo
"configure: failed program was:"
>
&5
...
...
configure.in
View file @
5f232025
...
...
@@ -162,7 +162,7 @@ AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],,
AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly])
AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],,
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdct
3dn imdct
sse downmix3dn downmixsse"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
dnl
...
...
plugins/imdct/Makefile
View file @
5f232025
...
...
@@ -9,15 +9,18 @@
PLUGIN_IMDCT
=
imdct.o ac3_imdct_c.o ac3_srfft_c.o
PLUGIN_IMDCTSSE
=
imdctsse.o ac3_imdct_sse.o ac3_srfft_sse.o
PLUGIN_IMDCT3DN
=
imdct3dn.o ac3_imdct_3dn.o ac3_srfft_3dn.o
PLUGIN_IMDCTCOMMON
=
ac3_imdct_common.o
BUILTIN_IMDCT
=
$(PLUGIN_IMDCT:%.o=BUILTIN_IMDCT_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT_%.o)
BUILTIN_IMDCTSSE
=
$(PLUGIN_IMDCTSSE:%.o=BUILTIN_IMDCTSSE_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCTSSE_%.o)
BUILTIN_IMDCT3DN
=
$(PLUGIN_IMDCT3DN:%.o=BUILTIN_IMDCT3DN_%.o)
\
$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT3DN_%.o)
PLUGIN_C
=
$(PLUGIN_IMDCT)
$(PLUGIN_IMDCTSSE)
$(PLUGIN_IMDCTCOMMON)
ALL_OBJ
=
$(PLUGIN_C)
$(BUILTIN_IMDCT)
$(BUILTIN_IMDCTSSE)
PLUGIN_C
=
$(PLUGIN_IMDCT)
$(PLUGIN_IMDCTSSE)
$(PLUGIN_IMDCT
3DN)
$(PLUGIN_IMDCT
COMMON)
ALL_OBJ
=
$(PLUGIN_C)
$(BUILTIN_IMDCT)
$(BUILTIN_IMDCTSSE)
$(BUILTIN_IMDCT3DN)
#
# Virtual targets
...
...
@@ -33,6 +36,10 @@ $(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: .dep/%.d
$(BUILTIN_IMDCTSSE)
:
BUILTIN_IMDCTSSE_%.o: %.c
$(CC)
$(CFLAGS)
-DBUILTIN
-DMODULE_NAME
=
imdctsse
-c
-o
$@
$<
$(BUILTIN_IMDCT3DN)
:
BUILTIN_IMDCT3DN_%.o: .dep/%.d
$(BUILTIN_IMDCT3DN)
:
BUILTIN_IMDCT3DN_%.o: %.c
$(CC)
$(CFLAGS)
-DBUILTIN
-DMODULE_NAME
=
imdct3dn
-c
-o
$@
$<
#
# Real targets
#
...
...
@@ -51,3 +58,10 @@ $(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: %.c
ar r
$@
$^
$(RANLIB)
$@
../../lib/imdct3dn.so
:
$(PLUGIN_IMDCT3DN) $(PLUGIN_IMDCTCOMMON)
$(CC)
$(PCFLAGS)
-o
$@
$^
$(PLCFLAGS)
../../lib/imdct3dn.a
:
$(BUILTIN_IMDCT3DN)
ar r
$@
$^
$(RANLIB)
$@
plugins/imdct/ac3_imdct_3dn.c
0 → 100644
View file @
5f232025
/*****************************************************************************
* ac3_imdct_3dn.c: 3D Now! accelerated ac3 IMDCT
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct_3dn.c,v 1.1 2001/05/16 14:51:29 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#define MODULE_NAME imdct3dn
#include "modules_inner.h"
/*****************************************************************************
* Preamble
*****************************************************************************/
#include "defs.h"
#include <math.h>
#include <stdio.h>
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "ac3_imdct.h"
#include "ac3_imdct_common.h"
#include "ac3_retables.h"
/*****************************************************************************
 * Forward declarations
 *****************************************************************************/
/* FFT kernels, declared here and defined outside this file section. */
void _M( fft_64p )  ( complex_t *x );
void _M( fft_128p ) ( complex_t *a );

/* File-local stages of the 512-point IMDCT, in the order they are run by
 * imdct_do_512 / imdct_do_512_nol. */
static void imdct512_pre_ifft_twiddle_3dn( const int *pmt, complex_t *buf,
                                           float *data, float *xcos_sin_sse );
static void imdct512_post_ifft_twiddle_3dn( complex_t *buf,
                                            float *xcos_sin_sse );
static void imdct512_window_delay_3dn( complex_t *buf, float *data_ptr,
                                       float *window_prt, float *delay_prt );
static void imdct512_window_delay_nol_3dn( complex_t *buf, float *data_ptr,
                                           float *window_prt,
                                           float *delay_prt );
/*****************************************************************************
 * imdct_init: fill the interleaved cosine/sine twiddle table
 *****************************************************************************
 * For every i in [0, 128) four consecutive floats are written to
 * p_imdct->xcos_sin_sse[4*i .. 4*i+3]:
 *
 *      { c_i, -s_i, -s_i, -c_i }
 *
 * where c_i = scale * cos( 2*PI*(8*i+1) / (8*N) ) and s_i is the matching
 * sine. 512 floats are written in total. The 3D Now! twiddle routines of
 * this file read the table as two 64-bit words per entry.
 *
 * This routine used to print "imct_init" / "done imct_init" on stderr at
 * every call; those debugging traces have been removed.
 *****************************************************************************/
void _M( imdct_init ) ( imdct_t * p_imdct )
{
    int i;
    /* Numerically close to 128 * sqrt(2).
     * NOTE(review): the reason for this gain is not visible in this file. */
    const float scale = 181.019;

    for( i = 0; i < 128; i++ )
    {
        /* Same expression as before, evaluated once instead of twice. */
        const double f_angle = 2.0f * M_PI * (8 * i + 1) / (8 * N);
        const float xcos_i = cos( f_angle ) * scale;
        const float xsin_i = sin( f_angle ) * scale;
        float * const p_entry = &p_imdct->xcos_sin_sse[i * 4];

        p_entry[0] =  xcos_i;
        p_entry[1] = -xsin_i;
        p_entry[2] = -xsin_i;
        p_entry[3] = -xcos_i;
    }
}
/*****************************************************************************
 * imdct_do_512: 512-point IMDCT, variant ending with the overlapping window
 *****************************************************************************
 * Runs, in order: the pre-IFFT twiddle (indexed through pm128), the
 * 128-point FFT, the post-IFFT twiddle, and imdct512_window_delay_3dn,
 * which writes into both data and delay.
 *****************************************************************************/
void _M( imdct_do_512 ) ( imdct_t * p_imdct, float data[], float delay[] )
{
    complex_t * const p_work    = p_imdct->buf;
    float * const     p_twiddle = p_imdct->xcos_sin_sse;

    imdct512_pre_ifft_twiddle_3dn( pm128, p_work, data, p_twiddle );
    _M( fft_128p ) ( p_work );
    imdct512_post_ifft_twiddle_3dn( p_work, p_twiddle );
    imdct512_window_delay_3dn( p_work, data, window, delay );
}
/*****************************************************************************
 * imdct_do_512_nol: 512-point IMDCT, "nol" windowing variant
 *****************************************************************************
 * Same pipeline as imdct_do_512 (pre-IFFT twiddle through pm128, 128-point
 * FFT, post-IFFT twiddle); only the last stage differs, which is
 * imdct512_window_delay_nol_3dn here.
 *****************************************************************************/
void _M( imdct_do_512_nol ) ( imdct_t * p_imdct, float data[], float delay[] )
{
    complex_t * const p_work    = p_imdct->buf;
    float * const     p_twiddle = p_imdct->xcos_sin_sse;

    imdct512_pre_ifft_twiddle_3dn( pm128, p_work, data, p_twiddle );
    _M( fft_128p ) ( p_work );
    imdct512_post_ifft_twiddle_3dn( p_work, p_twiddle );
    imdct512_window_delay_nol_3dn( p_work, data, window, delay );
}
/*****************************************************************************
 * imdct512_pre_ifft_twiddle_3dn: pre-IFFT complex multiply (3D Now!)
 *****************************************************************************
 * For i in [0, 128), with j = pmt[i] and (c, s) the table entry j written by
 * imdct_init (xcos_sin_sse[4*j .. 4*j+3] = { c, -s, -s, -c }):
 *
 *      float 2*i   of buf =  c * data[255 - 2*j] - s * data[2*j]
 *      float 2*i+1 of buf = -s * data[255 - 2*j] - c * data[2*j]
 *
 * buf is written as consecutive pairs of 32-bit floats (8 bytes per complex
 * value), exactly like the byte offsets used by the previous version.
 *
 * The previous version fetched its C arguments from 8/12/16/20(%ebp) after
 * pushing %ebp itself. That is only right when the compiler builds no stack
 * frame and never inlines this function. It also used a non-local label.
 * Addresses are now computed in C and handed to the asm as register
 * operands; each iteration executes the same MMX / 3D Now! instructions.
 *
 * Register lanes are annotated "high | low". i386 / AT&T syntax only.
 *****************************************************************************/
static void imdct512_pre_ifft_twiddle_3dn( const int *pmt, complex_t *buf,
                                           float *data, float *xcos_sin_sse )
{
    float *p_out = (float *)buf;
    int i;

    for( i = 0; i < 128; i++, p_out += 2 )
    {
        const int j = pmt[i];

        /* Only integer/pointer arithmetic happens between two iterations,
         * so leaving the MMX state active until the final femms is safe. */
        __asm__ __volatile__ (
        "movd (%0), %%mm1\n"            /* data[2j] */
        "punpckldq %%mm1, %%mm1\n"      /* data[2j] | data[2j] */
        "movq (%2), %%mm0\n"            /* -s_j | c_j */
        "movq 8(%2), %%mm2\n"           /* -c_j | -s_j */
        "movd (%1), %%mm4\n"            /* data[255-2j] */
        "punpckldq %%mm4, %%mm4\n"      /* data[255-2j] | data[255-2j] */
        "pfmul %%mm4, %%mm0\n"          /* d255 * -s_j | d255 * c_j */
        "pfmul %%mm1, %%mm2\n"          /* d2j * -c_j | d2j * -s_j */
        "pfadd %%mm2, %%mm0\n"          /* sum of the two products */
        "movq %%mm0, (%3)\n"            /* store the complex value i */
        :
        : "r" (data + 2 * j), "r" (data + 255 - 2 * j),
          "r" (xcos_sin_sse + 4 * j), "r" (p_out)
        : "memory" );
    }

    /* Leave the MMX state clean before any x87 code runs again. */
    __asm__ __volatile__ ( "femms" : : : "memory" );
}
/*****************************************************************************
 * imdct512_post_ifft_twiddle_3dn: post-IFFT complex multiply (3D Now!)
 *****************************************************************************
 * Processes buf in place, two complex values per iteration, 64 iterations
 * (128 values). For value k = (re, im) and table entry k = { c, -s, -s, -c }
 * (layout written by imdct_init):
 *
 *      re' = -c * re - s * im
 *      im' = -s * re + c * im
 *
 * The previous version declared %ecx as an input-only operand and then
 * advanced it inside the asm, pushed/popped %ebx around the loop and used a
 * non-local label. The pointers are now advanced in C and passed as plain
 * register inputs; the arithmetic instructions are unchanged.
 *
 * Register lanes are annotated "high | low". i386 / AT&T syntax only.
 *****************************************************************************/
static void imdct512_post_ifft_twiddle_3dn( complex_t *buf,
                                            float *xcos_sin_sse )
{
    float *p_pair = (float *)buf;           /* two complex values = 4 floats */
    const float *p_tw = xcos_sin_sse;       /* two table entries  = 8 floats */
    int i;

    for( i = 0; i < 64; i++, p_pair += 4, p_tw += 8 )
    {
        __asm__ __volatile__ (
        "movq (%0), %%mm0\n"            /* im0 | re0 */
        "movq %%mm0, %%mm1\n"           /* im0 | re0 */
        "punpckldq %%mm0, %%mm0\n"      /* re0 | re0 */
        "punpckhdq %%mm1, %%mm1\n"      /* im0 | im0 */
        "movq (%1), %%mm2\n"            /* -s | c */
        "movq 8(%1), %%mm3\n"           /* -c | -s */
        "movq %%mm3, %%mm4\n"
        "punpckhdq %%mm2, %%mm3\n"      /* -s | -c */
        "punpckldq %%mm2, %%mm4\n"      /* c | -s */
        "movq 8(%0), %%mm2\n"           /* im1 | re1 */
        "movq %%mm2, %%mm5\n"           /* im1 | re1 */
        "punpckldq %%mm2, %%mm2\n"      /* re1 | re1 */
        "punpckhdq %%mm5, %%mm5\n"      /* im1 | im1 */
        "pfmul %%mm3, %%mm0\n"          /* -s * re0 | -c * re0 */
        "pfmul %%mm4, %%mm1\n"          /* c * im0 | -s * im0 */
        "movq 16(%1), %%mm6\n"          /* -s1 | c1 */
        "movq 24(%1), %%mm7\n"          /* -c1 | -s1 */
        "movq %%mm7, %%mm4\n"
        "punpckhdq %%mm6, %%mm7\n"      /* -s1 | -c1 */
        "punpckldq %%mm6, %%mm4\n"      /* c1 | -s1 */
        "pfmul %%mm7, %%mm2\n"          /* -s1 * re1 | -c1 * re1 */
        "pfmul %%mm4, %%mm5\n"          /* c1 * im1 | -s1 * im1 */
        "pfadd %%mm1, %%mm0\n"          /* -s*re0 + c*im0 | -c*re0 - s*im0 */
        "pfadd %%mm5, %%mm2\n"          /* -s1*re1 + c1*im1 | -c1*re1 - s1*im1 */
        "movq %%mm0, (%0)\n"
        "movq %%mm2, 8(%0)\n"
        :
        : "r" (p_pair), "r" (p_tw)
        : "memory" );
    }

    /* Leave the MMX state clean before any x87 code runs again. */
    __asm__ __volatile__ ( "femms" : : : "memory" );
}
/*****************************************************************************
 * imdct512_overlap_add_pass_3dn: one 128-sample window + overlap-add pass
 *****************************************************************************
 * Helper of imdct512_window_delay_3dn. For i in [0, 32), with
 * a0 = p_neg[4i], a1 = p_neg[4i+2], b0 = p_pos[-4i], b1 = p_pos[-4i-2]:
 *
 *      p_out[4i+0] = -a0 * p_win[4i+0] + p_dly[4i+0]
 *      p_out[4i+1] =  b0 * p_win[4i+1] + p_dly[4i+1]
 *      p_out[4i+2] = -a1 * p_win[4i+2] + p_dly[4i+2]
 *      p_out[4i+3] =  b1 * p_win[4i+3] + p_dly[4i+3]
 *
 * p_neg is walked forwards and p_pos backwards, one complex value (two
 * floats) per sample. Register lanes are annotated "high | low".
 *****************************************************************************/
static void imdct512_overlap_add_pass_3dn( const float *p_neg,
                                           const float *p_pos,
                                           const float *p_win,
                                           const float *p_dly,
                                           float *p_out )
{
    int i;

    for( i = 0; i < 32; i++ )
    {
        __asm__ __volatile__ (
        "movd (%0), %%mm0\n"            /* a0 */
        "movd 8(%0), %%mm2\n"           /* a1 */
        "movd (%1), %%mm1\n"            /* b0 */
        "movd -8(%1), %%mm3\n"          /* b1 */
        "pxor %%mm4, %%mm4\n"
        "pxor %%mm5, %%mm5\n"
        "pfsub %%mm0, %%mm4\n"          /* -a0 */
        "pfsub %%mm2, %%mm5\n"          /* -a1 */
        "punpckldq %%mm1, %%mm4\n"      /* b0 | -a0 */
        "punpckldq %%mm3, %%mm5\n"      /* b1 | -a1 */
        "movq (%2), %%mm0\n"            /* w1 | w0 */
        "movq 8(%2), %%mm1\n"           /* w3 | w2 */
        "movq (%3), %%mm2\n"            /* d1 | d0 */
        "movq 8(%3), %%mm3\n"           /* d3 | d2 */
        "pfmul %%mm4, %%mm0\n"          /* w1*b0 | -w0*a0 */
        "pfmul %%mm5, %%mm1\n"          /* w3*b1 | -w2*a1 */
        "pfadd %%mm2, %%mm0\n"          /* w1*b0+d1 | -w0*a0+d0 */
        "pfadd %%mm3, %%mm1\n"          /* w3*b1+d3 | -w2*a1+d2 */
        "movq %%mm0, (%4)\n"
        "movq %%mm1, 8(%4)\n"
        :
        : "r" (p_neg + 4 * i), "r" (p_pos - 4 * i), "r" (p_win + 4 * i),
          "r" (p_dly + 4 * i), "r" (p_out + 4 * i)
        : "memory" );
    }

    /* Leave the MMX state clean before returning to compiled code. */
    __asm__ __volatile__ ( "femms" : : : "memory" );
}

/*****************************************************************************
 * imdct512_window_delay_3dn: window, overlap-add and delay update (3D Now!)
 *****************************************************************************
 * buf is read as 256 consecutive floats: complex value k is floats 2k (re)
 * and 2k+1 (im), as assumed by the byte offsets of the previous version.
 *
 *  pass 1: data_ptr[0..127]    from buf[64..].im (negated) / buf[63..].re,
 *                              window_prt[0..127], delay_prt[0..127]
 *  pass 2: data_ptr[128..255]  from buf[0..].re (negated) / buf[127..].im,
 *                              window_prt[128..255], delay_prt[128..255]
 *  pass 3: delay_prt[0..127]   from buf[64..].re (negated) / buf[63..].im,
 *                              window read downwards from window_prt + 256
 *  pass 4: delay_prt[128..255] from buf[0..].im / buf[127..].re (negated),
 *                              window read downwards from window_prt + 128
 *
 * The previous version fetched its C arguments from 8/12/16/20(%ebp) after
 * pushing %ebp itself, which is only right when the compiler builds no stack
 * frame and never inlines this function, and it used non-local labels.
 * Addresses are now computed in C and passed as register operands; every
 * pass executes the same MMX / 3D Now! instructions as before.
 *
 * NOTE(review): passes 3 and 4 step the window down 16 bytes at a time but
 * pair the two 8-byte halves differently: pass 3 multiplies its first output
 * pair by the floats at -8(win) and pass 4 by the floats at -16(win). This
 * is reproduced unchanged; confirm against the C reference implementation.
 *
 * Register lanes are annotated "high | low". i386 / AT&T syntax only.
 *****************************************************************************/
static void imdct512_window_delay_3dn( complex_t *buf, float *data_ptr,
                                       float *window_prt, float *delay_prt )
{
    const float * const p_f = (const float *)buf;
    int i;

    /* Pass 1: first 128 output samples. */
    imdct512_overlap_add_pass_3dn( p_f + 129,       /* buf[64].im */
                                   p_f + 126,       /* buf[63].re */
                                   window_prt, delay_prt, data_ptr );

    /* Pass 2: second 128 output samples. */
    imdct512_overlap_add_pass_3dn( p_f,             /* buf[0].re */
                                   p_f + 255,       /* buf[127].im */
                                   window_prt + 128, delay_prt + 128,
                                   data_ptr + 128 );

    /* Pass 3: first 128 delay samples. */
    for( i = 0; i < 32; i++ )
    {
        __asm__ __volatile__ (
        "movd (%0), %%mm0\n"            /* re0 */
        "movd 8(%0), %%mm2\n"           /* re1 */
        "movd (%1), %%mm1\n"            /* im0 */
        "movd -8(%1), %%mm3\n"          /* im1 */
        "pxor %%mm4, %%mm4\n"
        "pxor %%mm5, %%mm5\n"
        "pfsub %%mm0, %%mm4\n"          /* -re0 */
        "pfsub %%mm2, %%mm5\n"          /* -re1 */
        "punpckldq %%mm1, %%mm4\n"      /* im0 | -re0 */
        "punpckldq %%mm3, %%mm5\n"      /* im1 | -re1 */
        "movq -16(%2), %%mm1\n"         /* window floats at -16 bytes */
        "movq -8(%2), %%mm0\n"          /* window floats at -8 bytes */
        "pfmul %%mm4, %%mm0\n"          /* first pair  * floats at -8 */
        "pfmul %%mm5, %%mm1\n"          /* second pair * floats at -16 */
        "movq %%mm0, (%3)\n"
        "movq %%mm1, 8(%3)\n"
        :
        : "r" (p_f + 128 + 4 * i),              /* buf[64 + 2i].re */
          "r" (p_f + 127 - 4 * i),              /* buf[63 - 2i].im */
          "r" (window_prt + 256 - 4 * i),
          "r" (delay_prt + 4 * i)
        : "memory" );
    }

    /* Pass 4: second 128 delay samples. */
    for( i = 0; i < 32; i++ )
    {
        __asm__ __volatile__ (
        "movd (%0), %%mm0\n"            /* im0 */
        "movd 8(%0), %%mm2\n"           /* im1 */
        "movd (%1), %%mm1\n"            /* re0 */
        "movd -8(%1), %%mm3\n"          /* re1 */
        "pxor %%mm4, %%mm4\n"
        "pxor %%mm5, %%mm5\n"
        "pfsub %%mm1, %%mm4\n"          /* -re0 */
        "pfsub %%mm3, %%mm5\n"          /* -re1 */
        "punpckldq %%mm4, %%mm0\n"      /* -re0 | im0 */
        "punpckldq %%mm5, %%mm2\n"      /* -re1 | im1 */
        "movq -16(%2), %%mm1\n"         /* window floats at -16 bytes */
        "movq -8(%2), %%mm3\n"          /* window floats at -8 bytes */
        "pfmul %%mm0, %%mm1\n"          /* first pair  * floats at -16 */
        "pfmul %%mm2, %%mm3\n"          /* second pair * floats at -8 */
        "movq %%mm1, (%3)\n"
        "movq %%mm3, 8(%3)\n"
        :
        : "r" (p_f + 1 + 4 * i),                /* buf[2i].im */
          "r" (p_f + 254 - 4 * i),              /* buf[127 - 2i].re */
          "r" (window_prt + 128 - 4 * i),
          "r" (delay_prt + 128 + 4 * i)
        : "memory" );
    }

    /* Leave the MMX state clean before any x87 code runs again. */
    __asm__ __volatile__ ( "femms" : : : "memory" );
}
static
void
imdct512_window_delay_nol_3dn
(
complex_t
*
buf
,
float
*
data_ptr
,
float
*
window_prt
,
float
*
delay_prt
)
{
__asm__
__volatile__
(
"pushl %%ebp
\n
"
"movl %%esp, %%ebp
\n
"
"pushl %%eax
\n
"
"pushl %%ebx
\n
"
"pushl %%ecx
\n
"
"pushl %%edx
\n
"
"pushl %%esi
\n
"
"pushl %%edi
\n
"
"movl 20(%%ebp), %%ebx
\n
"
/* delay */
"movl 16(%%ebp), %%edx
\n
"
/* window */
"movl 8(%%ebp), %%eax
\n
"
/* buf */
"movl $32, %%ecx
\n
"
/* loop count */
"leal 516(%%eax), %%esi
\n
"
/* buf[64].im */
"leal 504(%%eax), %%edi
\n
"
/* buf[63].re */
"movl 12(%%ebp), %%eax
\n
"
/* data */
".first_128_samples2:
\n
"
"movd (%%esi), %%mm0
\n
"
/* im0 */
"movd 8(%%esi), %%mm2
\n
"
/* im1 */
"movd (%%edi), %%mm1
\n
"
/* re0 */
"movd -8(%%edi), %%mm3
\n
"
/* re1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -im0 */
"pfsub %%mm2, %%mm5
\n
"
/* -im1 */
"punpckldq %%mm1, %%mm4
\n
"
/* re0 | -im0 */
"punpckldq %%mm3, %%mm5
\n
"
/* re1 | -im1 */
"movq (%%edx), %%mm0
\n
"
/* w1 | w0 */
"movq 8(%%edx), %%mm1
\n
"
/* w3 | w2 */
"pfmul %%mm4, %%mm0
\n
"
/* w1*re0 | -w0*im0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*re1 | -w2*im1 */
"addl $16, %%edx
\n
"
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%ebx
\n
"
"addl $16, %%esi
\n
"
"addl $16, %%eax
\n
"
"addl $-16, %%edi
\n
"
"decl %%ecx
\n
"
"jnz .first_128_samples2
\n
"
"movl 8(%%ebp), %%esi
\n
"
/* buf[0].re */
"leal 1020(%%esi), %%edi
\n
"
/* buf[127].im */
"movl $32, %%ecx
\n
"
/* loop count */
".second_128_samples2:
\n
"
"movd (%%esi), %%mm0
\n
"
/* buf[i].re */
"movd 8(%%esi), %%mm2
\n
"
/* re1 */
"movd (%%edi), %%mm1
\n
"
/* buf[127-i].im */
"movd -8(%%edi), %%mm3
\n
"
/* im1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -re0 */
"pfsub %%mm2, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm1, %%mm4
\n
"
/* im0 | -re0 */
"punpckldq %%mm3, %%mm5
\n
"
/* im1 | -re1 */
"movq (%%edx), %%mm0
\n
"
/* w1 | w0 */
"movq 8(%%edx), %%mm1
\n
"
/* w3 | w2 */
"addl $16, %%esi
\n
"
"pfmul %%mm4, %%mm0
\n
"
/* w1*im0 | -w0*re0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*im1 | -w2*re1 */
"addl $-16, %%edi
\n
"
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%edx
\n
"
"addl $16, %%eax
\n
"
"addl $16, %%ebx
\n
"
"decl %%ecx
\n
"
"jnz .second_128_samples2
\n
"
"movl 8(%%ebp), %%eax
\n
"
"leal 512(%%eax), %%esi
\n
"
/* buf[64].re */
"leal 508(%%eax), %%edi
\n
"
/* buf[63].im */
"movl $32, %%ecx
\n
"
/* loop count */
"movl 20(%%ebp), %%eax
\n
"
/* delay */
".first_128_delays:
\n
"
"movd (%%esi), %%mm0
\n
"
/* re0 */
"movd 8(%%esi), %%mm2
\n
"
/* re1 */
"movd (%%edi), %%mm1
\n
"
/* im0 */
"movd -8(%%edi), %%mm3
\n
"
/* im1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm0, %%mm4
\n
"
/* -re0 */
"pfsub %%mm2, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm1, %%mm4
\n
"
/* im0 | -re0 */
"punpckldq %%mm3, %%mm5
\n
"
/* im1 | -re1 */
"movq -16(%%edx), %%mm1
\n
"
/* w3 | w2 */
"movq -8(%%edx), %%mm0
\n
"
/* w1 | w0 */
"addl $-16, %%edx
\n
"
"pfmul %%mm4, %%mm0
\n
"
/* w1*im0 | -w0*re0 */
"pfmul %%mm5, %%mm1
\n
"
/* w3*im1 | -w2*re1 */
"movq %%mm0, (%%eax)
\n
"
"movq %%mm1, 8(%%eax)
\n
"
"addl $16, %%esi
\n
"
"addl $-16, %%edi
\n
"
"addl $16, %%eax
\n
"
"decl %%ecx
\n
"
"jnz .first_128_delays
\n
"
"movl 8(%%ebp), %%ebx
\n
"
"leal 4(%%ebx), %%esi
\n
"
/* buf[0].im */
"leal 1016(%%ebx), %%edi
\n
"
/* buf[127].re */
"movl $32, %%ecx
\n
"
/* loop count */
".second_128_delays:
\n
"
"movd (%%esi), %%mm0
\n
"
/* im0 */
"movd 8(%%esi), %%mm2
\n
"
/* im1 */
"movd (%%edi), %%mm1
\n
"
/* re0 */
"movd -8(%%edi), %%mm3
\n
"
/* re1 */
"pxor %%mm4, %%mm4
\n
"
"pxor %%mm5, %%mm5
\n
"
"pfsub %%mm1, %%mm4
\n
"
/* -re0 */
"pfsub %%mm3, %%mm5
\n
"
/* -re1 */
"punpckldq %%mm4, %%mm0
\n
"
/* -re0 | im0 */
"punpckldq %%mm5, %%mm2
\n
"
/* -re1 | im1 */
"movq -16(%%edx), %%mm1
\n
"
/* w3 | w2 */
"movq -8(%%edx), %%mm3
\n
"
/* w1 | w0 */
"addl $-16, %%edx
\n
"
"pfmul %%mm0, %%mm1
\n
"
/* -w1*re0 | w0*im0 */
"pfmul %%mm2, %%mm3
\n
"
/* -w3*re1 | w2*im1 */
"movq %%mm1, (%%eax)
\n
"
"movq %%mm3, 8(%%eax)
\n
"
"addl $16, %%esi
\n
"
"addl $-16, %%edi
\n
"