Commit 501cb1ba authored by Sam Hocevar's avatar Sam Hocevar

  * AC3 IMDCT and downmix functions are now in plugins, --imdct and
    --downmix options added.
parent 32ef13cf
......@@ -13,7 +13,7 @@
#
PLUGINS_DIR := alsa beos darwin dsp dummy \
dvd esd fb ggi glide gnome gtk \
idct \
downmix idct imdct \
macosx mga \
motion \
mpeg null qt sdl \
......@@ -24,7 +24,9 @@ PLUGINS_DIR := alsa beos darwin dsp dummy \
#
PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \
dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk \
downmix/downmix downmix/downmixsse downmix/downmix3dn \
idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext \
imdct/imdct imdct/imdctsse \
macosx/macosx mga/mga \
motion/motion motion/motionmmx motion/motionmmxext \
mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl \
......@@ -69,11 +71,7 @@ AC3_DECODER = src/ac3_decoder/ac3_decoder_thread.o \
src/ac3_decoder/ac3_bit_allocate.o \
src/ac3_decoder/ac3_mantissa.o \
src/ac3_decoder/ac3_rematrix.o \
src/ac3_decoder/ac3_imdct.o \
src/ac3_decoder/ac3_imdct_c.o \
src/ac3_decoder/ac3_srfft.o \
src/ac3_decoder/ac3_downmix.o \
src/ac3_decoder/ac3_downmix_c.o
src/ac3_decoder/ac3_imdct.o
AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \
src/ac3_spdif/ac3_iec958.o
......
......@@ -3162,7 +3162,7 @@ fi
ARCH=${host_cpu}
BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
case x$host_os in
xmingw32msvc)
......@@ -3195,8 +3195,8 @@ else
fi
rm -f conftest*
echo $ac_n "checking if \$CC groks MMX EXT (SSE) inline assembly""... $ac_c" 1>&6
echo "configure:3200: checking if \$CC groks MMX EXT (SSE) inline assembly" >&5
echo $ac_n "checking if \$CC groks MMX EXT or SSE inline assembly""... $ac_c" 1>&6
echo "configure:3200: checking if \$CC groks MMX EXT or SSE inline assembly" >&5
cat > conftest.$ac_ext <<EOF
#line 3202 "configure"
#include "confdefs.h"
......@@ -3207,7 +3207,7 @@ int main() {
EOF
if { (eval echo configure:3209: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
rm -rf conftest*
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
echo "$ac_t""yes" 1>&6
else
echo "configure: failed program was:" >&5
......
......@@ -135,7 +135,7 @@ ARCH=${host_cpu}
dnl
dnl default modules
dnl
BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
dnl
dnl Accelerated modules
......@@ -154,9 +154,9 @@ AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],,
ACCEL_PLUGINS="${ACCEL_PLUGINS} ${MMX_PLUGINS}"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
AC_MSG_CHECKING([if \$CC groks MMX EXT (SSE) inline assembly])
AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly])
AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],,
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
dnl
......
......@@ -52,6 +52,12 @@ Choose stereo or mono audio output.
.B \-\-spdif
Activate hardware AC3 pass-through mode.
.TP
.B \-\-downmix <module>
Specify a module for AC3 downmix: "downmix", "downmixsse", for instance.
.TP
.B \-\-imdct <module>
Specify a module for AC3 IMDCT: "imdct", "imdctsse", for instance.
.TP
.B \-\-novideo
Disable video output.
.TP
......@@ -145,11 +151,13 @@ also accepts a lot of parameters to customize its behaviour.
vlc_channels=<filename> channels list
.TP
.B Audio parameters:
vlc_aout=<method name> audio method
vlc_dsp=<filename> dsp device path
vlc_stereo={1|0} stereo or mono output
vlc_spdif={1|0} AC3 pass-through mode
vlc_audio_rate=<rate> output rate
vlc_aout=<method name> audio method
vlc_dsp=<filename> dsp device path
vlc_stereo={1|0} stereo or mono output
vlc_spdif={1|0} AC3 pass-through mode
vlc_downmix=<method name> AC3 downmix method
vlc_imdct=<method name> AC3 IMDCT method
vlc_audio_rate=<rate> output rate
.TP
.B Video parameters:
vlc_vout=<method name> display method
......@@ -160,6 +168,7 @@ also accepts a lot of parameters to customize its behaviour.
vlc_grayscale={1|0} grayscale or color
vlc_fullscreen={1|0} full screen
vlc_overlay={1|0} overlay
vlc_motion=<method name> motion compensation method
vlc_idct=<method name> IDCT method
vlc_yuv=<method name> YUV method
vlc_synchro={I|I+|IP|IP+|IPB} synchro algorithm
......
/*****************************************************************************
* ac3_downmix.h : AC3 downmix types
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_downmix.h,v 1.3 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/*****************************************************************************
 * dm_par_t: coefficients handed to the downmix routines
 *****************************************************************************
 * The 3D Now! downmix code loads these three floats at byte offsets 0, 4 and
 * 8 from the structure pointer, so the order and type of the fields must not
 * change.
 *****************************************************************************/
typedef struct dm_par_s {
/* gain multiplied with the left and right front channels */
float unit;
/* gain multiplied with the center channel */
float clev;
/* gain multiplied with the surround channel(s) */
float slev;
} dm_par_t;
/*****************************************************************************
 * downmix_t: downmix module handle and shortcuts to its entry points
 *****************************************************************************
 * The function pointer members carry the same names and signatures as the
 * "downmix" entry of the module function list.
 *****************************************************************************/
typedef struct downmix_s {
/* Module used and shortcuts */
struct module_s * p_module;
/* Downmix routines, one per input layout (<n>f = front channels,
 * <n>r = rear channels); each receives the sample buffer and the
 * coefficients to apply */
void (*pf_downmix_3f_2r_to_2ch)(float *, dm_par_t * dm_par);
void (*pf_downmix_3f_1r_to_2ch)(float *, dm_par_t * dm_par);
void (*pf_downmix_2f_2r_to_2ch)(float *, dm_par_t * dm_par);
void (*pf_downmix_2f_1r_to_2ch)(float *, dm_par_t * dm_par);
void (*pf_downmix_3f_0r_to_2ch)(float *, dm_par_t * dm_par);
/* Conversion of float samples (two channels, or a single one) to s16 */
void (*pf_stream_sample_2ch_to_s16)(s16 *, float *left, float *right);
void (*pf_stream_sample_1ch_to_s16)(s16 *, float *center);
} downmix_t;
/*****************************************************************************
* ac3_downmix.c: ac3 downmix functions
* ac3_imdct.h : AC3 IMDCT types
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
* $Id: ac3_imdct.h,v 1.3 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
......@@ -22,58 +21,48 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
typedef struct complex_s {
float real;
float imag;
} complex_t;
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#define N 512
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "tests.h"
typedef struct imdct_s
{
complex_t buf[N/4];
#include "stream_control.h"
#include "input_ext-dec.h"
/* Delay buffer for time domain interleaving */
float delay[6][256];
float delay1[6][256];
#include "ac3_decoder.h"
#include "ac3_downmix.h"
/* Twiddle factors for IMDCT */
float xcos1[N/4];
float xsin1[N/4];
float xcos2[N/8];
float xsin2[N/8];
/* Twiddle factor LUT */
complex_t *w[7];
complex_t w_1[1];
complex_t w_2[2];
complex_t w_4[4];
complex_t w_8[8];
complex_t w_16[16];
complex_t w_32[32];
complex_t w_64[64];
float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
/* Module used and shortcuts */
struct module_s * p_module;
void (*pf_imdct_init) (struct imdct_s *);
//void (*pf_fft_64p) (complex_t *a);
void (*pf_imdct_256)(struct imdct_s *, float data[], float delay[]);
void (*pf_imdct_256_nol)(struct imdct_s *, float data[], float delay[]);
void (*pf_imdct_512)(struct imdct_s *, float data[], float delay[]);
void (*pf_imdct_512_nol)(struct imdct_s *, float data[], float delay[]);
} imdct_t;
void downmix_init (downmix_t * p_downmix)
{
#if 0
if ( TestCPU (CPU_CAPABILITY_SSE) )
{
intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
}
else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
{
intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
}
else
#endif
{
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
}
}
......@@ -264,6 +264,12 @@
#define AOUT_SPDIF_VAR "vlc_spdif"
#define AOUT_SPDIF_DEFAULT 0
/* Environment variable containing the AC3 downmix method */
#define DOWNMIX_METHOD_VAR "vlc_downmix"
/* Environment variable containing the AC3 IMDCT method */
#define IMDCT_METHOD_VAR "vlc_imdct"
/* Volume */
#define VOLUME_DEFAULT 512
#define VOLUME_STEP 128
......
......@@ -2,7 +2,7 @@
* modules.h : Module management functions.
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: modules.h,v 1.23 2001/05/06 04:32:02 sam Exp $
* $Id: modules.h,v 1.24 2001/05/15 16:19:42 sam Exp $
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
......@@ -65,16 +65,19 @@ typedef void * module_handle_t;
#define MODULE_CAPABILITY_DECAPS 1 << 3 /* Decaps */
#define MODULE_CAPABILITY_ADEC 1 << 4 /* Audio decoder */
#define MODULE_CAPABILITY_VDEC 1 << 5 /* Video decoder */
#define MODULE_CAPABILITY_MOTION 1 << 6 /* Video decoder */
#define MODULE_CAPABILITY_MOTION 1 << 6 /* Motion compensation */
#define MODULE_CAPABILITY_IDCT 1 << 7 /* IDCT transformation */
#define MODULE_CAPABILITY_AOUT 1 << 8 /* Audio output */
#define MODULE_CAPABILITY_VOUT 1 << 9 /* Video output */
#define MODULE_CAPABILITY_YUV 1 << 10 /* YUV colorspace conversion */
#define MODULE_CAPABILITY_AFX 1 << 11 /* Audio effects */
#define MODULE_CAPABILITY_VFX 1 << 12 /* Video effects */
#define MODULE_CAPABILITY_IMDCT 1 << 11 /* IMDCT transformation */
#define MODULE_CAPABILITY_DOWNMIX 1 << 12 /* AC3 downmix */
/* FIXME: kludge */
struct input_area_s;
struct imdct_s;
struct complex_s;
struct dm_par_s;
/* FIXME: not yet used */
typedef struct probedata_s
......@@ -190,6 +193,35 @@ typedef struct function_list_s
void ( * pf_end ) ( struct vout_thread_s * );
} yuv;
/* IMDCT plugin */
struct
{
void ( * pf_imdct_init ) ( struct imdct_s * );
void ( * pf_imdct_256 ) ( struct imdct_s *,
float data[], float delay[] );
void ( * pf_imdct_256_nol )( struct imdct_s *,
float data[], float delay[] );
void ( * pf_imdct_512 ) ( struct imdct_s *,
float data[], float delay[] );
void ( * pf_imdct_512_nol )( struct imdct_s *,
float data[], float delay[] );
// void ( * pf_fft_64p ) ( struct complex_s * );
} imdct;
/* AC3 downmix plugin */
struct
{
void ( * pf_downmix_3f_2r_to_2ch ) ( float *, struct dm_par_s * );
void ( * pf_downmix_3f_1r_to_2ch ) ( float *, struct dm_par_s * );
void ( * pf_downmix_2f_2r_to_2ch ) ( float *, struct dm_par_s * );
void ( * pf_downmix_2f_1r_to_2ch ) ( float *, struct dm_par_s * );
void ( * pf_downmix_3f_0r_to_2ch ) ( float *, struct dm_par_s * );
void ( * pf_stream_sample_2ch_to_s16 ) ( s16 *, float *, float * );
void ( * pf_stream_sample_1ch_to_s16 ) ( s16 *, float * );
} downmix;
} functions;
} function_list_t;
......@@ -208,8 +240,8 @@ typedef struct module_functions_s
function_list_t aout;
function_list_t vout;
function_list_t yuv;
function_list_t afx;
function_list_t vfx;
function_list_t imdct;
function_list_t downmix;
} module_functions_t;
......
###############################################################################
# vlc (VideoLAN Client) downmix module makefile
# (c)2001 VideoLAN
###############################################################################
#
# Objects
#
# Objects making up each of the three downmix modules (downmix, downmixsse,
# downmix3dn) when built as a plugin.
PLUGIN_DOWNMIX = downmix.o ac3_downmix_c.o
PLUGIN_DOWNMIXSSE = downmixsse.o ac3_downmix_sse.o
PLUGIN_DOWNMIX3DN = downmix3dn.o ac3_downmix_3dn.o
# Same objects when built into the executable: each object name gets a
# BUILTIN_<MODULE>_ prefix so that it does not collide with the plugin object.
BUILTIN_DOWNMIX = $(PLUGIN_DOWNMIX:%.o=BUILTIN_DOWNMIX_%.o)
BUILTIN_DOWNMIXSSE = $(PLUGIN_DOWNMIXSSE:%.o=BUILTIN_DOWNMIXSSE_%.o)
BUILTIN_DOWNMIX3DN = $(PLUGIN_DOWNMIX3DN:%.o=BUILTIN_DOWNMIX3DN_%.o)
# Aggregates of the lists above, defined before Makefile.modules is included.
PLUGIN_C = $(PLUGIN_DOWNMIX) $(PLUGIN_DOWNMIXSSE) $(PLUGIN_DOWNMIX3DN)
ALL_OBJ = $(PLUGIN_C) $(BUILTIN_DOWNMIX) $(BUILTIN_DOWNMIXSSE) $(BUILTIN_DOWNMIX3DN)
#
# Virtual targets
#
include ../../Makefile.modules
# Static pattern rules for the built-in objects: each one depends on the
# dependency file of its source and is compiled with -DBUILTIN and the name
# of the module it belongs to.
$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: .dep/%.d
$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: %.c
$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix -c -o $@ $<
$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: .dep/%.d
$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: %.c
$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmixsse -c -o $@ $<
$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: .dep/%.d
$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: %.c
$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix3dn -c -o $@ $<
#
# Real targets
#
# For every module: a shared object linked from the plugin objects, and a
# static archive made of the built-in objects.
../../lib/downmix.so: $(PLUGIN_DOWNMIX)
$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
../../lib/downmix.a: $(BUILTIN_DOWNMIX)
ar r $@ $^
$(RANLIB) $@
../../lib/downmixsse.so: $(PLUGIN_DOWNMIXSSE)
$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
../../lib/downmixsse.a: $(BUILTIN_DOWNMIXSSE)
ar r $@ $^
$(RANLIB) $@
../../lib/downmix3dn.so: $(PLUGIN_DOWNMIX3DN)
$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
../../lib/downmix3dn.a: $(BUILTIN_DOWNMIX3DN)
ar r $@ $^
$(RANLIB) $@
/*****************************************************************************
* ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#define MODULE_NAME downmix3dn
#include "modules_inner.h"
/*****************************************************************************
* Preamble
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "ac3_downmix.h"
/*****************************************************************************
 * sqrt2_3dn: holder for the single-precision constant 0.7071068
 *****************************************************************************
 * The body only emits the constant through an assembler directive;
 * stream_sample_1ch_to_s16 takes the address of this symbol and loads four
 * bytes from it as its scale factor.
 * NOTE(review): that load only yields the constant if the compiler emits no
 * instructions (e.g. a frame-pointer prologue) ahead of the directive --
 * confirm the build flags guarantee this.
 *****************************************************************************/
void sqrt2_3dn (void)
{
__asm__ (".float 0f0.7071068");
}
/*****************************************************************************
 * downmix_3f_2r_to_2ch: 3D Now! downmix of 3 front + 2 rear channels
 *****************************************************************************
 * samples holds one 1024-byte plane (256 floats) per channel, read here as
 * left, center, right, leftsur, rightsur. For each sample:
 *     left'  = unit * left  + clev * center + slev * leftsur
 *     right' = unit * right + clev * center + slev * rightsur
 * The results overwrite the planes at byte offsets 0 and 1024.
 * dm_par is read at byte offsets 0, 4 and 8 (unit, clev, slev).
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through samples.
 * NOTE(review): the MMX registers used and the x87 state reset by femms are
 * not declared to the compiler; the "b" constraint may also be refused when
 * building position-independent code with compilers that reserve %ebx.
 *****************************************************************************/
void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */

    "movd  (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */

    "movd  4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */

    "movd  8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */

    "1:\n"
    "movq  (%%eax), %%mm0\n"        /* left */
    "movq  2048(%%eax), %%mm1\n"    /* right */
    "movq  1024(%%eax), %%mm2\n"    /* center */
    "movq  3072(%%eax), %%mm3\n"    /* leftsur */
    "movq  4096(%%eax), %%mm4\n"    /* rightsur */

    "pfmul %%mm5, %%mm0\n"          /* unit * left */
    "pfmul %%mm5, %%mm1\n"          /* unit * right */
    "pfmul %%mm6, %%mm2\n"          /* clev * center */
    "pfadd %%mm2, %%mm0\n"
    "pfadd %%mm2, %%mm1\n"
    "pfmul %%mm7, %%mm3\n"          /* slev * leftsur */
    "pfmul %%mm7, %%mm4\n"          /* slev * rightsur */
    "pfadd %%mm3, %%mm0\n"
    "pfadd %%mm4, %%mm1\n"

    "movq  %%mm0, (%%eax)\n"        /* store left' */
    "movq  %%mm1, 1024(%%eax)\n"    /* store right' */

    "addl  $8, %%eax\n"
    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%ecx\n"
    "femms\n"
    : "=a" (samples)
    : "a" (samples), "b" (dm_par)
    : "memory", "cc" );
}
/*****************************************************************************
 * downmix_2f_2r_to_2ch: 3D Now! downmix of 2 front + 2 rear channels
 *****************************************************************************
 * samples holds one 1024-byte plane (256 floats) per channel, read here as
 * left, right, leftsur, rightsur. For each sample:
 *     left'  = unit * left  + slev * leftsur
 *     right' = unit * right + slev * rightsur
 * The results overwrite the planes at byte offsets 0 and 1024.
 * dm_par is read at byte offsets 0 and 8 (unit, slev).
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through samples.
 * NOTE(review): the MMX registers used and the x87 state reset by femms are
 * not declared to the compiler.
 *****************************************************************************/
void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */

    "movd  (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */

    "movd  8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */

    "1:\n"
    "movq  (%%eax), %%mm0\n"        /* left */
    "movq  1024(%%eax), %%mm1\n"    /* right */
    "movq  2048(%%eax), %%mm3\n"    /* leftsur */
    "movq  3072(%%eax), %%mm4\n"    /* rightsur */

    "pfmul %%mm5, %%mm0\n"          /* unit * left */
    "pfmul %%mm5, %%mm1\n"          /* unit * right */
    "pfmul %%mm7, %%mm3\n"          /* slev * leftsur */
    "pfmul %%mm7, %%mm4\n"          /* slev * rightsur */
    "pfadd %%mm3, %%mm0\n"
    "pfadd %%mm4, %%mm1\n"

    "movq  %%mm0, (%%eax)\n"        /* store left' */
    "movq  %%mm1, 1024(%%eax)\n"    /* store right' */

    "addl  $8, %%eax\n"
    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%ecx\n"
    "femms\n"
    : "=a" (samples)
    : "a" (samples), "b" (dm_par)
    : "memory", "cc" );
}
/*****************************************************************************
 * downmix_3f_1r_to_2ch: 3D Now! downmix of 3 front + 1 rear channels
 *****************************************************************************
 * samples holds one 1024-byte plane (256 floats) per channel, read here as
 * left, center, right, sur. For each sample:
 *     left'  = unit * left  + clev * center - slev * sur
 *     right' = unit * right + clev * center + slev * sur
 * The results overwrite the planes at byte offsets 0 and 1024.
 * dm_par is read at byte offsets 0, 4 and 8 (unit, clev, slev).
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through samples.
 * NOTE(review): the MMX registers used and the x87 state reset by femms are
 * not declared to the compiler.
 *****************************************************************************/
void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */

    "movd  (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */

    "movd  4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */

    "movd  8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */

    "1:\n"
    "movq  (%%eax), %%mm0\n"        /* left */
    "movq  2048(%%eax), %%mm1\n"    /* right */
    "movq  1024(%%eax), %%mm2\n"    /* center */
    "movq  3072(%%eax), %%mm3\n"    /* sur */

    "pfmul %%mm5, %%mm0\n"          /* unit * left */
    "pfmul %%mm5, %%mm1\n"          /* unit * right */
    "pfmul %%mm6, %%mm2\n"          /* clev * center */
    "pfadd %%mm2, %%mm0\n"
    "pfmul %%mm7, %%mm3\n"          /* slev * sur */
    "pfadd %%mm2, %%mm1\n"
    "pfsub %%mm3, %%mm0\n"          /* surround is subtracted on the left */
    "pfadd %%mm3, %%mm1\n"          /* ... and added on the right */

    "movq  %%mm0, (%%eax)\n"        /* store left' */
    "movq  %%mm1, 1024(%%eax)\n"    /* store right' */

    "addl  $8, %%eax\n"
    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%ecx\n"
    "femms\n"
    : "=a" (samples)
    : "a" (samples), "b" (dm_par)
    : "memory", "cc" );
}
/*****************************************************************************
 * downmix_2f_1r_to_2ch: 3D Now! downmix of 2 front + 1 rear channels
 *****************************************************************************
 * samples holds one 1024-byte plane (256 floats) per channel, read here as
 * left, right, sur. For each sample:
 *     left'  = unit * left  - slev * sur
 *     right' = unit * right + slev * sur
 * The results overwrite the planes at byte offsets 0 and 1024.
 * dm_par is read at byte offsets 0 and 8 (unit, slev).
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through samples.
 * NOTE(review): the MMX registers used and the x87 state reset by femms are
 * not declared to the compiler.
 *****************************************************************************/
void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */

    "movd  (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */

    "movd  8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */

    "1:\n"
    "movq  (%%eax), %%mm0\n"        /* left */
    "movq  1024(%%eax), %%mm1\n"    /* right */
    "movq  2048(%%eax), %%mm3\n"    /* sur */

    "pfmul %%mm5, %%mm0\n"          /* unit * left */
    "pfmul %%mm5, %%mm1\n"          /* unit * right */
    "pfmul %%mm7, %%mm3\n"          /* slev * sur */
    "pfsub %%mm3, %%mm0\n"          /* surround is subtracted on the left */
    "pfadd %%mm3, %%mm1\n"          /* ... and added on the right */

    "movq  %%mm0, (%%eax)\n"        /* store left' */
    "movq  %%mm1, 1024(%%eax)\n"    /* store right' */

    "addl  $8, %%eax\n"
    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%ecx\n"
    "femms\n"
    : "=a" (samples)
    : "a" (samples), "b" (dm_par)
    : "memory", "cc" );
}
/*****************************************************************************
 * downmix_3f_0r_to_2ch: 3D Now! downmix of 3 front channels
 *****************************************************************************
 * samples holds one 1024-byte plane (256 floats) per channel, read here as
 * left, center, right. For each sample:
 *     left'  = unit * left  + clev * center
 *     right' = unit * right + clev * center
 * The results overwrite the planes at byte offsets 0 and 1024.
 * dm_par is read at byte offsets 0 and 4 (unit, clev).
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through samples.
 * NOTE(review): the MMX registers used and the x87 state reset by femms are
 * not declared to the compiler.
 *****************************************************************************/
void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */

    "movd  (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */

    "movd  4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */

    "1:\n"
    "movq  (%%eax), %%mm0\n"        /* left */
    "movq  2048(%%eax), %%mm1\n"    /* right */
    "movq  1024(%%eax), %%mm2\n"    /* center */

    "pfmul %%mm5, %%mm0\n"          /* unit * left */
    "pfmul %%mm5, %%mm1\n"          /* unit * right */
    "pfmul %%mm6, %%mm2\n"          /* clev * center */
    "pfadd %%mm2, %%mm0\n"
    "pfadd %%mm2, %%mm1\n"

    "movq  %%mm0, (%%eax)\n"        /* store left' */
    "movq  %%mm1, 1024(%%eax)\n"    /* store right' */

    "addl  $8, %%eax\n"
    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%ecx\n"
    "femms\n"
    : "=a" (samples)
    : "a" (samples), "b" (dm_par)
    : "memory", "cc" );
}
/*****************************************************************************
 * stream_sample_1ch_to_s16: 3D Now! conversion of one float channel to s16
 *****************************************************************************
 * Reads 256 floats from left, scales each one by the constant stored at
 * sqrt2_3dn, converts it to a saturated signed 16-bit value and writes it
 * twice in a row to s16_samples (512 values written in total).
 *
 * packssdw alone leaves the words ordered c1 c0 c1 c0; the punpcklwd that
 * follows produces the c1 c1 c0 c0 layout, so that every sample is stored
 * twice in a row instead of alternating with its neighbour.
 *
 * The loop uses a numeric local label so that the asm can be emitted more
 * than once in a translation unit, and declares a "memory" clobber because
 * it stores through s16_samples.
 * NOTE(review): the constant is fetched from the address of the function
 * sqrt2_3dn, which is only valid if no instruction precedes the .float
 * directive there; the MMX registers used and the x87 state reset by femms
 * are not declared to the compiler.
 *****************************************************************************/
void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
{
    __asm__ __volatile__ (
    "pushl %%ecx\n"
    "pushl %%edx\n"

    "movl  $sqrt2_3dn, %%edx\n"
    "movd  (%%edx), %%mm7\n"
    "punpckldq %%mm7, %%mm7\n"      /* sqrt2 | sqrt2 */

    "movl  $128, %%ecx\n"           /* loop counter: 2 floats per pass */
    "1:\n"
    "movq  (%%ebx), %%mm0\n"        /* c1 | c0 */
    "pfmul %%mm7, %%mm0\n"
    "pf2id %%mm0, %%mm0\n"          /* c1 c0 --> mm0, int_32 */
    "packssdw %%mm0, %%mm0\n"       /* c1 c0 c1 c0 --> mm0, int_16 */
    "punpcklwd %%mm0, %%mm0\n"      /* c1 c1 c0 c0 --> mm0, int_16 */

    "movq  %%mm0, (%%eax)\n"
    "addl  $8, %%eax\n"
    "addl  $8, %%ebx\n"

    "decl  %%ecx\n"
    "jnz   1b\n"

    "popl  %%edx\n"
    "popl  %%ecx\n"
    "femms\n"
    : "=a" (s16_samples), "=b" (left)
    : "a" (s16_samples), "b" (left)
    : "memory", "cc" );
}
void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n"
".loop1:\n"
"movq (%%ebx), %%mm0\n" /* l1 | l0 */
"movq (%%edx), %%mm1\n" /* r1 | r0 */
"movq %%mm0, %%mm2\n" /* l1 | l0 */
"punpckldq %%mm1, %%mm0\n" /* r0 | l0 */
"punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */
"pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
"pf2id %%mm2, %%mm2\n" /* r0 l0 --> mm0, int_32 */
"packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
"movq %%mm0, (%%eax)\n"
"movq %%mm2, 8(%%eax)\n"
"addl $8, %%eax\n"
"addl $8, %%ebx\n"
"addl $8, %%edx\n"
"decl %%ecx\n"
"jnz .loop1\n"
"popl %%ecx\n"
"femms\n"
: "=a" (s16_samples), "=b" (left), "=d" (right)