Commit db7b0421 authored by Renaud Dartus's avatar Renaud Dartus

* Beginning of SSE/3DNow! support for imdct and downmix

If you have a PIII or an Athlon and want to try this, comment out the #if 0 guard (and its matching #endif)
in ac3_downmix.c and ac3_imdct.c, and add the following to the AC3_DECODER section of the Makefile:
	src/ac3_decoder/ac3_imdct_sse.o \
	src/ac3_decoder/ac3_srfft_sse.o \
	src/ac3_decoder/ac3_downmix_sse.o \
	src/ac3_decoder/ac3_downmix_3dn.o \
parent c1df8159
...@@ -55,7 +55,7 @@ AUDIO_OUTPUT = src/audio_output/audio_output.o \ ...@@ -55,7 +55,7 @@ AUDIO_OUTPUT = src/audio_output/audio_output.o \
src/audio_output/aout_s8.o \ src/audio_output/aout_s8.o \
src/audio_output/aout_u16.o \ src/audio_output/aout_u16.o \
src/audio_output/aout_s16.o \ src/audio_output/aout_s16.o \
src/audio_output/aout_spdif.o src/audio_output/aout_spdif.o
VIDEO_OUTPUT = src/video_output/video_output.o \ VIDEO_OUTPUT = src/video_output/video_output.o \
src/video_output/video_text.o \ src/video_output/video_text.o \
...@@ -76,7 +76,7 @@ AC3_DECODER = src/ac3_decoder/ac3_decoder_thread.o \ ...@@ -76,7 +76,7 @@ AC3_DECODER = src/ac3_decoder/ac3_decoder_thread.o \
src/ac3_decoder/ac3_downmix_c.o src/ac3_decoder/ac3_downmix_c.o
AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \ AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \
src/ac3_spdif/ac3_iec958.o src/ac3_spdif/ac3_iec958.o
LPCM_DECODER = src/lpcm_decoder/lpcm_decoder_thread.o \ LPCM_DECODER = src/lpcm_decoder/lpcm_decoder_thread.o \
src/lpcm_decoder/lpcm_decoder.o src/lpcm_decoder/lpcm_decoder.o
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* tests.h: several test functions needed by the plugins * tests.h: several test functions needed by the plugins
***************************************************************************** *****************************************************************************
* Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN * Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN
* $Id: tests.h,v 1.9 2001/03/21 13:42:33 sam Exp $ * $Id: tests.h,v 1.10 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Samuel Hocevar <sam@zoy.org> * Authors: Samuel Hocevar <sam@zoy.org>
* *
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define CPU_CAPABILITY_MMX 1<<3 #define CPU_CAPABILITY_MMX 1<<3
#define CPU_CAPABILITY_3DNOW 1<<4 #define CPU_CAPABILITY_3DNOW 1<<4
#define CPU_CAPABILITY_MMXEXT 1<<5 #define CPU_CAPABILITY_MMXEXT 1<<5
#define CPU_CAPABILITY_SSE 1<<6
#define CPU_CAPABILITY_ALTIVEC 1<<16 #define CPU_CAPABILITY_ALTIVEC 1<<16
/***************************************************************************** /*****************************************************************************
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_bit_allocate.c: ac3 allocation tables * ac3_bit_allocate.c: ac3 allocation tables
***************************************************************************** *****************************************************************************
* Copyright (C) 2000 VideoLAN * Copyright (C) 2000 VideoLAN
* $Id: ac3_bit_allocate.c,v 1.20 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h" #include "ac3_internal.h" /* DELTA_BIT_REUSE */
static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]); static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.c: core ac3 decoder * ac3_decoder.c: core ac3 decoder
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.c,v 1.32 2001/05/07 03:14:09 stef Exp $ * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org> * Michel Lespinasse <walken@zoy.org>
...@@ -40,22 +40,15 @@ ...@@ -40,22 +40,15 @@
#include "audio_output.h" #include "audio_output.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_decoder_thread.h" #include "ac3_decoder_thread.h" /* ac3dec_thread_t */
#include "ac3_internal.h" #include "ac3_internal.h"
#include <stdio.h> static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static const float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
void imdct_init (imdct_t * p_imdct);
void downmix_init (downmix_t * p_downmix);
static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
int ac3_init (ac3dec_t * p_ac3dec) int ac3_init (ac3dec_t * p_ac3dec)
{ {
// p_ac3dec->bit_stream.buffer = 0; p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
// p_ac3dec->bit_stream.i_available = 0;
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
imdct_init(&p_ac3dec->imdct); imdct_init(&p_ac3dec->imdct);
downmix_init(&p_ac3dec->downmix); downmix_init(&p_ac3dec->downmix);
...@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_bsi (p_ac3dec)) if (parse_bsi (p_ac3dec))
{ {
intf_WarnMsg (3,"Error during ac3parsing"); intf_WarnMsg (3,"ac3dec warn: error during parsing");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
...@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_audblk (p_ac3dec, i)) if (parse_audblk (p_ac3dec, i))
{ {
intf_WarnMsg (3,"Error during ac3audioblock"); intf_WarnMsg (3,"ac3dec warn: error during audioblock");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
...@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (exponent_unpack (p_ac3dec)) if (exponent_unpack (p_ac3dec))
{ {
intf_WarnMsg (3,"Error during ac3unpack"); intf_WarnMsg (3,"ac3dec warn: error during unpack");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.h : ac3 decoder interface * ac3_decoder.h : ac3 decoder interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.h,v 1.7 2001/04/30 21:04:20 reno Exp $ * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
...@@ -372,7 +372,7 @@ typedef struct imdct_s ...@@ -372,7 +372,7 @@ typedef struct imdct_s
float xsin1[N/4]; float xsin1[N/4];
float xcos2[N/8]; float xcos2[N/8];
float xsin2[N/8]; float xsin2[N/8];
/* Twiddle factor LUT */ /* Twiddle factor LUT */
complex_t *w[7]; complex_t *w[7];
complex_t w_1[1]; complex_t w_1[1];
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.h : ac3 decoder thread interface * ac3_decoder_thread.h : ac3 decoder thread interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.h,v 1.6 2001/05/01 04:18:18 sam Exp $ * $Id: ac3_decoder_thread.h,v 1.7 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* *
...@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s ...@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s
* Thread properties * Thread properties
*/ */
vlc_thread_t thread_id; /* id for thread functions */ vlc_thread_t thread_id; /* id for thread functions */
// bit_stream_t bit_stream;
/* /*
* Input properties * Input properties
*/ */
decoder_fifo_t * p_fifo; /* stores the PES stream data */ decoder_fifo_t * p_fifo; /* stores the PES stream data */
// data_packet_t * p_data;
int sync_ptr; /* sync ptr from ac3 magic header */ int sync_ptr; /* sync ptr from ac3 magic header */
adec_config_t * p_config; adec_config_t * p_config;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix.c: ac3 downmix functions * ac3_downmix.c: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_downmix.c,v 1.22 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -31,29 +31,41 @@ ...@@ -31,29 +31,41 @@
#include "threads.h" #include "threads.h"
#include "mtime.h" #include "mtime.h"
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "tests.h" #include "tests.h"
#include "stream_control.h" #include "stream_control.h"
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h" #include "ac3_downmix.h"
void downmix_init (downmix_t * p_downmix) void downmix_init (downmix_t * p_downmix)
{ {
#if 0 #if 0
if ( TestCPU (CPU_CAPABILITY_MMX) ) if ( TestCPU (CPU_CAPABILITY_SSE) )
{ {
fprintf(stderr,"Using MMX for downmix\n"); intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni; p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni; p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni; p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni; p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni; p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni; p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni; p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
} else }
else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
{
intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
}
else
#endif #endif
{ {
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c; p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix.h: ac3 downmix functions * ac3_downmix.h: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 2000, 2001 VideoLAN * Copyright (C) 2000, 2001 VideoLAN
* $Id: ac3_downmix.h,v 1.6 2001/04/30 21:04:20 reno Exp $ * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* *
...@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par); ...@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right); void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center); void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center);
#if 0 /* SSE functions */
/* Kni functions */ void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par); void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
void stream_sample_2ch_to_s16_kni(s16 *s16_samples, float *left, float *right); void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);
void stream_sample_1ch_to_s16_kni(s16 *s16_samples, float *center);
#endif /* 3DNow! functions */
void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);
/*****************************************************************************
* ac3_downmix_3dn.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
/*****************************************************************************
 * downmix_3f_2r_to_2ch_3dn: 3 front + 2 rear channels to 2 channels (3DNow!)
 *****************************************************************************
 * Works in place on `samples`. The channel planes are addressed at byte
 * offsets 0 (left), 1024 (center), 2048 (right), 3072 (left surround) and
 * 4096 (right surround); the result is written back to offsets 0 (left)
 * and 1024 (right). 128 iterations process two floats per channel each.
 *
 * The three scaling factors are read as 32-bit floats at byte offsets 0, 4
 * and 8 of *dm_par (labelled unit / clev / slev); the layout of dm_par_t is
 * declared outside this file.
 *
 *   left  = unit * left  + clev * center + slev * left_sur
 *   right = unit * right + clev * center + slev * right_sur
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd       (%2), %%mm5\n"          /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%2), %%mm6\n"          /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */
    "movd      8(%2), %%mm7\n"          /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */

    "1:\n"
    "movq       (%0), %%mm0\n"          /* left */
    "movq   2048(%0), %%mm1\n"          /* right */
    "movq   1024(%0), %%mm2\n"          /* center */
    "movq   3072(%0), %%mm3\n"          /* leftsur */
    "movq   4096(%0), %%mm4\n"          /* rightsur */

    "pfmul     %%mm5, %%mm0\n"          /* unit * left */
    "pfmul     %%mm5, %%mm1\n"          /* unit * right */
    "pfmul     %%mm6, %%mm2\n"          /* clev * center */
    "pfadd     %%mm2, %%mm0\n"
    "pfadd     %%mm2, %%mm1\n"
    "pfmul     %%mm7, %%mm3\n"          /* slev * leftsur */
    "pfmul     %%mm7, %%mm4\n"          /* slev * rightsur */
    "pfadd     %%mm3, %%mm0\n"
    "pfadd     %%mm4, %%mm1\n"

    /* The center pair at 1024(%0) was already loaded above, so it is safe
     * to overwrite it with the right output here. */
    "movq      %%mm0, (%0)\n"
    "movq      %%mm1, 1024(%0)\n"

    "add          $8, %0\n"             /* next pair of floats */
    "dec          %1\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (samples), "+r" (i_count)
    : "r" (dm_par)
    : "memory", "cc",
      "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" );
}
/*****************************************************************************
 * downmix_2f_2r_to_2ch_3dn: 2 front + 2 rear channels to 2 channels (3DNow!)
 *****************************************************************************
 * Works in place on `samples`. The channel planes are addressed at byte
 * offsets 0 (left), 1024 (right), 2048 (left surround) and 3072 (right
 * surround); the result is written back to offsets 0 (left) and 1024
 * (right). 128 iterations process two floats per channel each.
 *
 * The scaling factors are read as 32-bit floats at byte offsets 0 (unit)
 * and 8 (slev) of *dm_par; the layout of dm_par_t is declared outside this
 * file.
 *
 *   left  = unit * left  + slev * left_sur
 *   right = unit * right + slev * right_sur
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd       (%2), %%mm5\n"          /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      8(%2), %%mm7\n"          /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */

    "1:\n"
    "movq       (%0), %%mm0\n"          /* left */
    "movq   1024(%0), %%mm1\n"          /* right */
    "movq   2048(%0), %%mm3\n"          /* leftsur */
    "movq   3072(%0), %%mm4\n"          /* rightsur */

    "pfmul     %%mm5, %%mm0\n"          /* unit * left */
    "pfmul     %%mm5, %%mm1\n"          /* unit * right */
    "pfmul     %%mm7, %%mm3\n"          /* slev * leftsur */
    "pfmul     %%mm7, %%mm4\n"          /* slev * rightsur */
    "pfadd     %%mm3, %%mm0\n"
    "pfadd     %%mm4, %%mm1\n"

    "movq      %%mm0, (%0)\n"
    "movq      %%mm1, 1024(%0)\n"

    "add          $8, %0\n"             /* next pair of floats */
    "dec          %1\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (samples), "+r" (i_count)
    : "r" (dm_par)
    : "memory", "cc",
      "mm0", "mm1", "mm3", "mm4", "mm5", "mm7" );
}
/*****************************************************************************
 * downmix_3f_1r_to_2ch_3dn: 3 front + 1 rear channel to 2 channels (3DNow!)
 *****************************************************************************
 * Works in place on `samples`. The channel planes are addressed at byte
 * offsets 0 (left), 1024 (center), 2048 (right) and 3072 (surround); the
 * result is written back to offsets 0 (left) and 1024 (right).
 * 128 iterations process two floats per channel each.
 *
 * The scaling factors are read as 32-bit floats at byte offsets 0 (unit),
 * 4 (clev) and 8 (slev) of *dm_par; the layout of dm_par_t is declared
 * outside this file.
 *
 * The single surround channel is subtracted from the left output and added
 * to the right output:
 *
 *   left  = unit * left  + clev * center - slev * sur
 *   right = unit * right + clev * center + slev * sur
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd       (%2), %%mm5\n"          /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%2), %%mm6\n"          /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */
    "movd      8(%2), %%mm7\n"          /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */

    "1:\n"
    "movq       (%0), %%mm0\n"          /* left */
    "movq   2048(%0), %%mm1\n"          /* right */
    "movq   1024(%0), %%mm2\n"          /* center */
    "movq   3072(%0), %%mm3\n"          /* sur */

    "pfmul     %%mm5, %%mm0\n"          /* unit * left */
    "pfmul     %%mm5, %%mm1\n"          /* unit * right */
    "pfmul     %%mm6, %%mm2\n"          /* clev * center */
    "pfadd     %%mm2, %%mm0\n"
    "pfmul     %%mm7, %%mm3\n"          /* slev * sur */
    "pfadd     %%mm2, %%mm1\n"
    "pfsub     %%mm3, %%mm0\n"          /* left  -= slev * sur */
    "pfadd     %%mm3, %%mm1\n"          /* right += slev * sur */

    /* The center pair at 1024(%0) was already loaded above, so it is safe
     * to overwrite it with the right output here. */
    "movq      %%mm0, (%0)\n"
    "movq      %%mm1, 1024(%0)\n"

    "add          $8, %0\n"             /* next pair of floats */
    "dec          %1\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (samples), "+r" (i_count)
    : "r" (dm_par)
    : "memory", "cc",
      "mm0", "mm1", "mm2", "mm3", "mm5", "mm6", "mm7" );
}
/*****************************************************************************
 * downmix_2f_1r_to_2ch_3dn: 2 front + 1 rear channel to 2 channels (3DNow!)
 *****************************************************************************
 * Works in place on `samples`. The channel planes are addressed at byte
 * offsets 0 (left), 1024 (right) and 2048 (surround); the result is written
 * back to offsets 0 (left) and 1024 (right). 128 iterations process two
 * floats per channel each.
 *
 * The scaling factors are read as 32-bit floats at byte offsets 0 (unit)
 * and 8 (slev) of *dm_par; the layout of dm_par_t is declared outside this
 * file.
 *
 * The single surround channel is subtracted from the left output and added
 * to the right output:
 *
 *   left  = unit * left  - slev * sur
 *   right = unit * right + slev * sur
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd       (%2), %%mm5\n"          /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      8(%2), %%mm7\n"          /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */

    "1:\n"
    "movq       (%0), %%mm0\n"          /* left */
    "movq   1024(%0), %%mm1\n"          /* right */
    "movq   2048(%0), %%mm3\n"          /* sur */

    "pfmul     %%mm5, %%mm0\n"          /* unit * left */
    "pfmul     %%mm5, %%mm1\n"          /* unit * right */
    "pfmul     %%mm7, %%mm3\n"          /* slev * sur */
    "pfsub     %%mm3, %%mm0\n"          /* left  -= slev * sur */
    "pfadd     %%mm3, %%mm1\n"          /* right += slev * sur */

    "movq      %%mm0, (%0)\n"
    "movq      %%mm1, 1024(%0)\n"

    "add          $8, %0\n"             /* next pair of floats */
    "dec          %1\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (samples), "+r" (i_count)
    : "r" (dm_par)
    : "memory", "cc",
      "mm0", "mm1", "mm3", "mm5", "mm7" );
}
/*****************************************************************************
 * downmix_3f_0r_to_2ch_3dn: 3 front channels to 2 channels (3DNow!)
 *****************************************************************************
 * Works in place on `samples`. The channel planes are addressed at byte
 * offsets 0 (left), 1024 (center) and 2048 (right); the result is written
 * back to offsets 0 (left) and 1024 (right). 128 iterations process two
 * floats per channel each.
 *
 * The scaling factors are read as 32-bit floats at byte offsets 0 (unit)
 * and 4 (clev) of *dm_par; the layout of dm_par_t is declared outside this
 * file.
 *
 *   left  = unit * left  + clev * center
 *   right = unit * right + clev * center
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd       (%2), %%mm5\n"          /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%2), %%mm6\n"          /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */

    "1:\n"
    "movq       (%0), %%mm0\n"          /* left */
    "movq   2048(%0), %%mm1\n"          /* right */
    "movq   1024(%0), %%mm2\n"          /* center */

    "pfmul     %%mm5, %%mm0\n"          /* unit * left */
    "pfmul     %%mm5, %%mm1\n"          /* unit * right */
    "pfmul     %%mm6, %%mm2\n"          /* clev * center */
    "pfadd     %%mm2, %%mm0\n"
    "pfadd     %%mm2, %%mm1\n"

    /* The center pair at 1024(%0) was already loaded above, so it is safe
     * to overwrite it with the right output here. */
    "movq      %%mm0, (%0)\n"
    "movq      %%mm1, 1024(%0)\n"

    "add          $8, %0\n"             /* next pair of floats */
    "dec          %1\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (samples), "+r" (i_count)
    : "r" (dm_par)
    : "memory", "cc",
      "mm0", "mm1", "mm2", "mm5", "mm6" );
}
/*****************************************************************************
 * stream_sample_1ch_to_s16_3dn: one float channel to interleaved s16 (3DNow!)
 *****************************************************************************
 * Reads 256 floats from `left` (128 iterations x 2 floats), multiplies each
 * by the 32-bit float stored at the symbol `sqrt2`, converts to int32 with
 * pf2id and packs to saturated signed 16-bit. Every sample is written twice
 * in a row, so `s16_samples` receives 512 values (1024 bytes).
 *
 * NOTE(review): `sqrt2` is referenced purely at assembler level and is not
 * defined in this file; it is presumably exported by another object of the
 * decoder - verify at link time.
 *
 * Fix over the previous version: `packssdw mm0, mm0` alone yields the word
 * order c1 c0 c1 c0, not the c1 c1 c0 c0 the original comment promised, so
 * consecutive samples were interleaved instead of duplicated. A following
 * `punpcklwd mm0, mm0` produces the intended c1 c1 c0 c0 layout.
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "movd      sqrt2, %%mm7\n"          /* scale factor, loaded by symbol */
    "punpckldq %%mm7, %%mm7\n"          /* sqrt2 | sqrt2 */

    "1:\n"
    "movq       (%1), %%mm0\n"          /* c1 | c0 */
    "pfmul     %%mm7, %%mm0\n"
    "pf2id     %%mm0, %%mm0\n"          /* c1 c0 --> mm0, int_32 */
    "packssdw  %%mm0, %%mm0\n"          /* c1 c0 c1 c0 --> mm0, int_16 */
    "punpcklwd %%mm0, %%mm0\n"          /* c1 c1 c0 c0 --> mm0, int_16 */
    "movq      %%mm0, (%0)\n"

    "add          $8, %0\n"             /* 4 s16 written */
    "add          $8, %1\n"             /* 2 floats consumed */
    "dec          %2\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (s16_samples), "+r" (left), "+r" (i_count)
    : /* no pure inputs */
    : "memory", "cc", "mm0", "mm7" );
}
/*****************************************************************************
 * stream_sample_2ch_to_s16_3dn: two float channels to interleaved s16 (3DNow!)
 *****************************************************************************
 * Reads 256 floats from each of `left` and `right` (128 iterations x 2
 * floats per channel), converts them to int32 with pf2id, packs them to
 * saturated signed 16-bit and stores them interleaved as l0 r0 l1 r1 ...
 * `s16_samples` therefore receives 512 values (1024 bytes).
 *
 * Fix over the previous version: an extra `movq %mm2, 8(%eax)` stored the
 * still 32-bit (r1 | l1) pair 8 bytes ahead of the output pointer. Inside
 * the loop that scratch data was overwritten by the next iteration, but on
 * the last iteration it landed 8 bytes past the 1024-byte output block.
 * The store has been removed; the packed result in mm0 is the only data
 * written.
 *
 * The asm statement lets the compiler pick the registers and declares
 * everything it touches (memory, flags, MMX registers). It uses a numeric
 * local label so the code can be duplicated or inlined without symbol clashes.
 *****************************************************************************/
void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
{
    int i_count = 128;                                      /* loop counter */

    __asm__ __volatile__ (
    "1:\n"
    "movq       (%1), %%mm0\n"          /* l1 | l0 */
    "movq       (%2), %%mm1\n"          /* r1 | r0 */
    "movq      %%mm0, %%mm2\n"          /* l1 | l0 */
    "punpckldq %%mm1, %%mm0\n"          /* r0 | l0 */
    "punpckhdq %%mm1, %%mm2\n"          /* r1 | l1 */

    "pf2id     %%mm0, %%mm0\n"          /* r0 l0 --> mm0, int_32 */
    "pf2id     %%mm2, %%mm2\n"          /* r1 l1 --> mm2, int_32 */
    "packssdw  %%mm2, %%mm0\n"          /* r1 l1 r0 l0 --> mm0, int_16 */

    "movq      %%mm0, (%0)\n"

    "add          $8, %0\n"             /* 4 s16 written */
    "add          $8, %1\n"             /* 2 left floats consumed */
    "add          $8, %2\n"             /* 2 right floats consumed */
    "dec          %3\n"
    "jnz          1b\n"

    "femms\n"                           /* leave MMX/3DNow! state */
    : "+r" (s16_samples), "+r" (left), "+r" (right), "+r" (i_count)
    : /* no pure inputs */
    : "memory", "cc", "mm0", "mm1", "mm2" );
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix_c.c: ac3 downmix functions * ac3_downmix_c.c: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_c.c,v 1.7 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -35,11 +35,8 @@ ...@@ -35,11 +35,8 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h" void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *center, *left_sur, *right_sur; float *left, *right, *center, *left_sur, *right_sur;
...@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
} }
void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left