Commit db7b0421 authored by Renaud Dartus's avatar Renaud Dartus

* Begin of SSE/3DNow! support for imdct and downmix

If you have a PIII or a Athlon and you want to try this, just comment #if 0
in ac3_downmix.c and ac3_imdct.c and add in AC3_DECODER section of Makefile :
	src/ac3_decoder/ac3_imdct_sse.o \
	src/ac3_decoder/ac3_srfft_sse.o \
	src/ac3_decoder/ac3_downmix_sse.o \
	src/ac3_decoder/ac3_downmix_3dn.o \
parent c1df8159
......@@ -55,7 +55,7 @@ AUDIO_OUTPUT = src/audio_output/audio_output.o \
src/audio_output/aout_s8.o \
src/audio_output/aout_u16.o \
src/audio_output/aout_s16.o \
src/audio_output/aout_spdif.o
src/audio_output/aout_spdif.o
VIDEO_OUTPUT = src/video_output/video_output.o \
src/video_output/video_text.o \
......@@ -76,7 +76,7 @@ AC3_DECODER = src/ac3_decoder/ac3_decoder_thread.o \
src/ac3_decoder/ac3_downmix_c.o
AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \
src/ac3_spdif/ac3_iec958.o
src/ac3_spdif/ac3_iec958.o
LPCM_DECODER = src/lpcm_decoder/lpcm_decoder_thread.o \
src/lpcm_decoder/lpcm_decoder.o
......
......@@ -2,7 +2,7 @@
* tests.h: several test functions needed by the plugins
*****************************************************************************
* Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN
* $Id: tests.h,v 1.9 2001/03/21 13:42:33 sam Exp $
* $Id: tests.h,v 1.10 2001/05/14 15:58:03 reno Exp $
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
......@@ -28,6 +28,7 @@
#define CPU_CAPABILITY_MMX 1<<3
#define CPU_CAPABILITY_3DNOW 1<<4
#define CPU_CAPABILITY_MMXEXT 1<<5
#define CPU_CAPABILITY_SSE 1<<6
#define CPU_CAPABILITY_ALTIVEC 1<<16
/*****************************************************************************
......
......@@ -2,7 +2,7 @@
* ac3_bit_allocate.c: ac3 allocation tables
*****************************************************************************
* Copyright (C) 2000 VideoLAN
* $Id: ac3_bit_allocate.c,v 1.20 2001/05/06 04:32:02 sam Exp $
* $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
......@@ -37,7 +37,7 @@
#include "input_ext-dec.h"
#include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_internal.h" /* DELTA_BIT_REUSE */
static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]);
......
......@@ -2,7 +2,7 @@
* ac3_decoder.c: core ac3 decoder
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.c,v 1.32 2001/05/07 03:14:09 stef Exp $
* $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org>
......@@ -40,22 +40,15 @@
#include "audio_output.h"
#include "ac3_decoder.h"
#include "ac3_decoder_thread.h"
#include "ac3_decoder_thread.h" /* ac3dec_thread_t */
#include "ac3_internal.h"
#include <stdio.h>
void imdct_init (imdct_t * p_imdct);
void downmix_init (downmix_t * p_downmix);
static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static const float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
int ac3_init (ac3dec_t * p_ac3dec)
{
// p_ac3dec->bit_stream.buffer = 0;
// p_ac3dec->bit_stream.i_available = 0;
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
imdct_init(&p_ac3dec->imdct);
downmix_init(&p_ac3dec->downmix);
......@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_bsi (p_ac3dec))
{
intf_WarnMsg (3,"Error during ac3parsing");
intf_WarnMsg (3,"ac3dec warn: error during parsing");
parse_auxdata (p_ac3dec);
return 1;
}
......@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_audblk (p_ac3dec, i))
{
intf_WarnMsg (3,"Error during ac3audioblock");
intf_WarnMsg (3,"ac3dec warn: error during audioblock");
parse_auxdata (p_ac3dec);
return 1;
}
......@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (exponent_unpack (p_ac3dec))
{
intf_WarnMsg (3,"Error during ac3unpack");
intf_WarnMsg (3,"ac3dec warn: error during unpack");
parse_auxdata (p_ac3dec);
return 1;
}
......
......@@ -2,7 +2,7 @@
* ac3_decoder.h : ac3 decoder interface
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.h,v 1.7 2001/04/30 21:04:20 reno Exp $
* $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org>
......@@ -372,7 +372,7 @@ typedef struct imdct_s
float xsin1[N/4];
float xcos2[N/8];
float xsin2[N/8];
/* Twiddle factor LUT */
complex_t *w[7];
complex_t w_1[1];
......
......@@ -2,7 +2,7 @@
* ac3_decoder_thread.h : ac3 decoder thread interface
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.h,v 1.6 2001/05/01 04:18:18 sam Exp $
* $Id: ac3_decoder_thread.h,v 1.7 2001/05/14 15:58:03 reno Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
*
......@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s
* Thread properties
*/
vlc_thread_t thread_id; /* id for thread functions */
// bit_stream_t bit_stream;
/*
* Input properties
*/
decoder_fifo_t * p_fifo; /* stores the PES stream data */
// data_packet_t * p_data;
int sync_ptr; /* sync ptr from ac3 magic header */
adec_config_t * p_config;
......
......@@ -2,7 +2,7 @@
* ac3_downmix.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_downmix.c,v 1.22 2001/05/06 04:32:02 sam Exp $
* $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
......@@ -31,29 +31,41 @@
#include "threads.h"
#include "mtime.h"
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h"
void downmix_init (downmix_t * p_downmix)
{
#if 0
if ( TestCPU (CPU_CAPABILITY_MMX) )
if ( TestCPU (CPU_CAPABILITY_SSE) )
{
fprintf(stderr,"Using MMX for downmix\n");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni;
} else
intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
}
else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
{
intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
}
else
#endif
{
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
......
......@@ -2,7 +2,7 @@
* ac3_downmix.h: ac3 downmix functions
*****************************************************************************
* Copyright (C) 2000, 2001 VideoLAN
* $Id: ac3_downmix.h,v 1.6 2001/04/30 21:04:20 reno Exp $
* $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
......@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center);
#if 0
/* Kni functions */
void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_kni(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_kni(s16 *s16_samples, float *center);
#endif
/* SSE functions */
void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);
/* 3DNow! functions */
void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);
/*****************************************************************************
* ac3_downmix_3dn.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n" /* loop counter */
"movd (%%ebx), %%mm5\n" /* unit */
"punpckldq %%mm5, %%mm5\n" /* unit | unit */
"movd 4(%%ebx), %%mm6\n" /* clev */
"punpckldq %%mm6, %%mm6\n" /* clev | clev */
"movd 8(%%ebx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */
".loop:\n"
"movq (%%eax), %%mm0\n" /* left */
"movq 2048(%%eax), %%mm1\n" /* right */
"movq 1024(%%eax), %%mm2\n" /* center */
"movq 3072(%%eax), %%mm3\n" /* leftsur */
"movq 4096(%%eax), %%mm4\n" /* rightsur */
"pfmul %%mm5, %%mm0\n"
"pfmul %%mm5, %%mm1\n"
"pfmul %%mm6, %%mm2\n"
"pfadd %%mm2, %%mm0\n"
"pfadd %%mm2, %%mm1\n"
"pfmul %%mm7, %%mm3\n"
"pfmul %%mm7, %%mm4\n"
"pfadd %%mm3, %%mm0\n"
"pfadd %%mm4, %%mm1\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm1, 1024(%%eax)\n"
"addl $8, %%eax\n"
"decl %%ecx\n"
"jnz .loop\n"
"popl %%ecx\n"
"femms\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n" /* loop counter */
"movd (%%ebx), %%mm5\n" /* unit */
"punpckldq %%mm5, %%mm5\n" /* unit | unit */
"movd 8(%%ebx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */
".loop3:\n"
"movq (%%eax), %%mm0\n" /* left */
"movq 1024(%%eax), %%mm1\n" /* right */
"movq 2048(%%eax), %%mm3\n" /* leftsur */
"movq 3072(%%eax), %%mm4\n" /* rightsur */
"pfmul %%mm5, %%mm0\n"
"pfmul %%mm5, %%mm1\n"
"pfmul %%mm7, %%mm3\n"
"pfmul %%mm7, %%mm4\n"
"pfadd %%mm3, %%mm0\n"
"pfadd %%mm4, %%mm1\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm1, 1024(%%eax)\n"
"addl $8, %%eax\n"
"decl %%ecx\n"
"jnz .loop3\n"
"popl %%ecx\n"
"femms\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n" /* loop counter */
"movd (%%ebx), %%mm5\n" /* unit */
"punpckldq %%mm5, %%mm5\n" /* unit | unit */
"movd 4(%%ebx), %%mm6\n" /* clev */
"punpckldq %%mm6, %%mm6\n" /* clev | clev */
"movd 8(%%ebx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */
".loop4:\n"
"movq (%%eax), %%mm0\n" /* left */
"movq 2048(%%eax), %%mm1\n" /* right */
"movq 1024(%%eax), %%mm2\n" /* center */
"movq 3072(%%eax), %%mm3\n" /* sur */
"pfmul %%mm5, %%mm0\n"
"pfmul %%mm5, %%mm1\n"
"pfmul %%mm6, %%mm2\n"
"pfadd %%mm2, %%mm0\n"
"pfmul %%mm7, %%mm3\n"
"pfadd %%mm2, %%mm1\n"
"pfsub %%mm3, %%mm0\n"
"pfadd %%mm3, %%mm1\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm1, 1024(%%eax)\n"
"addl $8, %%eax\n"
"decl %%ecx\n"
"jnz .loop4\n"
"popl %%ecx\n"
"femms\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n" /* loop counter */
"movd (%%ebx), %%mm5\n" /* unit */
"punpckldq %%mm5, %%mm5\n" /* unit | unit */
"movd 8(%%ebx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */
".loop5:\n"
"movq (%%eax), %%mm0\n" /* left */
"movq 1024(%%eax), %%mm1\n" /* right */
"movq 2048(%%eax), %%mm3\n" /* sur */
"pfmul %%mm5, %%mm0\n"
"pfmul %%mm5, %%mm1\n"
"pfmul %%mm7, %%mm3\n"
"pfsub %%mm3, %%mm0\n"
"pfadd %%mm3, %%mm1\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm1, 1024(%%eax)\n"
"addl $8, %%eax\n"
"decl %%ecx\n"
"jnz .loop5\n"
"popl %%ecx\n"
"femms\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n" /* loop counter */
"movd (%%ebx), %%mm5\n" /* unit */
"punpckldq %%mm5, %%mm5\n" /* unit | unit */
"movd 4(%%ebx), %%mm6\n" /* clev */
"punpckldq %%mm6, %%mm6\n" /* clev | clev */
".loop6:\n"
"movq (%%eax), %%mm0\n" /*left */
"movq 2048(%%eax), %%mm1\n" /* right */
"movq 1024(%%eax), %%mm2\n" /* center */
"pfmul %%mm5, %%mm0\n"
"pfmul %%mm5, %%mm1\n"
"pfmul %%mm6, %%mm2\n"
"pfadd %%mm2, %%mm0\n"
"pfadd %%mm2, %%mm1\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm1, 1024(%%eax)\n"
"addl $8, %%eax\n"
"decl %%ecx\n"
"jnz .loop6\n"
"popl %%ecx\n"
"femms\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"pushl %%edx\n"
"movl $sqrt2, %%edx\n"
"movd (%%edx), %%mm7\n"
"punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */
"movl $128, %%ecx\n"
".loop2:\n"
"movq (%%ebx), %%mm0\n" /* c1 | c0 */
"pfmul %%mm7, %%mm0\n"
"pf2id %%mm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
"packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */
"movq %%mm0, (%%eax)\n"
"addl $8, %%eax\n"
"addl $8, %%ebx\n"
"decl %%ecx\n"
"jnz .loop2\n"
"popl %%edx\n"
"popl %%ecx\n"
"femms\n"
: "=a" (s16_samples), "=b" (left)
: "a" (s16_samples), "b" (left));
}
void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $128, %%ecx\n"
".loop1:\n"
"movq (%%ebx), %%mm0\n" /* l1 | l0 */
"movq (%%edx), %%mm1\n" /* r1 | r0 */
"movq %%mm0, %%mm2\n" /* l1 | l0 */
"punpckldq %%mm1, %%mm0\n" /* r0 | l0 */
"punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */
"pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
"pf2id %%mm2, %%mm2\n" /* r0 l0 --> mm0, int_32 */
"packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
"movq %%mm0, (%%eax)\n"
"movq %%mm2, 8(%%eax)\n"
"addl $8, %%eax\n"
"addl $8, %%ebx\n"
"addl $8, %%edx\n"
"decl %%ecx\n"
"jnz .loop1\n"
"popl %%ecx\n"
"femms\n"
: "=a" (s16_samples), "=b" (left), "=d" (right)
: "a" (s16_samples), "b" (left), "d" (right));
}
......@@ -2,7 +2,7 @@
* ac3_downmix_c.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_c.c,v 1.7 2001/05/06 04:32:02 sam Exp $
* $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
......@@ -35,11 +35,8 @@
#include "input_ext-dec.h"
#include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h"
void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *left_sur, *right_sur;
......@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
}
}
void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *left_sur, *right_sur;
......@@ -78,7 +75,7 @@ void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
}
}
void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *right_sur;
......@@ -98,7 +95,7 @@ void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
}
void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *right_sur;
......@@ -117,7 +114,7 @@ void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
}
void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center;
......@@ -136,7 +133,7 @@ void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
}
void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
{
int i;
for (i=0; i < 256; i++) {
......@@ -146,7 +143,7 @@ void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *ri
}
void __inline__ stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
{
int i;
float tmp;
......
/*****************************************************************************
* ac3_downmix_sse.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
void sqrt2 (void)
{
__asm__ (".float 0f0.7071068");
}
void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $64, %%ecx\n" /* loop counter */
"movss (%%ebx), %%xmm5\n" /* unit */
"shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
"movss 4(%%ebx), %%xmm6\n" /* clev */
"shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
"movss 8(%%ebx), %%xmm7\n" /* slev */
"shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
".loop:\n"
"movups (%%eax), %%xmm0\n" /* left */
"movups 2048(%%eax), %%xmm1\n" /* right */
"movups 1024(%%eax), %%xmm2\n" /* center */
"movups 3072(%%eax), %%xmm3\n" /* leftsur */
"movups 4096(%%eax), %%xmm4\n" /* rithgsur */
"mulps %%xmm5, %%xmm0\n"
"mulps %%xmm5, %%xmm1\n"
"mulps %%xmm6, %%xmm2\n"
"addps %%xmm2, %%xmm0\n"
"addps %%xmm2, %%xmm1\n"
"mulps %%xmm7, %%xmm3\n"
"mulps %%xmm7, %%xmm4\n"
"addps %%xmm3, %%xmm0\n"
"addps %%xmm4, %%xmm1\n"
"movups %%xmm0, (%%eax)\n"
"movups %%xmm1, 1024(%%eax)\n"
"addl $16, %%eax\n"
"decl %%ecx\n"
"jnz .loop\n"
"popl %%ecx\n"
: "=a" (samples)
: "a" (samples), "b" (dm_par));
}
void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
{
__asm__ __volatile__ (
"pushl %%ecx\n"
"movl $64, %%ecx\n" /* loop counter */
"movss (%%ebx), %%xmm5\n" /* unit */
"shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
"movss 8(%%ebx), %%xmm7\n" /* slev */
"shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
".loop3:\n"
"movups (%%eax), %%xmm0\n" /* left */
"movups 1024(%%eax), %%xmm1\n