Commit 204e1a60 authored by Rishikesh More, committed by Henrik Gramner

mips: MSA pixel optimizations

Signed-off-by: Rishikesh More <rishikesh.more@imgtec.com>
parent 3ce6430e
......@@ -147,7 +147,7 @@ endif
ifeq ($(SYS_ARCH),MIPS)
ifneq ($(findstring HAVE_MSA 1, $(CONFIG)),)
SRCS += common/mips/mc-c.c common/mips/dct-c.c \
-common/mips/deblock-c.c
+common/mips/deblock-c.c common/mips/pixel-c.c
endif
endif
......
This diff is collapsed.
/*****************************************************************************
* pixel.h: msa pixel metrics
*****************************************************************************
* Copyright (C) 2015 x264 project
*
* Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#ifndef X264_MIPS_PIXEL_H
#define X264_MIPS_PIXEL_H

/* Prototypes for the MSA pixel-metric routines; the definitions are not part
 * of this header.  The include guard is named after this file (pixel.h) so it
 * cannot collide with a guard belonging to some other mips header.
 *
 * This header includes nothing itself: the fixed-width integer types used
 * below (uint8_t, int32_t, uint64_t, intptr_t) must already be declared by
 * the including translation unit. */

/* sad family: one source block and one reference block, each with its own
 * stride; returns an int32_t.  The WxH suffix is presumably the block size
 * handled by that entry point. */
int32_t x264_pixel_sad_16x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_16x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_8x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_8x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_8x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_4x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_4x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_sad_4x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );

/* sad_x4 family: one source pointer (no source stride parameter) and four
 * reference pointers sharing a single stride.  p_sad_array has four slots,
 * presumably one result per reference.  Note there is no 4x16 variant. */
void x264_pixel_sad_x4_16x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  uint8_t *p_ref3, intptr_t i_ref_stride,
                                  int32_t p_sad_array[4] );
void x264_pixel_sad_x4_16x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
void x264_pixel_sad_x4_8x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
void x264_pixel_sad_x4_8x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                uint8_t *p_ref3, intptr_t i_ref_stride,
                                int32_t p_sad_array[4] );
void x264_pixel_sad_x4_8x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                uint8_t *p_ref3, intptr_t i_ref_stride,
                                int32_t p_sad_array[4] );
void x264_pixel_sad_x4_4x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                uint8_t *p_ref3, intptr_t i_ref_stride,
                                int32_t p_sad_array[4] );
void x264_pixel_sad_x4_4x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                uint8_t *p_ref3, intptr_t i_ref_stride,
                                int32_t p_sad_array[4] );

/* sad_x3 family: same shape as sad_x4 but with three reference pointers and
 * a three-slot result array. */
void x264_pixel_sad_x3_16x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  intptr_t i_ref_stride,
                                  int32_t p_sad_array[3] );
void x264_pixel_sad_x3_16x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
void x264_pixel_sad_x3_8x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
void x264_pixel_sad_x3_8x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                intptr_t i_ref_stride,
                                int32_t p_sad_array[3] );
void x264_pixel_sad_x3_8x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                intptr_t i_ref_stride,
                                int32_t p_sad_array[3] );
void x264_pixel_sad_x3_4x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                intptr_t i_ref_stride,
                                int32_t p_sad_array[3] );
void x264_pixel_sad_x3_4x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                uint8_t *p_ref1, uint8_t *p_ref2,
                                intptr_t i_ref_stride,
                                int32_t p_sad_array[3] );

/* ssd family: same parameter list and return type as the sad family. */
int32_t x264_pixel_ssd_16x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_16x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_8x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_8x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_8x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_4x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_4x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );
int32_t x264_pixel_ssd_4x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                uint8_t *p_ref, intptr_t i_ref_stride );

/* intra_sad_x3 family: no stride parameters; a three-slot result array.
 * The 8x8 variant takes a 36-byte p_edge array instead of p_dec. */
void x264_intra_sad_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec,
                                int32_t p_sad_array[3] );
void x264_intra_sad_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec,
                                  int32_t p_sad_array[3] );
void x264_intra_sad_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36],
                                int32_t p_sad_array[3] );
void x264_intra_sad_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec,
                                 int32_t p_sad_array[3] );

/* ssim core: the only routine here taking const pixel pointers; fills a
 * 2x4 int32_t array. */
void x264_ssim_4x4x2_core_msa( const uint8_t *p_pix1, intptr_t i_stride1,
                               const uint8_t *p_pix2, intptr_t i_stride2,
                               int32_t i_sums[2][4] );

/* hadamard_ac family: single pixel pointer plus stride, 64-bit return. */
uint64_t x264_pixel_hadamard_ac_8x8_msa( uint8_t *p_pix, intptr_t i_stride );
uint64_t x264_pixel_hadamard_ac_8x16_msa( uint8_t *p_pix, intptr_t i_stride );
uint64_t x264_pixel_hadamard_ac_16x8_msa( uint8_t *p_pix, intptr_t i_stride );
uint64_t x264_pixel_hadamard_ac_16x16_msa( uint8_t *p_pix, intptr_t i_stride );

/* satd family: two pixel pointers, each followed by its stride. */
int32_t x264_pixel_satd_4x4_msa( uint8_t *p_pix1, intptr_t i_stride,
                                 uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_4x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                 uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_4x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_8x4_msa( uint8_t *p_pix1, intptr_t i_stride,
                                 uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_8x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                 uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_8x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_16x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_satd_16x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                   uint8_t *p_pix2, intptr_t i_stride2 );

/* sa8d family: same parameter list as satd; only 8x8 and 16x16 exist. */
int32_t x264_pixel_sa8d_8x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                 uint8_t *p_pix2, intptr_t i_stride2 );
int32_t x264_pixel_sa8d_16x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                   uint8_t *p_pix2, intptr_t i_stride2 );

/* intra_satd_x3 / intra_sa8d_x3 family: same parameter lists as the
 * intra_sad_x3 routines of the matching size. */
void x264_intra_satd_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec,
                                 int32_t p_sad_array[3] );
void x264_intra_satd_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec,
                                   int32_t p_sad_array[3] );
void x264_intra_sa8d_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36],
                                 int32_t p_sad_array[3] );
void x264_intra_satd_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec,
                                  int32_t p_sad_array[3] );

/* var family: single pixel pointer plus stride, 64-bit return. */
uint64_t x264_pixel_var_16x16_msa( uint8_t *p_pix, intptr_t i_stride );
uint64_t x264_pixel_var_8x16_msa( uint8_t *p_pix, intptr_t i_stride );
uint64_t x264_pixel_var_8x8_msa( uint8_t *p_pix, intptr_t i_stride );

/* var2 family: two pixel pointers with strides, an int32_t return value and
 * an additional int32_t output through p_ssd. */
int32_t x264_pixel_var2_8x16_msa( uint8_t *p_pix1, intptr_t i_stride1,
                                  uint8_t *p_pix2, intptr_t i_stride2,
                                  int32_t *p_ssd );
int32_t x264_pixel_var2_8x8_msa( uint8_t *p_pix1, intptr_t i_stride1,
                                 uint8_t *p_pix2, intptr_t i_stride2,
                                 int32_t *p_ssd );

#endif /* X264_MIPS_PIXEL_H */
......@@ -42,6 +42,9 @@
# include "aarch64/pixel.h"
# include "aarch64/predict.h"
#endif
#if ARCH_MIPS
# include "mips/pixel.h"
#endif
/****************************************************************************
......@@ -1449,6 +1452,38 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
}
#endif // ARCH_AARCH64
#if HAVE_MSA
/* MIPS MSA dispatch: compiled only when HAVE_MSA is set, and applied only
 * when the cpu flags passed to this function contain X264_CPU_MSA. */
if( cpu&X264_CPU_MSA )
{
/* Whole block-size families are filled in through the INIT* helper macros,
 * which are defined outside this hunk.  The digit is presumably the number
 * of block sizes covered (mips/pixel.h declares 8 sad/ssd/satd variants,
 * 7 sad_x3/sad_x4 variants and 4 hadamard_ac variants) -- TODO confirm
 * against the macro definitions. */
INIT8( sad, _msa );
/* sad_aligned is pointed at the same sad _msa routines. */
INIT8_NAME( sad_aligned, sad, _msa );
INIT8( ssd, _msa );
INIT7( sad_x3, _msa );
INIT7( sad_x4, _msa );
INIT8( satd, _msa );
INIT4( hadamard_ac, _msa );
/* Intra prediction cost routines are assigned one by one. */
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_msa;
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_msa;
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_msa;
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_msa;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_msa;
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_msa;
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_msa;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_msa;
pixf->ssim_4x4x2_core = x264_ssim_4x4x2_core_msa;
/* var / var2 tables are indexed by the PIXEL_WxH constants. */
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_msa;
pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_msa;
pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_msa;
pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_msa;
pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_msa;
/* NOTE(review): unlike every other assignment in this block, the two sa8d
 * entries below name functions WITHOUT the _msa suffix, so sa8d does not
 * use an MSA routine even when X264_CPU_MSA is set.  mips/pixel.h declares
 * x264_pixel_sa8d_16x16_msa and x264_pixel_sa8d_8x8_msa with a matching
 * parameter list; confirm whether leaving them unused is deliberate or
 * whether the suffix was simply forgotten. */
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8;
}
#endif // HAVE_MSA
#endif // HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment