Commit 31126194 authored by Loren Merritt

cosmetics: move avg function ptrs from pixf to mc.



git-svn-id: svn://svn.videolan.org/x264/trunk@306 df754926-b1dd-0310-bc7b-ec298dee348c
parent 82d5e6fa
......@@ -51,11 +51,17 @@ BITS 64
%endif
%endmacro
;-----------------------------------------------------------------------------
; Various memory constants (trigonometric values or rounding values)
;-----------------------------------------------------------------------------
;=============================================================================
; Constants
;=============================================================================
SECTION .rodata
ALIGN 16
pw_4: times 4 dw 4
pw_8: times 4 dw 8
pw_32: times 4 dw 32
pw_64: times 4 dw 64
;=============================================================================
; Code
......@@ -75,6 +81,9 @@ cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_sse
;=============================================================================
; pixel avg
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -238,6 +247,10 @@ ALIGN 4
;=============================================================================
; pixel copy
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
......@@ -368,13 +381,10 @@ ALIGN 4
ret
SECTION .rodata
ALIGN 16
eights times 4 dw 8
thirty2s times 4 dw 32
SECTION .text
;=============================================================================
; chroma MC
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -396,7 +406,7 @@ x264_mc_chroma_sse:
pshufw mm5, mm0, 0 ; mm5 - dx
pshufw mm6, mm1, 0 ; mm6 - dy
movq mm4, [eights GLOBAL]
movq mm4, [pw_8 GLOBAL]
movq mm0, mm4
psubw mm4, mm5 ; mm4 - 8-dx
......@@ -437,7 +447,7 @@ ALIGN 4
punpcklbw mm2, mm3
punpcklbw mm1, mm3
paddw mm0, [thirty2s GLOBAL]
paddw mm0, [pw_32 GLOBAL]
pmullw mm2, mm5 ; line * cB
pmullw mm1, mm7 ; line * cD
......
......@@ -46,7 +46,7 @@ BITS 32
%endmacro
;=============================================================================
; Local Data (Read Only)
; Constants
;=============================================================================
%ifdef FORMAT_COFF
......@@ -55,11 +55,11 @@ SECTION .rodata data
SECTION .rodata data align=16
%endif
;-----------------------------------------------------------------------------
; Various memory constants (trigonometric values or rounding values)
;-----------------------------------------------------------------------------
ALIGN 16
pw_4: times 4 dw 4
pw_8: times 4 dw 8
pw_32: times 4 dw 32
pw_64: times 4 dw 64
;=============================================================================
; Code
......@@ -79,6 +79,9 @@ cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_sse
;=============================================================================
; pixel avg
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -241,6 +244,9 @@ ALIGN 4
ret
;=============================================================================
; pixel copy
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -396,13 +402,10 @@ ALIGN 4
ret
SECTION .rodata
ALIGN 16
eights times 4 dw 8
thirty2s times 4 dw 32
SECTION .text
;=============================================================================
; chroma MC
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -419,7 +422,7 @@ x264_mc_chroma_sse:
pshufw mm5, [esp+20], 0 ; mm5 - dx
pshufw mm6, [esp+24], 0 ; mm6 - dy
movq mm4, [eights]
movq mm4, [pw_8]
movq mm0, mm4
psubw mm4, mm5 ; mm4 - 8-dx
......@@ -455,7 +458,7 @@ ALIGN 4
punpcklbw mm2, mm3
punpcklbw mm1, mm3
paddw mm0, [thirty2s]
paddw mm0, [pw_32]
pmullw mm2, mm5 ; line * cB
pmullw mm1, mm7 ; line * cD
......
......@@ -32,6 +32,7 @@
#include <stdarg.h>
#include "x264.h" /* DECLARE_ALIGNED */
#include "common/pixel.h"
#include "common/mc.h"
#include "common/clip1.h"
#include "mc.h"
......
......@@ -746,27 +746,27 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
const int i_ref0 = h->mb.cache.ref[0][i8];
const int weight = h->mb.bipred_weight[i_ref0][i_ref1];
h->pixf.avg_weight[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16, weight );
h->mc.avg_weight[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16, weight );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16, weight );
h->mc.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16, weight );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16, weight );
h->mc.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16, weight );
}
else
{
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
}
}
......
......@@ -33,6 +33,7 @@
#include "x264.h"
#include "pixel.h"
#include "mc.h"
#include "clip1.h"
#include "frame.h"
......@@ -72,6 +73,88 @@ static inline void pixel_avg( uint8_t *dst, int i_dst_stride,
}
}
/* Rounded average of two pixel blocks, stored back into dst:
 * every pixel of the width x height area becomes ( dst + src + 1 ) >> 1.
 * i_dst and i_src are the per-row pointer increments of dst and src. */
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
{
    int i_row;
    for( i_row = 0; i_row < height; i_row++, dst += i_dst, src += i_src )
    {
        int i_col = 0;
        while( i_col < width )
        {
            /* the +1 makes the halving round upwards */
            const int i_sum = dst[i_col] + src[i_col] + 1;
            dst[i_col] = i_sum >> 1;
            i_col++;
        }
    }
}
/* PIXEL_AVG_C( name, width, height ):
 * defines a static function `name( pix1, i_stride_pix1, pix2, i_stride_pix2 )`
 * that forwards to pixel_avg_wxh() with the given width and height,
 * i.e. averages pix2 into pix1 over a fixed-size block. */
#define PIXEL_AVG_C( name, width, height ) \
static void name( uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2 ) \
{ \
pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
}
/* One fixed-size averaging function per block size, 16x16 down to 2x2. */
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8, 16, 8 )
PIXEL_AVG_C( pixel_avg_8x16, 8, 16 )
PIXEL_AVG_C( pixel_avg_8x8, 8, 8 )
PIXEL_AVG_C( pixel_avg_8x4, 8, 4 )
PIXEL_AVG_C( pixel_avg_4x8, 4, 8 )
PIXEL_AVG_C( pixel_avg_4x4, 4, 4 )
PIXEL_AVG_C( pixel_avg_4x2, 4, 2 )
PIXEL_AVG_C( pixel_avg_2x4, 2, 4 )
PIXEL_AVG_C( pixel_avg_2x2, 2, 2 )
/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
#define op_scale2(x) dst[x] = x264_clip_uint8( (dst[x]*i_weight1 + src[x]*i_weight2 + (1<<5)) >> 6 )
/* Weighted blend of src into dst over `height` rows:
 * dst[x] = clip( ( dst[x]*i_weight1 + src[x]*(64-i_weight1) + 32 ) >> 6 ).
 * Only widths 2, 4 and 8 are recognised as such; any other width value
 * processes 16 pixels per row. */
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height, int i_weight1 )
{
    /* op_scale2() reads i_weight1 and i_weight2 by name */
    const int i_weight2 = 64 - i_weight1;
    const int i_cols = ( width == 2 || width == 4 || width == 8 ) ? width : 16;
    int i_row, i_col;

    for( i_row = 0; i_row < height; i_row++ )
    {
        for( i_col = 0; i_col < i_cols; i_col++ )
        {
            op_scale2( i_col );
        }
        dst += i_dst;
        src += i_src;
    }
}
/* PIXEL_AVG_WEIGHT_C( width, height ):
 * defines static pixel_avg_weight_<width>x<height>( pix1, i_stride_pix1,
 * pix2, i_stride_pix2, i_weight1 ), which forwards to
 * pixel_avg_weight_wxh() with the block size fixed at compile time. */
#define PIXEL_AVG_WEIGHT_C( width, height ) \
static void pixel_avg_weight_##width##x##height( \
uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2, int i_weight1 ) \
{ \
pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height, i_weight1 ); \
}
/* One weighted-average function per block size, 16x16 down to 2x2. */
PIXEL_AVG_WEIGHT_C(16,16)
PIXEL_AVG_WEIGHT_C(16,8)
PIXEL_AVG_WEIGHT_C(8,16)
PIXEL_AVG_WEIGHT_C(8,8)
PIXEL_AVG_WEIGHT_C(8,4)
PIXEL_AVG_WEIGHT_C(4,8)
PIXEL_AVG_WEIGHT_C(4,4)
PIXEL_AVG_WEIGHT_C(4,2)
PIXEL_AVG_WEIGHT_C(2,4)
PIXEL_AVG_WEIGHT_C(2,2)
/* The helper macros are only needed for the definitions above. */
#undef op_scale2
#undef PIXEL_AVG_WEIGHT_C
typedef void (*pf_mc_t)(uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height );
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
......@@ -396,6 +479,28 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->get_ref = get_ref;
pf->mc_chroma = motion_compensation_chroma;
pf->avg[PIXEL_16x16]= pixel_avg_16x16;
pf->avg[PIXEL_16x8] = pixel_avg_16x8;
pf->avg[PIXEL_8x16] = pixel_avg_8x16;
pf->avg[PIXEL_8x8] = pixel_avg_8x8;
pf->avg[PIXEL_8x4] = pixel_avg_8x4;
pf->avg[PIXEL_4x8] = pixel_avg_4x8;
pf->avg[PIXEL_4x4] = pixel_avg_4x4;
pf->avg[PIXEL_4x2] = pixel_avg_4x2;
pf->avg[PIXEL_2x4] = pixel_avg_2x4;
pf->avg[PIXEL_2x2] = pixel_avg_2x2;
pf->avg_weight[PIXEL_16x16]= pixel_avg_weight_16x16;
pf->avg_weight[PIXEL_16x8] = pixel_avg_weight_16x8;
pf->avg_weight[PIXEL_8x16] = pixel_avg_weight_8x16;
pf->avg_weight[PIXEL_8x8] = pixel_avg_weight_8x8;
pf->avg_weight[PIXEL_8x4] = pixel_avg_weight_8x4;
pf->avg_weight[PIXEL_4x8] = pixel_avg_weight_4x8;
pf->avg_weight[PIXEL_4x4] = pixel_avg_weight_4x4;
pf->avg_weight[PIXEL_4x2] = pixel_avg_weight_4x2;
pf->avg_weight[PIXEL_2x4] = pixel_avg_weight_2x4;
pf->avg_weight[PIXEL_2x2] = pixel_avg_weight_2x2;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT ) {
x264_mc_mmxext_init( pf );
......
......@@ -44,6 +44,9 @@ typedef struct
void (*mc_chroma)(uint8_t *, int, uint8_t *, int,
int mvx, int mvy,
int i_width, int i_height );
void (*avg[10])( uint8_t *dst, int, uint8_t *src, int );
void (*avg_weight[10])( uint8_t *dst, int, uint8_t *src, int, int i_weight );
} x264_mc_functions_t;
void x264_mc_init( int cpu, x264_mc_functions_t *pf );
......
......@@ -264,89 +264,6 @@ PIXEL_SA8D_C( 8, 16 )
PIXEL_SA8D_C( 8, 8 )
/* Rounded average of two pixel blocks, stored back into dst:
 * every pixel of the width x height area becomes ( dst + src + 1 ) >> 1.
 * i_dst and i_src are the per-row pointer increments of dst and src. */
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
{
    int i_row;
    for( i_row = 0; i_row < height; i_row++, dst += i_dst, src += i_src )
    {
        int i_col = 0;
        while( i_col < width )
        {
            /* the +1 makes the halving round upwards */
            const int i_sum = dst[i_col] + src[i_col] + 1;
            dst[i_col] = i_sum >> 1;
            i_col++;
        }
    }
}
/* PIXEL_AVG_C( name, width, height ):
 * defines a static function `name( pix1, i_stride_pix1, pix2, i_stride_pix2 )`
 * that forwards to pixel_avg_wxh() with the given width and height,
 * i.e. averages pix2 into pix1 over a fixed-size block. */
#define PIXEL_AVG_C( name, width, height ) \
static void name( uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2 ) \
{ \
pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
}
/* One fixed-size averaging function per block size, 16x16 down to 2x2. */
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8, 16, 8 )
PIXEL_AVG_C( pixel_avg_8x16, 8, 16 )
PIXEL_AVG_C( pixel_avg_8x8, 8, 8 )
PIXEL_AVG_C( pixel_avg_8x4, 8, 4 )
PIXEL_AVG_C( pixel_avg_4x8, 4, 8 )
PIXEL_AVG_C( pixel_avg_4x4, 4, 4 )
PIXEL_AVG_C( pixel_avg_4x2, 4, 2 )
PIXEL_AVG_C( pixel_avg_2x4, 2, 4 )
PIXEL_AVG_C( pixel_avg_2x2, 2, 2 )
/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
#define op_scale2(x) dst[x] = x264_clip_uint8( (dst[x]*i_weight1 + src[x]*i_weight2 + (1<<5)) >> 6 )
/* Weighted blend of src into dst over `height` rows:
 * dst[x] = clip( ( dst[x]*i_weight1 + src[x]*(64-i_weight1) + 32 ) >> 6 ).
 * Only widths 2, 4 and 8 are recognised as such; any other width value
 * processes 16 pixels per row. */
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height, int i_weight1 )
{
    /* op_scale2() reads i_weight1 and i_weight2 by name */
    const int i_weight2 = 64 - i_weight1;
    const int i_cols = ( width == 2 || width == 4 || width == 8 ) ? width : 16;
    int i_row, i_col;

    for( i_row = 0; i_row < height; i_row++ )
    {
        for( i_col = 0; i_col < i_cols; i_col++ )
        {
            op_scale2( i_col );
        }
        dst += i_dst;
        src += i_src;
    }
}
/* PIXEL_AVG_WEIGHT_C( width, height ):
 * defines static pixel_avg_weight_<width>x<height>( pix1, i_stride_pix1,
 * pix2, i_stride_pix2, i_weight1 ), which forwards to
 * pixel_avg_weight_wxh() with the block size fixed at compile time. */
#define PIXEL_AVG_WEIGHT_C( width, height ) \
static void pixel_avg_weight_##width##x##height( \
uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2, int i_weight1 ) \
{ \
pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height, i_weight1 ); \
}
/* One weighted-average function per block size, 16x16 down to 2x2. */
PIXEL_AVG_WEIGHT_C(16,16)
PIXEL_AVG_WEIGHT_C(16,8)
PIXEL_AVG_WEIGHT_C(8,16)
PIXEL_AVG_WEIGHT_C(8,8)
PIXEL_AVG_WEIGHT_C(8,4)
PIXEL_AVG_WEIGHT_C(4,8)
PIXEL_AVG_WEIGHT_C(4,4)
PIXEL_AVG_WEIGHT_C(4,2)
PIXEL_AVG_WEIGHT_C(2,4)
PIXEL_AVG_WEIGHT_C(2,2)
/* The helper macros are only needed for the definitions above. */
#undef op_scale2
#undef PIXEL_AVG_WEIGHT_C
/****************************************************************************
* x264_pixel_init:
****************************************************************************/
......@@ -381,28 +298,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sa8d[PIXEL_8x16] = pixel_sa8d_8x16;
pixf->sa8d[PIXEL_8x8] = pixel_sa8d_8x8;
pixf->avg[PIXEL_16x16]= pixel_avg_16x16;
pixf->avg[PIXEL_16x8] = pixel_avg_16x8;
pixf->avg[PIXEL_8x16] = pixel_avg_8x16;
pixf->avg[PIXEL_8x8] = pixel_avg_8x8;
pixf->avg[PIXEL_8x4] = pixel_avg_8x4;
pixf->avg[PIXEL_4x8] = pixel_avg_4x8;
pixf->avg[PIXEL_4x4] = pixel_avg_4x4;
pixf->avg[PIXEL_4x2] = pixel_avg_4x2;
pixf->avg[PIXEL_2x4] = pixel_avg_2x4;
pixf->avg[PIXEL_2x2] = pixel_avg_2x2;
pixf->avg_weight[PIXEL_16x16]= pixel_avg_weight_16x16;
pixf->avg_weight[PIXEL_16x8] = pixel_avg_weight_16x8;
pixf->avg_weight[PIXEL_8x16] = pixel_avg_weight_8x16;
pixf->avg_weight[PIXEL_8x8] = pixel_avg_weight_8x8;
pixf->avg_weight[PIXEL_8x4] = pixel_avg_weight_8x4;
pixf->avg_weight[PIXEL_4x8] = pixel_avg_weight_4x8;
pixf->avg_weight[PIXEL_4x4] = pixel_avg_weight_4x4;
pixf->avg_weight[PIXEL_4x2] = pixel_avg_weight_4x2;
pixf->avg_weight[PIXEL_2x4] = pixel_avg_weight_2x4;
pixf->avg_weight[PIXEL_2x2] = pixel_avg_weight_2x2;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT )
{
......
......@@ -25,8 +25,6 @@
#define _PIXEL_H 1
typedef int (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_weight_t) ( uint8_t *, int, uint8_t *, int, int );
enum
{
......@@ -68,8 +66,6 @@ typedef struct
x264_pixel_cmp_t satd[7];
x264_pixel_cmp_t sa8d[4];
x264_pixel_cmp_t mbcmp[7]; /* either satd or sad for subpel refine and mode decision */
x264_pixel_avg_t avg[10];
x264_pixel_avg_weight_t avg_weight[10];
} x264_pixel_function_t;
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
......
......@@ -1038,10 +1038,10 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
#define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
{ \
if( h->param.analyse.b_weighted_bipred ) \
h->pixf.avg_weight[size]( pix1, stride1, src2, stride2, \
h->mc.avg_weight[size]( pix1, stride1, src2, stride2, \
h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \
else \
h->pixf.avg[size]( pix1, stride1, src2, stride2 ); \
h->mc.avg[size]( pix1, stride1, src2, stride2 ); \
}
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
......@@ -1151,10 +1151,10 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
}
if( h->param.analyse.b_weighted_bipred )
h->pixf.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2,
h->mc.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2,
h->mb.bipred_weight[pix1_ref][src2_ref] );
else
h->pixf.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );
h->mc.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );
a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 )
+ a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref )
......
......@@ -120,7 +120,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
(mv0)[0], (mv0)[1], 8, 8 ); \
src2 = h->mc.get_ref( m[1].p_fref, m[1].i_stride[0], pix2, &stride2, \
(mv1)[0], (mv1)[1], 8, 8 ); \
h->pixf.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
h->mc.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
m[0].p_fenc[0], m[0].i_stride[0], pix1, 8 ); \
if( i_bcost > i_cost ) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment