Commit c7f7a5a0 authored by Christophe Massiot

* ALL: support for ARM-specific optimizations, code by Koji Agawa.

parent 41eaa411
......@@ -9,8 +9,10 @@ Sam Hocevar <sam@zoy.org> and Christophe Massiot <massiot@via.ecp.fr>
are the current maintainers.
Other contributors include:
Koji Agawa <i (AT) atty (DOT) jp> - ARM code
Bruno Barreyra <barreyra@ufl.edu> - build fixes
Gildas Bazin <gbazin@netcourrier.com> - mingw32 port
Diego Biurrun <diego@biurrun.de> - portability fixes
Alexander W. Chin <alexc@newt.phys.unsw.edu.au> - progressive_seq fix
Stephen Crowley <stephenc@dns2.digitalpassage.com> - build fixes
Didier Gautheron <dgautheron@magic.fr> - bug fixes
......
......@@ -17,6 +17,7 @@ AM_MAINTAINER_MODE
dnl Checks for compiler
AC_PROG_CC
AC_PROG_GCC_TRADITIONAL
AM_PROG_AS
dnl Checks for headers. We do this before the CC-specific section because
dnl autoconf generates tests for generic headers before the first header test.
......@@ -58,6 +59,7 @@ elif test x"$GCC" = x"yes"; then
AC_TRY_CFLAGS([$TRY_CFLAGS $CFLAGS],[OPT_CFLAGS="$TRY_CFLAGS"])
dnl arch-specific flags
arm_conditional=false
case "$host" in
i?86-* | k?-* | x86_64-* | amd64-*)
AC_DEFINE([ARCH_X86],,[x86 architecture])
......@@ -99,6 +101,9 @@ elif test x"$GCC" = x"yes"; then
AC_TRY_CFLAGS([$TRY_CFLAGS $CFLAGS],[OPT_CFLAGS="$TRY_CFLAGS"]);;
alpha*)
AC_DEFINE([ARCH_ALPHA],,[alpha architecture]);;
arm*)
arm_conditional=:
AC_DEFINE([ARCH_ARM],,[ARM architecture]);;
esac
elif test x"$CC" = x"tendracc"; then
dnl TenDRA portability checking compiler
......@@ -118,6 +123,8 @@ else
esac
fi
dnl arm_conditional is only assigned inside the GCC branch above. For any
dnl other compiler it is unset, and an empty expansion would run as an empty
dnl command (exit status 0), turning ARCH_ARM on for every host. Default it
dnl to false so the ARM assembly is only built when the arm* case matched.
AM_CONDITIONAL([ARCH_ARM], [${arm_conditional:-false}])
dnl Checks for libtool - this must be done after we set cflags
AC_DISABLE_SHARED
AC_LIBTOOL_WIN32_DLL
......
......@@ -162,6 +162,7 @@ void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf);
#define MPEG2_ACCEL_ALPHA_MVI 2
#define MPEG2_ACCEL_SPARC_VIS 1
#define MPEG2_ACCEL_SPARC_VIS2 2
#define MPEG2_ACCEL_ARM 1
#define MPEG2_ACCEL_DETECT 0x80000000
uint32_t mpeg2_accel (uint32_t accel);
......
......@@ -11,8 +11,11 @@ noinst_LTLIBRARIES = libmpeg2arch.la
libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
motion_comp_altivec.c idct_altivec.c \
motion_comp_alpha.c idct_alpha.c \
motion_comp_vis.c \
motion_comp_vis.c motion_comp_arm.c \
cpu_accel.c cpu_state.c
if ARCH_ARM
libmpeg2arch_la_SOURCES += motion_comp_arm_s.S
endif
libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
pkgconfigdir = $(libdir)/pkgconfig
......
......@@ -56,6 +56,11 @@ void mpeg2_mc_init (uint32_t accel)
if (accel & MPEG2_ACCEL_SPARC_VIS)
mpeg2_mc = mpeg2_mc_vis;
else
#endif
#ifdef ARCH_ARM
if (accel & MPEG2_ACCEL_ARM) {
mpeg2_mc = mpeg2_mc_arm;
} else
#endif
mpeg2_mc = mpeg2_mc_c;
}
......
/*
* motion_comp_arm.c
* Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
*
* This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
* See http://libmpeg2.sourceforge.net/ for updates.
*
* mpeg2dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mpeg2dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with mpeg2dec; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config.h"
#ifdef ARCH_ARM
#include <inttypes.h>
#include "mpeg2.h"
#include "attributes.h"
#include "mpeg2_internal.h"
/*
 * Portable C kernels for the prediction modes that are not provided by the
 * ARM assembly (put_o and put_x are declared extern further down and are
 * therefore not instantiated here).
 */

/* Rounded averages of two and four pixel values (exact halves round up).
 * Every argument is parenthesized so that expressions can be passed. */
#define avg2(a,b) (((a) + (b) + 1) >> 1)
#define avg4(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)

/*
 * Predictors for pixel i of the current row.  They expand in a scope that
 * provides `ref` (current source row) and `stride` (offset to the next row):
 *   o  - the reference pixel itself
 *   x  - average with the pixel to its right
 *   y  - average with the pixel one row below
 *   xy - average of the 2x2 neighbourhood
 */
#define predict_o(i) (ref[(i)])
#define predict_x(i) (avg2 (ref[(i)], ref[(i) + 1]))
#define predict_y(i) (avg2 (ref[(i)], (ref + stride)[(i)]))
#define predict_xy(i) (avg4 (ref[(i)], ref[(i) + 1], \
                             (ref + stride)[(i)], (ref + stride)[(i) + 1]))

/* Store operations on `dest`: overwrite pixel i with the prediction, or
 * average the prediction into the value already stored there. */
#define put(predictor,i) (dest[(i)] = predictor (i))
#define avg(predictor,i) (dest[(i)] = avg2 (predictor (i), dest[(i)]))

/*
 * mc function template
 *
 * MC_FUNC (op, xy) defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, which
 * process `height` rows of 16 or 8 pixels.  After each row both `ref` and
 * `dest` advance by `stride`.  The row counter is tested only after a row has
 * been processed, so `height` must be at least 1.
 *
 * The definition ends on the closing brace of the second function; there is
 * deliberately no trailing line continuation after it.
 */
#define MC_FUNC(op,xy) \
static inline void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
                                          const int stride, int height) \
{ \
    do { \
        op (predict_##xy, 0); \
        op (predict_##xy, 1); \
        op (predict_##xy, 2); \
        op (predict_##xy, 3); \
        op (predict_##xy, 4); \
        op (predict_##xy, 5); \
        op (predict_##xy, 6); \
        op (predict_##xy, 7); \
        op (predict_##xy, 8); \
        op (predict_##xy, 9); \
        op (predict_##xy, 10); \
        op (predict_##xy, 11); \
        op (predict_##xy, 12); \
        op (predict_##xy, 13); \
        op (predict_##xy, 14); \
        op (predict_##xy, 15); \
        ref += stride; \
        dest += stride; \
    } while (--height); \
} \
static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
                                  const int stride, int height) \
{ \
    do { \
        op (predict_##xy, 0); \
        op (predict_##xy, 1); \
        op (predict_##xy, 2); \
        op (predict_##xy, 3); \
        op (predict_##xy, 4); \
        op (predict_##xy, 5); \
        op (predict_##xy, 6); \
        op (predict_##xy, 7); \
        ref += stride; \
        dest += stride; \
    } while (--height); \
}

/* definitions of the actual mc functions */
MC_FUNC (avg,o)
MC_FUNC (avg,x)
MC_FUNC (put,y)
MC_FUNC (avg,y)
MC_FUNC (put,xy)
MC_FUNC (avg,xy)
/*
 * ARM motion compensation entry points.
 *
 * The four put_o / put_x routines are external symbols (exported with
 * .global from motion_comp_arm_s.S).  Every other mode simply forwards to the
 * C kernel of the same name generated by MC_FUNC.
 */
extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
                             int stride, int height);
extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
                             int stride, int height);
extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
                            int stride, int height);
extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
                            int stride, int height);

/* Defines static MC_<op>_<xy>_<size>_arm as a plain call to
 * MC_<op>_<xy>_<size>_c with the same four arguments. */
#define MC_ARM_VIA_C(op,xy,size) \
static void MC_##op##_##xy##_##size##_arm (uint8_t * dest, \
                                           const uint8_t * ref, \
                                           int stride, int height) \
{ \
    MC_##op##_##xy##_##size##_c (dest, ref, stride, height); \
}

/* put: only the y and xy modes go through C */
MC_ARM_VIA_C (put, y, 16)
MC_ARM_VIA_C (put, xy, 16)
MC_ARM_VIA_C (put, y, 8)
MC_ARM_VIA_C (put, xy, 8)

/* avg: all four modes go through C */
MC_ARM_VIA_C (avg, o, 16)
MC_ARM_VIA_C (avg, x, 16)
MC_ARM_VIA_C (avg, y, 16)
MC_ARM_VIA_C (avg, xy, 16)
MC_ARM_VIA_C (avg, o, 8)
MC_ARM_VIA_C (avg, x, 8)
MC_ARM_VIA_C (avg, y, 8)
MC_ARM_VIA_C (avg, xy, 8)

#undef MC_ARM_VIA_C

/* Project macro; presumably builds the mpeg2_mc_arm table from the sixteen
 * MC_*_arm functions above (its definition is not in this file). */
MPEG2_MC_EXTERN (arm)
#endif
@ motion_comp_arm_s.S
@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
@ See http://libmpeg2.sourceforge.net/ for updates.
@
@ mpeg2dec is free software; you can redistribute it and/or modify
@ it under the terms of the GNU General Public License as published by
@ the Free Software Foundation; either version 2 of the License, or
@ (at your option) any later version.
@
@ mpeg2dec is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with mpeg2dec; if not, write to the Free Software
@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
.text
@ ----------------------------------------------------------------
@ MC_put_o_16_arm: copy a 16-byte-wide block from ref to dest, one row per
@ loop iteration, without interpolation.
@ Register use (per the prototype comment below and the code):
@   r0 = dest, r1 = ref, r2 = stride, r3 = height (row counter).
@ The low two bits of ref select one of four code paths through the jump
@ table at the end of the routine. The three unaligned paths round ref down
@ to a word boundary and rebuild the row with shifts, so ref is read with
@ ldmia from a word-aligned address as long as stride is a multiple of 4
@ (not checked here). dest is written with stmia without any adjustment, so
@ it is presumably expected to be word aligned already (TODO confirm against
@ the callers).
@ The row counter is tested only after it has been decremented, so height
@ must be at least 1.
.align
.global MC_put_o_16_arm
MC_put_o_16_arm:
@@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
pld [r1]
stmfd sp!, {r4-r11, lr} @ R14 is also called LR
@ r4 = ref & 3: byte offset of ref inside its 32-bit word.
and r4, r1, #3
@ Jump through the table entry for that offset (4 bytes per entry).
adr r5, MC_put_o_16_arm_align_jt
add r5, r5, r4, lsl #2
ldr pc, [r5]
@ Offset 0: each row is four whole words, copied directly.
MC_put_o_16_arm_align0:
ldmia r1, {r4-r7}
add r1, r1, r2
@ Preload hint for the next source row.
pld [r1]
stmia r0, {r4-r7}
subs r3, r3, #1
@ add has no S suffix, so the flags from subs still drive the bne below.
add r0, r0, r2
bne MC_put_o_16_arm_align0
ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
@ PROC shift: copy one 16-byte row whose first byte lies shift/8 bytes into
@ the word at r1 (r1 must already be rounded down to a word boundary).
@ Five consecutive words are loaded into r4-r8. Each output word r9-r12 is
@ built from the upper part of one word (lsr shift) and the lower part of
@ the next one (lsl 32-shift). The four words are stored to dest, and both
@ pointers advance by stride.
@ The macro ends with the flags from subs r3 still valid, so the invoking
@ code follows it directly with bne.
@ r12 is used as scratch although it is not in the saved register list; it
@ is a call-clobbered register under the ARM procedure call standard.
@ NOTE(review): dropping leading bytes with a right shift matches
@ little-endian byte order; big-endian does not appear to be handled.
.macro PROC shift
ldmia r1, {r4-r8}
add r1, r1, r2
mov r9, r4, lsr #(\shift)
pld [r1]
mov r10, r5, lsr #(\shift)
orr r9, r9, r5, lsl #(32-\shift)
mov r11, r6, lsr #(\shift)
orr r10, r10, r6, lsl #(32-\shift)
mov r12, r7, lsr #(\shift)
orr r11, r11, r7, lsl #(32-\shift)
orr r12, r12, r8, lsl #(32-\shift)
stmia r0, {r9-r12}
subs r3, r3, #1
add r0, r0, r2
.endm
@ Offset 1: round ref down to its word, then shift every row by 8 bits.
MC_put_o_16_arm_align1:
and r1, r1, #0xFFFFFFFC
1: PROC(8)
bne 1b
ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
@ Offset 2: same scheme with a 16-bit shift.
MC_put_o_16_arm_align2:
and r1, r1, #0xFFFFFFFC
1: PROC(16)
bne 1b
ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
@ Offset 3: same scheme with a 24-bit shift.
MC_put_o_16_arm_align3:
and r1, r1, #0xFFFFFFFC
1: PROC(24)
bne 1b
ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
@ Jump table indexed by (ref & 3).
MC_put_o_16_arm_align_jt:
.word MC_put_o_16_arm_align0
.word MC_put_o_16_arm_align1
.word MC_put_o_16_arm_align2
.word MC_put_o_16_arm_align3
@ ----------------------------------------------------------------
@ MC_put_o_8_arm: copy an 8-byte-wide block from ref to dest, one row per
@ loop iteration, without interpolation.
@ Register use (per the prototype comment below and the code):
@   r0 = dest, r1 = ref, r2 = stride, r3 = height (row counter).
@ The low two bits of ref select one of four code paths through the jump
@ table at the end of the routine. The unaligned paths round ref down to a
@ word boundary and rebuild each row with shifts. dest is stored to with
@ stmia as is, so it is presumably word aligned already (TODO confirm).
@ The row counter is tested only after it has been decremented, so height
@ must be at least 1.
.align
.global MC_put_o_8_arm
MC_put_o_8_arm:
@@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
pld [r1]
stmfd sp!, {r4-r10, lr} @ R14 is also called LR
@ r4 = ref & 3: byte offset of ref inside its 32-bit word.
and r4, r1, #3
@ Jump through the table entry for that offset (4 bytes per entry).
adr r5, MC_put_o_8_arm_align_jt
add r5, r5, r4, lsl #2
ldr pc, [r5]
@ Offset 0: each row is two whole words, copied directly.
MC_put_o_8_arm_align0:
ldmia r1, {r4-r5}
add r1, r1, r2
@ Preload hint for the next source row.
pld [r1]
stmia r0, {r4-r5}
add r0, r0, r2
subs r3, r3, #1
bne MC_put_o_8_arm_align0
ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
@ PROC8 shift: copy one 8-byte row whose first byte lies shift/8 bytes into
@ the word at r1 (r1 must already be rounded down to a word boundary).
@ Three consecutive words are loaded into r4-r6. The two output words in
@ r9-r10 are each built from the upper part of one word (lsr shift) and the
@ lower part of the next one (lsl 32-shift), then stored to dest. Both
@ pointers advance by stride.
@ The final add has no S suffix, so the flags from subs r3 are still valid
@ for the bne that follows each use of the macro.
@ NOTE(review): the shift direction matches little-endian byte order;
@ big-endian does not appear to be handled.
.macro PROC8 shift
ldmia r1, {r4-r6}
add r1, r1, r2
mov r9, r4, lsr #(\shift)
pld [r1]
mov r10, r5, lsr #(\shift)
orr r9, r9, r5, lsl #(32-\shift)
orr r10, r10, r6, lsl #(32-\shift)
stmia r0, {r9-r10}
subs r3, r3, #1
add r0, r0, r2
.endm
@ Offset 1: round ref down to its word, then shift every row by 8 bits.
MC_put_o_8_arm_align1:
and r1, r1, #0xFFFFFFFC
1: PROC8(8)
bne 1b
ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
@ Offset 2: same scheme with a 16-bit shift.
MC_put_o_8_arm_align2:
and r1, r1, #0xFFFFFFFC
1: PROC8(16)
bne 1b
ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
@ Offset 3: same scheme with a 24-bit shift.
MC_put_o_8_arm_align3:
and r1, r1, #0xFFFFFFFC
1: PROC8(24)
bne 1b
ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
@ Jump table indexed by (ref & 3).
MC_put_o_8_arm_align_jt:
.word MC_put_o_8_arm_align0
.word MC_put_o_8_arm_align1
.word MC_put_o_8_arm_align2
.word MC_put_o_8_arm_align3
@ ----------------------------------------------------------------
@ AVG_PW rW1, rW2: per-byte rounded-up average of a word with the same data
@ moved along by one byte.
@ Inputs:  rW1 = current word, rW2 = following word (only its low byte is
@          used, because it is shifted left by 24 first).
@          r11 must hold 0x01010101 and r12 its complement 0xFEFEFEFE; the
@          MC_put_x_* routines set both up before using the macro.
@ Output:  rW2 = average of each byte of rW1 with the matching byte of
@          ((rW1 >> 8) | (low byte of rW2 << 24)). rW1 is left unchanged.
@ Scratch: r9 and r10 are overwritten.
@ With a = rW1 and b = the shifted word, the result is
@   (a & b) + (((a ^ b) & 0xFEFEFEFE) >> 1) + ((a ^ b) & 0x01010101)
@ which is (a + b + 1) >> 1 in every byte lane. Bit 0 of each byte of a ^ b
@ is masked off before the shift so that it cannot move into the top bit of
@ the neighbouring byte.
.macro AVG_PW rW1, rW2
mov \rW2, \rW2, lsl #24
orr \rW2, \rW2, \rW1, lsr #8
eor r9, \rW1, \rW2
and \rW2, \rW1, \rW2
and r10, r9, r12
add \rW2, \rW2, r10, lsr #1
and r10, r9, r11
add \rW2, \rW2, r10
.endm
@ MC_put_x_16_arm: write a 16-byte-wide block to dest in which every byte is
@ the AVG_PW average of a source byte and the byte that follows it in the
@ row (17 source bytes per row).
@ Register use (per the prototype comment below and the code):
@   r0 = dest, r1 = ref, r2 = stride, r3 = height (row counter),
@   r11 = 0x01010101 and r12 = 0xFEFEFEFE (constants required by AVG_PW).
@ The low two bits of ref select one of four code paths. The unaligned paths
@ round ref down to a word boundary and realign the five loaded words with
@ ADJ_ALIGN_QW before averaging.
@ The row counter is tested only after it has been decremented, so height
@ must be at least 1.
@ NOTE(review): the shift directions used here match little-endian byte
@ order; big-endian does not appear to be handled.
.align
.global MC_put_x_16_arm
MC_put_x_16_arm:
@@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
pld [r1]
stmfd sp!, {r4-r11,lr} @ R14 is also called LR
@ r4 = ref & 3: byte offset of ref inside its 32-bit word.
and r4, r1, #3
adr r5, MC_put_x_16_arm_align_jt
@ The first word of the table is the constant 0x01010101, not an address.
ldr r11, [r5]
mvn r12, r11
add r5, r5, r4, lsl #2
@ The extra offset of 4 skips the constant word at the start of the table.
ldr pc, [r5, #4]
@ ADJ_ALIGN_QW shift, R0..R4: shift the five-word sequence R0..R4 right by
@ shift bits as one long value. Each of R0..R3 takes its missing high bits
@ from the low bits of the next register; R4 is simply shifted right.
@ The final masking line is commented out. AVG_PW shifts its second
@ operand left by 24 before use, so only the low byte of R4 matters, which
@ is presumably why the mask was dropped.
.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
mov \R0, \R0, lsr #(\shift)
orr \R0, \R0, \R1, lsl #(32 - \shift)
mov \R1, \R1, lsr #(\shift)
orr \R1, \R1, \R2, lsl #(32 - \shift)
mov \R2, \R2, lsr #(\shift)
orr \R2, \R2, \R3, lsl #(32 - \shift)
mov \R3, \R3, lsr #(\shift)
orr \R3, \R3, \R4, lsl #(32 - \shift)
mov \R4, \R4, lsr #(\shift)
@ and \R4, \R4, #0xFF
.endm
@ Offset 0: the five loaded words are used as they are.
MC_put_x_16_arm_align0:
ldmia r1, {r4-r8}
add r1, r1, r2
pld [r1]
@ AVG_PW overwrites its second operand, so the words are processed from the
@ highest pair down: each first operand is still unmodified when it is read.
@ The four results end up in r5-r8.
AVG_PW r7, r8
AVG_PW r6, r7
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r8}
subs r3, r3, #1
@ add has no S suffix, so the flags from subs still drive the bne below.
add r0, r0, r2
bne MC_put_x_16_arm_align0
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 1: round ref down to its word and realign each row by 8 bits.
MC_put_x_16_arm_align1:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r8}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
AVG_PW r7, r8
AVG_PW r6, r7
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r8}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 2: same scheme with a 16-bit realignment.
MC_put_x_16_arm_align2:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r8}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
AVG_PW r7, r8
AVG_PW r6, r7
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r8}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 3: same scheme with a 24-bit realignment.
MC_put_x_16_arm_align3:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r8}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
AVG_PW r7, r8
AVG_PW r6, r7
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r8}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Word 0 is the AVG_PW constant; words 1-4 are the jump targets indexed by
@ (ref & 3).
MC_put_x_16_arm_align_jt:
.word 0x01010101
.word MC_put_x_16_arm_align0
.word MC_put_x_16_arm_align1
.word MC_put_x_16_arm_align2
.word MC_put_x_16_arm_align3
@ ----------------------------------------------------------------
@ MC_put_x_8_arm: write an 8-byte-wide block to dest in which every byte is
@ the AVG_PW average of a source byte and the byte that follows it in the
@ row (9 source bytes per row).
@ Register use (per the prototype comment below and the code):
@   r0 = dest, r1 = ref, r2 = stride, r3 = height (row counter),
@   r11 = 0x01010101 and r12 = 0xFEFEFEFE (constants required by AVG_PW).
@ The low two bits of ref select one of four code paths. The unaligned paths
@ round ref down to a word boundary and realign the three loaded words with
@ ADJ_ALIGN_DW before averaging.
@ The row counter is tested only after it has been decremented, so height
@ must be at least 1.
@ NOTE(review): the shift directions used here match little-endian byte
@ order; big-endian does not appear to be handled.
.align
.global MC_put_x_8_arm
MC_put_x_8_arm:
@@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
pld [r1]
stmfd sp!, {r4-r11,lr} @ R14 is also called LR
@ r4 = ref & 3: byte offset of ref inside its 32-bit word.
and r4, r1, #3
adr r5, MC_put_x_8_arm_align_jt
@ The first word of the table is the constant 0x01010101, not an address.
ldr r11, [r5]
mvn r12, r11
add r5, r5, r4, lsl #2
@ The extra offset of 4 skips the constant word at the start of the table.
ldr pc, [r5, #4]
@ ADJ_ALIGN_DW shift, R0, R1, R2: shift the three-word sequence R0..R2 right
@ by shift bits as one long value. R0 and R1 take their missing high bits
@ from the low bits of the next register; R2 is simply shifted right.
@ NOTE(review): the commented-out line inside the macro names a parameter R4
@ that this macro does not have; it looks like a leftover copied from
@ ADJ_ALIGN_QW and would have to refer to R2 if it were ever enabled.
.macro ADJ_ALIGN_DW shift, R0, R1, R2
mov \R0, \R0, lsr #(\shift)
orr \R0, \R0, \R1, lsl #(32 - \shift)
mov \R1, \R1, lsr #(\shift)
orr \R1, \R1, \R2, lsl #(32 - \shift)
mov \R2, \R2, lsr #(\shift)
@ and \R4, \R4, #0xFF
.endm
@ Offset 0: the three loaded words are used as they are.
MC_put_x_8_arm_align0:
ldmia r1, {r4-r6}
add r1, r1, r2
pld [r1]
@ AVG_PW overwrites its second operand, so the higher pair is processed
@ first; r5 is still unmodified when the second call reads its low byte.
@ The two results end up in r5-r6.
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r6}
subs r3, r3, #1
@ add has no S suffix, so the flags from subs still drive the bne below.
add r0, r0, r2
bne MC_put_x_8_arm_align0
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 1: round ref down to its word and realign each row by 8 bits.
MC_put_x_8_arm_align1:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r6}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_DW 8, r4, r5, r6
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r6}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 2: same scheme with a 16-bit realignment.
MC_put_x_8_arm_align2:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r6}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_DW 16, r4, r5, r6
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r6}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Offset 3: same scheme with a 24-bit realignment.
MC_put_x_8_arm_align3:
and r1, r1, #0xFFFFFFFC
1: ldmia r1, {r4-r6}
add r1, r1, r2
pld [r1]
ADJ_ALIGN_DW 24, r4, r5, r6
AVG_PW r5, r6
AVG_PW r4, r5
stmia r0, {r5-r6}
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
@ Word 0 is the AVG_PW constant; words 1-4 are the jump targets indexed by
@ (ref & 3).
MC_put_x_8_arm_align_jt:
.word 0x01010101
.word MC_put_x_8_arm_align0
.word MC_put_x_8_arm_align1
.word MC_put_x_8_arm_align2
.word MC_put_x_8_arm_align3
......@@ -307,5 +307,6 @@ extern mpeg2_mc_t mpeg2_mc_3dnow;
extern mpeg2_mc_t mpeg2_mc_altivec;
extern mpeg2_mc_t mpeg2_mc_alpha;
extern mpeg2_mc_t mpeg2_mc_vis;
extern mpeg2_mc_t mpeg2_mc_arm;
#endif /* LIBMPEG2_MPEG2_INTERNAL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment